From 9b4e6c3b4d852883a372332461253ef9eae6d014 Mon Sep 17 00:00:00 2001 From: Jack Jackson Date: Thu, 17 Apr 2025 22:17:26 -0700 Subject: [PATCH] Introduce ability to seed data directly from GSheet Reasonably hacky, in that I introduce a facade to reuse the data format previously provided by the `csv` module, rather than using the `list[list[str]]` directly. Next I want to introduce something like Celery to continually refresh. Note that this will require changes to the deployment repo in order to provide the required secrets. --- README.md | 14 ++++++++ app/routers/seed.py | 56 ++++++++++++++++++++++++++----- app/services/google_sheets.py | 63 +++++++++++++++++++++++++++++++++++ app/templates/seed.html | 6 ++-- compose.yaml | 4 +++ requirements.txt | 3 ++ tests/routers/test_seed.py | 15 +++++++++ tests/test_fresh_db_tests.py | 8 +++++ 8 files changed, 157 insertions(+), 12 deletions(-) create mode 100644 app/services/google_sheets.py create mode 100644 tests/routers/test_seed.py diff --git a/README.md b/README.md index 8620427..aa6e974 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,17 @@ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) This is a score tracker for an EDH ("Commander") group. + +# Reading from a Google Sheet + +Eventually, I'd want this application to be data-resilient enough to be the primary store of data. For now, though, I'm reading data in from a(n impressive!) Google Sheet maintained by my playgroup. 
+ +If you want to enable that syncing, you'll need to: + +* Go to the [Google Cloud Console](https://console.cloud.google.com) + * If you haven't got one already, create a Project +* Go to "IAM and admin > Service Accounts" +* Create a new Service Account +* Note the resultant email address, and share the Google Sheet with it + +If you haven't used the Google Sheets API in your Project, you may need to enable it, at `console.cloud.google.com/apis/library/sheets.googleapis.com?project=` diff --git a/app/routers/seed.py b/app/routers/seed.py index dcfcc8b..ed19763 100644 --- a/app/routers/seed.py +++ b/app/routers/seed.py @@ -9,6 +9,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile from fastapi.responses import HTMLResponse from sqlalchemy.orm import Session +from app.services import google_sheets + from .decks import list_decks from .games import create_game, latest_game, list_games from .players import list_players @@ -86,10 +88,10 @@ def seed_games(file: UploadFile, db: Session = Depends(get_db)): return "OK!" -@api_router.post("/all_in_one") -def all_in_one(file: UploadFile, db: Session = Depends(get_db)): - file_contents = file.file.read().decode("utf-8").split("\n") - reader = csv.DictReader(file_contents, delimiter=",") +@api_router.post("/from_google_sheets") +def from_google_sheets(db: Session = Depends(get_db)): + data = google_sheets.get_data() + reader = CSVFacade(data) # Fetch the currently-known-information so that we can avoid recreating existing data current_player_ids_by_name = { @@ -184,7 +186,7 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)): # (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a # retraversal. I suspect that the overhead of O(2*n) vs. 
O(n) data-reads is going to be insignificant) # ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P )) - reader = csv.DictReader(file_contents, delimiter=",") + reader = CSVFacade(data) for row in reader: # Skip any games created before the date of the latest current game # (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If @@ -205,9 +207,16 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)): if not row["Winning Deck"].startswith("Tie"): print(f"DEBUG - checking row {row}") - index_of_winning_deck = [ - row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6) - ].index(True) + try: + index_of_winning_deck = [ + row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6) + ].index(True) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Error when processing row {row_idx}, game on {row['Date']}: Winning Deck is named {row['Winning Deck']}, but no deck with that name was found", + ) + created_game = create_game( schemas.GameCreate( date=date_of_current_row, @@ -294,6 +303,37 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)): return "Ok!" +# Facade class to imitate the CSV-reader-interface when passed a list[list[str]] +# as we get from the Google Sheets API +# dict[str, str], where the keys are the header names and the values are the row values +class CSVFacade: + def __init__(self, data: list[list[str]]): + self.headers = data[0] + self.data = data[1:] + logging.critical(f"Headers: {self.headers}") + logging.critical(f"First row: {self.data[0]}") + + def __iter__(self): + self.index = 0 + return self + + def __next__(self): + if self.index >= len(self.data): + raise StopIteration + result = self.data[self.index] + self.index += 1 + if len(self.headers) != len(result): + # If the last column - "notes" - is empty, fill it in with an empty string. 
+            if len(result) == len(self.headers) - 1:
+                result.append("")
+            # If there's any other kind of discrepancy, though, that's probably an error
+            else:
+                raise ValueError(
+                    f"Row {self.index} has {len(result)} columns, but {len(self.headers)} are expected"
+                )
+        return dict(zip(self.headers, result))
+
+
 @html_router.get("/")
 def main(request: Request, db=Depends(get_db)):
     return jinja_templates.TemplateResponse(
diff --git a/app/services/google_sheets.py b/app/services/google_sheets.py
new file mode 100644
index 0000000..541bb5b
--- /dev/null
+++ b/app/services/google_sheets.py
@@ -0,0 +1,63 @@
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+import os
+
+
+def get_data() -> list[list[str]]:
+    spreadsheet_id = os.getenv("SPREADSHEET_ID")
+    path_to_credentials = os.getenv("PATH_TO_GOOGLE_SHEETS_CREDENTIALS")
+    resp = []
+    # Hardcoding the format (and page names) to the spreadsheet that my group uses, because I'm lazy -
+    # should be easy to change those to using environment variables if needed
+    for idx, year in enumerate(["2024", "2025"]):
+        # Assumes that the headers are the same for all years
+        if idx == 0:
+            resp = read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)
+        else:
+            # Drop the headers from this year, because they were already added by the previous year.
+            resp.extend(
+                read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)[1:]
+            )
+    return resp
+
+
+# The scope for Google Sheets API
+SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
+
+
+def get_service_account_credentials(credentials_path):
+    """Gets service account credentials from a JSON key file. 
+ + Args: + credentials_path: Path to the service account JSON key file + + Returns: + Credentials object for the service account + """ + return service_account.Credentials.from_service_account_file( + credentials_path, scopes=SCOPES + ) + + +def read_sheet(spreadsheet_id, range_name, credentials_path): + """Reads data from a Google Sheet using service account credentials. + + Args: + spreadsheet_id: The ID of the spreadsheet to read from + range_name: The A1 notation of the range to read from (e.g., 'Sheet1!A1:D10') + credentials_path: Path to the service account JSON key file + + Returns: + A list of lists containing the values from the specified range + """ + creds = get_service_account_credentials(credentials_path) + service = build("sheets", "v4", credentials=creds) + + # Call the Sheets API + sheet = service.spreadsheets() + result = ( + sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute() + ) + + values = result.get("values", []) + return values diff --git a/app/templates/seed.html b/app/templates/seed.html index b8b22f4..dada4a9 100644 --- a/app/templates/seed.html +++ b/app/templates/seed.html @@ -35,10 +35,8 @@
-
- - - Upload + + Trigger Seed from Google Sheets
{% endblock %} \ No newline at end of file diff --git a/compose.yaml b/compose.yaml index 2d28771..84406c6 100644 --- a/compose.yaml +++ b/compose.yaml @@ -4,12 +4,16 @@ services: build: context: . target: dev + environment: + SPREADSHEET_ID: "1ITgXXfq7KaNP8JTQMvoZJSbu7zPpCcfNio_aooULRfc" + PATH_TO_GOOGLE_SHEETS_CREDENTIALS: "./google_sheets_credentials.json" volumes: - type: bind source: ./app/ # Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path. target: /app/app - ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated + - /Users/scubbo/Downloads/google_sheets_credentials.json:/app/google_sheets_credentials.json ports: - 8000:8000 diff --git a/requirements.txt b/requirements.txt index b340c49..2307e35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,6 @@ uvicorn Jinja2 pyyaml PyFunctional +google-api-python-client +google-auth-oauthlib +google-auth-httplib2 diff --git a/tests/routers/test_seed.py b/tests/routers/test_seed.py new file mode 100644 index 0000000..4733b8f --- /dev/null +++ b/tests/routers/test_seed.py @@ -0,0 +1,15 @@ +# A test for the hacky workaround class I made that imitates the CSV-reader-interface when passed a list[list[str]] +# as we get from the Google Sheets API + +from app.routers.seed import CSVFacade + + +def test_csv_facade(): + facade = CSVFacade([["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]]) + for i, row in enumerate(facade): + if i == 0: + assert row == {"a": "1", "b": "2", "c": "3"} + elif i == 1: + assert row == {"a": "4", "b": "5", "c": "6"} + else: + raise ValueError(f"Expected 2 rows, but got {i+1}") diff --git a/tests/test_fresh_db_tests.py b/tests/test_fresh_db_tests.py index 1e849e7..3cd84bd 100644 --- a/tests/test_fresh_db_tests.py +++ b/tests/test_fresh_db_tests.py @@ -1,3 +1,4 @@ +import pytest from typing import Mapping import httpx @@ -68,6 +69,13 @@ def test_add_and_retrieve_deck(test_client: TestClient, cleanups): 
cleanups.add_success(success_cleanup) +# Keeping this around because it would be useful to reintroduce it if I factor out data-sourcing +# (I did briefly try doing so, but because the logic of full-seeding involves recreating the iterable, +# and the two different data sources have different object-types, it wasn't obvious how to reinitialize. +# Probably would be possible, but given that I'm likely gonna deprecate the csv-upload model anyway, probably not worth it.) +@pytest.mark.skip( + reason="Moved from an injected-data model, to reading from Google Sheets" +) def test_incremental_add_of_games(test_client: TestClient, cleanups): latest_game_response = _json_get(test_client, "/game/latest_game") assert latest_game_response.status_code == 404