Introduce ability to seed data directly from GSheet

Reasonably hacky, in that I introduce a facade to reuse the data format
previously provided by the `csv` module, rather than using the
`list[list[str]]` directly.

Next, I want to introduce something like Celery to refresh the data continually.

Note that this will require changes to the deployment repo in order to
provide the required secrets.
Jack Jackson 2025-04-17 22:17:26 -07:00
parent 0434ec1e98
commit 9b4e6c3b4d
8 changed files with 157 additions and 12 deletions

View File

@@ -1,3 +1,17 @@
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
This is a score tracker for an EDH ("Commander") group.
# Reading from a Google Sheet
Eventually, I'd like this application to be data-resilient enough to be the primary store of data. For now, though, I'm reading data in from a(n impressive!) Google Sheet maintained by my playgroup.
If you want to enable that syncing, you'll need to:
* Go to the [Google Cloud Console](https://console.cloud.google.com)
* If you haven't got one already, create a Project
* Go to "IAM and admin > Service Accounts"
* Create a new Service Account
* Note the resultant email address, and share the Google Sheet with it
If you haven't used the Google Sheets API in your Project before, you may need to enable it at `console.cloud.google.com/apis/library/sheets.googleapis.com?project=<your_project_slug>`
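To sanity-check the credentials before running a full seed, you can call the service module directly. A minimal sketch, assuming the `SPREADSHEET_ID` and `PATH_TO_GOOGLE_SHEETS_CREDENTIALS` environment variables are set (the same ones the app reads):

```python
# Quick smoke test for the Sheets integration. Assumes SPREADSHEET_ID and
# PATH_TO_GOOGLE_SHEETS_CREDENTIALS are set, as in docker-compose.yml.
from app.services import google_sheets

rows = google_sheets.get_data()
print(f"Fetched {len(rows)} rows; header row: {rows[0]}")
```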

View File

@@ -9,6 +9,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
from fastapi.responses import HTMLResponse
from sqlalchemy.orm import Session
from app.services import google_sheets
from .decks import list_decks
from .games import create_game, latest_game, list_games
from .players import list_players
@@ -86,10 +88,10 @@ def seed_games(file: UploadFile, db: Session = Depends(get_db)):
return "OK!"
@api_router.post("/all_in_one")
def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
file_contents = file.file.read().decode("utf-8").split("\n")
reader = csv.DictReader(file_contents, delimiter=",")
@api_router.post("/from_google_sheets")
def from_google_sheets(db: Session = Depends(get_db)):
data = google_sheets.get_data()
reader = CSVFacade(data)
# Fetch the currently-known-information so that we can avoid recreating existing data
current_player_ids_by_name = {
@@ -184,7 +186,7 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
    # (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a
    # retraversal. I suspect that the overhead of O(2*n) vs. O(n) data-reads is going to be insignificant)
    # ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P ))
    reader = csv.DictReader(file_contents, delimiter=",")
    reader = CSVFacade(data)
    for row in reader:
        # Skip any games created before the date of the latest current game
        # (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If
@@ -205,9 +207,16 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
        if not row["Winning Deck"].startswith("Tie"):
            print(f"DEBUG - checking row {row}")
            try:
                index_of_winning_deck = [
                    row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6)
                ].index(True)
            except ValueError:
                raise HTTPException(
                    status_code=400,
                    detail=f"Error when processing row {row_idx}, game on {row['Date']}: Winning Deck is named {row['Winning Deck']}, but no deck with that name was found",
                )
        created_game = create_game(
            schemas.GameCreate(
                date=date_of_current_row,
@@ -294,6 +303,37 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
return "Ok!"
# Facade class to imitate the CSV-reader-interface when passed a list[list[str]]
# as we get from the Google Sheets API
# dict[str, str], where the keys are the header names and the values are the row values
class CSVFacade:
def __init__(self, data: list[list[str]]):
self.headers = data[0]
self.data = data[1:]
logging.critical(f"Headers: {self.headers}")
logging.critical(f"First row: {self.data[0]}")
def __iter__(self):
self.index = 0
return self
def __next__(self):
if self.index >= len(self.data):
raise StopIteration
result = self.data[self.index]
self.index += 1
if len(self.headers) != len(result):
# If the last column - "notes" - is empty, fill it in with an empty string.
if len(result) == len(self.headers) - 1:
result.append("")
# If there's any other kind of discrepancy, though, that's probably an error
else:
raise ValueError(
f"Row {self.index} has {len(result)} columns, but {len(self.headers)} are expected"
)
return dict(zip(self.headers, result))
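Since `CSVFacade` yields the same `dict[str, str]` rows that `csv.DictReader` would, the downstream seeding logic is unchanged. A quick sketch of the behaviour (the column names match those the seeding code reads; the values are made up):

```python
# The first inner list is treated as the header row; every subsequent list
# becomes a {header: value} dict, just like csv.DictReader.
data = [["Date", "Winning Deck"], ["2025-01-01", "Krenko"]]
for row in CSVFacade(data):
    print(row["Date"], row["Winning Deck"])  # -> 2025-01-01 Krenko
```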
@html_router.get("/")
def main(request: Request, db=Depends(get_db)):
    return jinja_templates.TemplateResponse(

View File

@@ -0,0 +1,63 @@
from google.oauth2 import service_account
from googleapiclient.discovery import build
import os
def get_data() -> list[list[str]]:
    spreadsheet_id = os.getenv("SPREADSHEET_ID")
    path_to_credentials = os.getenv("PATH_TO_GOOGLE_SHEETS_CREDENTIALS")
    resp = []
    # Hardcoding the format (and page names) to the spreadsheet that my group uses, because I'm lazy -
    # should be easy to change those to use environment variables if needed
    for idx, year in enumerate(["2024", "2025"]):
        # Assumes that the headers are the same for all years
        if idx == 0:
            resp = read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)
        else:
            # Drop the headers from this year, because they were already added by the previous year.
            resp.extend(
                read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)[1:]
            )
    return resp
# The scope for Google Sheets API
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
def get_service_account_credentials(credentials_path):
    """Gets service account credentials from a JSON key file.

    Args:
        credentials_path: Path to the service account JSON key file

    Returns:
        Credentials object for the service account
    """
    return service_account.Credentials.from_service_account_file(
        credentials_path, scopes=SCOPES
    )
def read_sheet(spreadsheet_id, range_name, credentials_path):
    """Reads data from a Google Sheet using service account credentials.

    Args:
        spreadsheet_id: The ID of the spreadsheet to read from
        range_name: The A1 notation of the range to read from (e.g., 'Sheet1!A1:D10')
        credentials_path: Path to the service account JSON key file

    Returns:
        A list of lists containing the values from the specified range
    """
    creds = get_service_account_credentials(credentials_path)
    service = build("sheets", "v4", credentials=creds)
    # Call the Sheets API
    sheet = service.spreadsheets()
    result = (
        sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute()
    )
    values = result.get("values", [])
    return values
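For reference, `read_sheet` takes any A1-notation range, and a bare sheet name returns the whole tab. A sketch with placeholder arguments (the `2024 games` tab name matches what `get_data` hardcodes):

```python
# Hypothetical direct call; substitute your own spreadsheet ID and key file.
values = read_sheet(
    spreadsheet_id="<your_spreadsheet_id>",
    range_name="2024 games",  # a bare sheet name reads the entire tab
    credentials_path="/path/to/google_sheets_credentials.json",
)
print(values[0])  # header row
```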

View File

@@ -35,10 +35,8 @@
    </div>
    <hr/>
    <div>
        <form action="/api/seed/all_in_one" method="post" enctype="multipart/form-data">
            <label for="file">Upload All-in-One</label>
            <input type="file" id="file" name="file" accept=".csv"/>
            <button type="submit">Upload</button>
        <form action="/api/seed/from_google_sheets" method="post">
            <button type="submit">Trigger Seed from Google Sheets</button>
        </form>
    </div>
{% endblock %}
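Since the endpoint takes no request body, the form is just a convenience; any HTTP client can trigger the seed. A sketch using `httpx` (already a test dependency), assuming the port mapping from docker-compose.yml:

```python
import httpx

# No payload needed: the endpoint reads everything from the Google Sheet itself.
resp = httpx.post("http://localhost:8000/api/seed/from_google_sheets")
print(resp.status_code, resp.text)  # expect 200 and "Ok!"
```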

View File

@@ -4,12 +4,16 @@ services:
    build:
      context: .
      target: dev
    environment:
      SPREADSHEET_ID: "1ITgXXfq7KaNP8JTQMvoZJSbu7zPpCcfNio_aooULRfc"
      PATH_TO_GOOGLE_SHEETS_CREDENTIALS: "./google_sheets_credentials.json"
    volumes:
      - type: bind
        source: ./app/
        # Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path.
        target: /app/app
      - ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated
      - /Users/scubbo/Downloads/google_sheets_credentials.json:/app/google_sheets_credentials.json
    ports:
      - 8000:8000

View File

@@ -5,3 +5,6 @@ uvicorn
Jinja2
pyyaml
PyFunctional
google-api-python-client
google-auth-oauthlib
google-auth-httplib2

View File

@@ -0,0 +1,15 @@
# A test for the hacky workaround class I made that imitates the CSV-reader-interface when passed a list[list[str]]
# as we get from the Google Sheets API
from app.routers.seed import CSVFacade
def test_csv_facade():
    facade = CSVFacade([["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]])
    for i, row in enumerate(facade):
        if i == 0:
            assert row == {"a": "1", "b": "2", "c": "3"}
        elif i == 1:
            assert row == {"a": "4", "b": "5", "c": "6"}
        else:
            raise ValueError(f"Expected 2 rows, but got {i+1}")

View File

@@ -1,3 +1,4 @@
import pytest
from typing import Mapping
import httpx
@@ -68,6 +69,13 @@ def test_add_and_retrieve_deck(test_client: TestClient, cleanups):
    cleanups.add_success(success_cleanup)
# Keeping this around because it would be useful to reintroduce if I factor out data-sourcing.
# (I did briefly try doing so, but because the logic of full-seeding involves recreating the iterable,
# and the two data sources have different object types, it wasn't obvious how to reinitialize it.
# It's probably possible, but since I'm likely going to deprecate the csv-upload model anyway, it's probably not worth it.)
@pytest.mark.skip(
    reason="Moved from an injected-data model, to reading from Google Sheets"
)
def test_incremental_add_of_games(test_client: TestClient, cleanups):
    latest_game_response = _json_get(test_client, "/game/latest_game")
    assert latest_game_response.status_code == 404