Introduce ability to seed data directly from GSheet

Reasonably hacky, in that I introduce a facade to reuse the data format previously provided by the `csv` module, rather than using the `list[list[str]]` directly. Next I want to introduce something like Celery to continually refresh. Note that this will require changes to the deployment repo in order to provide the required secrets.
2025-04-17 22:17:26 -07:00 · 2025-04-17 22:17:26 -07:00 · 9b4e6c3b4d
commit 9b4e6c3b4d
parent 0434ec1e98
8 changed files with 157 additions and 12 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,17 @@
 [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 This is a score tracker for an EDH ("Commander") group.
 # Reading from a Google Sheet
 Eventually, I'd want this application to be data-resilient enough to be the primary store of data. For now, though, I'm reading data in from a(n impressive!) Google Sheet maintained by my playgroup.
 If you want to enable that syncing, you'll need to:
 * Go to the [Google Cloud Console](https://console.cloud.google.com)
  * If you haven't got one already, create a Project
 * Go to "IAM and admin > Service Accounts"
 * Create a new Service Account
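 * Note the resultant email address, and share the Google Sheet with it
 * Create and download a JSON key for the Service Account, then set the `PATH_TO_GOOGLE_SHEETS_CREDENTIALS` environment variable to the path of that file and `SPREADSHEET_ID` to the ID of the Google Sheet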
 If you haven't used the Google Sheets API in your Project, you may need to enable it, at `console.cloud.google.com/apis/library/sheets.googleapis.com?project=<your_project_slug>`
--- a/app/routers/seed.py
+++ b/app/routers/seed.py
@ -9,6 +9,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
 from fastapi.responses import HTMLResponse
 from sqlalchemy.orm import Session
 from app.services import google_sheets
 from .decks import list_decks
 from .games import create_game, latest_game, list_games
 from .players import list_players
@ -86,10 +88,10 @@ def seed_games(file: UploadFile, db: Session = Depends(get_db)):
    return "OK!"
-@api_router.post("/all_in_one")
+@api_router.post("/from_google_sheets")
-def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
+def from_google_sheets(db: Session = Depends(get_db)):
-    file_contents = file.file.read().decode("utf-8").split("\n")
+    data = google_sheets.get_data()
-    reader = csv.DictReader(file_contents, delimiter=",")
+    reader = CSVFacade(data)
    # Fetch the currently-known-information so that we can avoid recreating existing data
    current_player_ids_by_name = {
@ -184,7 +186,7 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
    # (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a
    # retraversal. I suspect that the overhead of O(2*n) vs. O(n) data-reads is going to be insignificant)
    # ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P ))
-    reader = csv.DictReader(file_contents, delimiter=",")
+    reader = CSVFacade(data)
    for row in reader:
        # Skip any games created before the date of the latest current game
        # (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If
@ -205,9 +207,16 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
        if not row["Winning Deck"].startswith("Tie"):
            print(f"DEBUG - checking row {row}")
-            index_of_winning_deck = [
+            try:
-                row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6)
+                index_of_winning_deck = [
-            ].index(True)
+                    row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6)
                ].index(True)
            except ValueError:
                raise HTTPException(
                    status_code=400,
                    detail=f"Error when processing row {row_idx}, game on {row['Date']}: Winning Deck is named {row['Winning Deck']}, but no deck with that name was found",
                )
            created_game = create_game(
                schemas.GameCreate(
                    date=date_of_current_row,
@ -294,6 +303,37 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
    return "Ok!"
 # Facade class to imitate the CSV-reader-interface when passed a list[list[str]]
 # as we get from the Google Sheets API.
 # Iterating over it yields one dict[str, str] per row, where the keys are the header names and the values are the row values
 class CSVFacade:
    """Imitate the row-iteration interface of a CSV dict-reader for sheet data.

    Wraps a `list[list[str]]` (header row first, then data rows) and, when
    iterated, yields one `dict[str, str]` per data row whose keys are the
    header names and whose values are that row's cells.

    Args:
        data: The header row followed by the data rows. May be empty or hold
            only a header row, in which case iteration yields nothing.

    Raises:
        ValueError: During iteration, if a row's length differs from the
            header's by anything other than a single missing trailing cell.
    """

    def __init__(self, data: list[list[str]]):
        # An empty sheet has no header row at all; treat it as "no columns".
        self.headers = data[0] if data else []
        self.data = data[1:]
        self.index = 0
        logging.debug("Headers: %s", self.headers)
        # A sheet holding only a header row has no first data row to log.
        if self.data:
            logging.debug("First row: %s", self.data[0])

    def __iter__(self):
        # Restart from the first data row every time iteration begins.
        self.index = 0
        return self

    def __next__(self):
        if self.index >= len(self.data):
            raise StopIteration

        # Work on a copy so that the padding below never mutates the rows
        # that the caller passed in.
        result = list(self.data[self.index])
        self.index += 1

        if len(self.headers) != len(result):
            # If the last column - "notes" - is empty, fill it in with an empty string.
            if len(result) == len(self.headers) - 1:
                result.append("")
            # If there's any other kind of discrepancy, though, that's probably an error
            else:
                raise ValueError(
                    f"Row {self.index} has {len(result)} columns, but {len(self.headers)} are expected"
                )

        return dict(zip(self.headers, result))
@html_router.get("/")
 def main(request: Request, db=Depends(get_db)):
    return jinja_templates.TemplateResponse(
--- a/app/services/google_sheets.py
+++ b/app/services/google_sheets.py
@ -0,0 +1,63 @@
 from google.oauth2 import service_account
 from googleapiclient.discovery import build
 import os
 def get_data(years: tuple[str, ...] = ("2024", "2025")) -> list[list[str]]:
    """Read the per-year game sheets and return them as one combined table.

    The spreadsheet ID and the path to the service-account key file are read
    from the `SPREADSHEET_ID` and `PATH_TO_GOOGLE_SHEETS_CREDENTIALS`
    environment variables.

    Args:
        years: The years whose "<year> games" pages should be read, in order.
            Defaults to the pages of the spreadsheet that my group uses.

    Returns:
        The rows of the first year's page (header row included), followed by
        the rows of every later year's page with the first row dropped.
    """
    spreadsheet_id = os.getenv("SPREADSHEET_ID")
    path_to_credentials = os.getenv("PATH_TO_GOOGLE_SHEETS_CREDENTIALS")

    resp: list[list[str]] = []
    # The page-name format is hardcoded to the spreadsheet that my group uses -
    # should be easy to change it to use environment variables if needed
    for idx, year in enumerate(years):
        rows = read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)
        # Assumes that the headers are the same for all years: keep them from
        # the first page only, and drop them from every later page.
        resp.extend(rows if idx == 0 else rows[1:])
    return resp
 # Read-only scope for the Google Sheets API; passed as `scopes` when the
 # service-account credentials are created.
 SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
 def get_service_account_credentials(credentials_path):
    """Load service-account credentials from a JSON key file.

    Args:
        credentials_path: Location of the service account's JSON key file

    Returns:
        The service account's Credentials object, created with `SCOPES`
    """
    load_from_file = service_account.Credentials.from_service_account_file
    return load_from_file(credentials_path, scopes=SCOPES)
 def read_sheet(spreadsheet_id, range_name, credentials_path):
    """Reads data from a Google Sheet using service account credentials.

    Args:
        spreadsheet_id: The ID of the spreadsheet to read from
        range_name: The A1 notation of the range to read from (e.g., 'Sheet1!A1:D10')
        credentials_path: Path to the service account JSON key file

    Returns:
        A list of lists containing the values from the specified range, or an
        empty list if the response holds no "values" entry
    """
    creds = get_service_account_credentials(credentials_path)

    # Use the service as a context manager so that it is closed once the
    # request has completed, instead of being left open on every call.
    with build("sheets", "v4", credentials=creds) as service:
        # Call the Sheets API
        result = (
            service.spreadsheets()
            .values()
            .get(spreadsheetId=spreadsheet_id, range=range_name)
            .execute()
        )

    return result.get("values", [])
--- a/app/templates/seed.html
+++ b/app/templates/seed.html
@ -35,10 +35,8 @@
 </div>
 <hr/>
 <div>
-    <form action="/api/seed/all_in_one" method="post" enctype="multipart/form-data">
+    <form action="/api/seed/from_google_sheets" method="post" enctype="multipart/form-data">
-        <label for="file">Upload All-in-One</label>
+        <input type="submit">Trigger Seed from Google Sheets</button>
        <input type="file" id="file" name="file" accept=".csv"/>
        <input type="submit">Upload</button>
    </form>
 </div>
 {% endblock %}
--- a/compose.yaml
+++ b/compose.yaml
@ -4,12 +4,16 @@ services:
    build:
      context: .
      target: dev
    environment:
      SPREADSHEET_ID: "1ITgXXfq7KaNP8JTQMvoZJSbu7zPpCcfNio_aooULRfc"
      PATH_TO_GOOGLE_SHEETS_CREDENTIALS: "./google_sheets_credentials.json"
    volumes:
      - type: bind
        source: ./app/
        # Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path.
        target: /app/app
      - ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated
      - /Users/scubbo/Downloads/google_sheets_credentials.json:/app/google_sheets_credentials.json
    ports:
      - 8000:8000
--- a/requirements.txt
+++ b/requirements.txt
@ -5,3 +5,6 @@ uvicorn
 Jinja2
 pyyaml
 PyFunctional
 google-api-python-client
 google-auth-oauthlib
 google-auth-httplib2
--- a/tests/routers/test_seed.py
+++ b/tests/routers/test_seed.py
@ -0,0 +1,15 @@
 # A test for the hacky workaround class I made that imitates the CSV-reader-interface when passed a list[list[str]]
 # as we get from the Google Sheets API
 from app.routers.seed import CSVFacade
 def test_csv_facade():
    """CSVFacade yields exactly one header-keyed dict per data row, in order."""
    facade = CSVFacade([["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]])
    # Compare the whole output at once: checking rows one-by-one inside a loop
    # would pass vacuously if the facade produced too few (or zero) rows.
    assert list(facade) == [
        {"a": "1", "b": "2", "c": "3"},
        {"a": "4", "b": "5", "c": "6"},
    ]
--- a/tests/test_fresh_db_tests.py
+++ b/tests/test_fresh_db_tests.py
@ -1,3 +1,4 @@
 import pytest
 from typing import Mapping
 import httpx
@ -68,6 +69,13 @@ def test_add_and_retrieve_deck(test_client: TestClient, cleanups):
    cleanups.add_success(success_cleanup)
 # Keeping this around because it would be useful to reintroduce it if I factor out data-sourcing
 # (I did briefly try doing so, but because the logic of full-seeding involves recreating the iterable,
 # and the two different data sources have different object-types, it wasn't obvious how to reinitialize.
 # Probably would be possible, but given that I'm likely gonna deprecate the csv-upload model anyway, probably not worth it.)
@pytest.mark.skip(
    reason="Moved from an injected-data model, to reading from Google Sheets"
 )
 def test_incremental_add_of_games(test_client: TestClient, cleanups):
    latest_game_response = _json_get(test_client, "/game/latest_game")
    assert latest_game_response.status_code == 404