From 9b4e6c3b4d852883a372332461253ef9eae6d014 Mon Sep 17 00:00:00 2001 From: Jack Jackson Date: Thu, 17 Apr 2025 22:17:26 -0700 Subject: [PATCH] Introduce ability to seed data directly from GSheet Reasonably hacky, in that I introduce a facade to reuse the data format previously provided by the `csv` module, rather than using the `list[list[str]]` directly. Next I want to introduce something like Celery to continually refresh. Note that this will require changes to the deployment repo in order to provide the required secrets. --- README.md | 14 ++++++++ app/routers/seed.py | 56 ++++++++++++++++++++++++++----- app/services/google_sheets.py | 63 +++++++++++++++++++++++++++++++++++ app/templates/seed.html | 6 ++-- compose.yaml | 4 +++ requirements.txt | 3 ++ tests/routers/test_seed.py | 15 +++++++++ tests/test_fresh_db_tests.py | 8 +++++ 8 files changed, 157 insertions(+), 12 deletions(-) create mode 100644 app/services/google_sheets.py create mode 100644 tests/routers/test_seed.py diff --git a/README.md b/README.md index 8620427..aa6e974 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,17 @@ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) This is a score tracker for an EDH ("Commander") group. + +# Reading from a Google Sheet + +Eventually, I'd want this application to be data-resilient enough to be the primary store of data. For now, though, I'm reading data in from a(n impressive!) Google Sheet maintained by my playgroup. 
+ +If you want to enable that syncing, you'll need to: + +* Go to the [Google Cloud Console](https://console.cloud.google.com) + * If you haven't got one already, create a Project +* Go to "IAM and admin > Service Accounts" +* Create a new Service Account +* Note the resultant email address, and share the Google Sheet with it + +If you haven't used the Google Sheets API in your Project, you may need to enable it, at `console.cloud.google.com/apis/library/sheets.googleapis.com?project=` diff --git a/app/routers/seed.py b/app/routers/seed.py index dcfcc8b..ed19763 100644 --- a/app/routers/seed.py +++ b/app/routers/seed.py @@ -9,6 +9,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile from fastapi.responses import HTMLResponse from sqlalchemy.orm import Session +from app.services import google_sheets + from .decks import list_decks from .games import create_game, latest_game, list_games from .players import list_players @@ -86,10 +88,10 @@ def seed_games(file: UploadFile, db: Session = Depends(get_db)): return "OK!" -@api_router.post("/all_in_one") -def all_in_one(file: UploadFile, db: Session = Depends(get_db)): - file_contents = file.file.read().decode("utf-8").split("\n") - reader = csv.DictReader(file_contents, delimiter=",") +@api_router.post("/from_google_sheets") +def from_google_sheets(db: Session = Depends(get_db)): + data = google_sheets.get_data() + reader = CSVFacade(data) # Fetch the currently-known-information so that we can avoid recreating existing data current_player_ids_by_name = { @@ -184,7 +186,7 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)): # (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a # retraversal. I suspect that the overhead of O(2*n) vs. 
O(n) data-reads is going to be insignificant) # ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P )) - reader = csv.DictReader(file_contents, delimiter=",") + reader = CSVFacade(data) for row in reader: # Skip any games created before the date of the latest current game # (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If @@ -205,9 +207,16 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)): if not row["Winning Deck"].startswith("Tie"): print(f"DEBUG - checking row {row}") - index_of_winning_deck = [ - row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6) - ].index(True) + try: + index_of_winning_deck = [ + row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6) + ].index(True) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Error when processing row {row_idx}, game on {row['Date']}: Winning Deck is named {row['Winning Deck']}, but no deck with that name was found", + ) + created_game = create_game( schemas.GameCreate( date=date_of_current_row, @@ -294,6 +303,37 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)): return "Ok!" +# Facade class to imitate the CSV-reader-interface when passed a list[list[str]] +# as we get from the Google Sheets API +# dict[str, str], where the keys are the header names and the values are the row values +class CSVFacade: + def __init__(self, data: list[list[str]]): + self.headers = data[0] + self.data = data[1:] + logging.critical(f"Headers: {self.headers}") + logging.critical(f"First row: {self.data[0]}") + + def __iter__(self): + self.index = 0 + return self + + def __next__(self): + if self.index >= len(self.data): + raise StopIteration + result = self.data[self.index] + self.index += 1 + if len(self.headers) != len(result): + # If the last column - "notes" - is empty, fill it in with an empty string. 
+            if len(result) == len(self.headers) - 1:
+                result.append("")
+            # If there's any other kind of discrepancy, though, that's probably an error
+            else:
+                raise ValueError(
+                    f"Row {self.index} has {len(result)} columns, but {len(self.headers)} are expected"
+                )
+        return dict(zip(self.headers, result))
+
+
 @html_router.get("/")
 def main(request: Request, db=Depends(get_db)):
     return jinja_templates.TemplateResponse(
diff --git a/app/services/google_sheets.py b/app/services/google_sheets.py
new file mode 100644
index 0000000..541bb5b
--- /dev/null
+++ b/app/services/google_sheets.py
@@ -0,0 +1,63 @@
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+import os
+
+
+def get_data() -> list[list[str]]:
+    spreadsheet_id = os.getenv("SPREADSHEET_ID")
+    path_to_credentials = os.getenv("PATH_TO_GOOGLE_SHEETS_CREDENTIALS")
+    resp = []
+    # Hardcoding the format (and page names) to the spreadsheet that my group uses, because I'm lazy -
+    # should be easy to change those to using environment variables if needed
+    for idx, year in enumerate(["2024", "2025"]):
+        # Assumes that the headers are the same for all years
+        if idx == 0:
+            resp = read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)
+        else:
+            # Drop the headers from this year, because they were already added by the previous year.
+            resp.extend(
+                read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)[1:]
+            )
+    return resp
+
+
+# The scope for Google Sheets API
+SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
+
+
+def get_service_account_credentials(credentials_path):
+    """Gets service account credentials from a JSON key file. 
+ + Args: + credentials_path: Path to the service account JSON key file + + Returns: + Credentials object for the service account + """ + return service_account.Credentials.from_service_account_file( + credentials_path, scopes=SCOPES + ) + + +def read_sheet(spreadsheet_id, range_name, credentials_path): + """Reads data from a Google Sheet using service account credentials. + + Args: + spreadsheet_id: The ID of the spreadsheet to read from + range_name: The A1 notation of the range to read from (e.g., 'Sheet1!A1:D10') + credentials_path: Path to the service account JSON key file + + Returns: + A list of lists containing the values from the specified range + """ + creds = get_service_account_credentials(credentials_path) + service = build("sheets", "v4", credentials=creds) + + # Call the Sheets API + sheet = service.spreadsheets() + result = ( + sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute() + ) + + values = result.get("values", []) + return values diff --git a/app/templates/seed.html b/app/templates/seed.html index b8b22f4..dada4a9 100644 --- a/app/templates/seed.html +++ b/app/templates/seed.html @@ -35,10 +35,8 @@
-
- - - Upload + + Trigger Seed from Google Sheets
{% endblock %} \ No newline at end of file diff --git a/compose.yaml b/compose.yaml index 2d28771..84406c6 100644 --- a/compose.yaml +++ b/compose.yaml @@ -4,12 +4,16 @@ services: build: context: . target: dev + environment: + SPREADSHEET_ID: "1ITgXXfq7KaNP8JTQMvoZJSbu7zPpCcfNio_aooULRfc" + PATH_TO_GOOGLE_SHEETS_CREDENTIALS: "./google_sheets_credentials.json" volumes: - type: bind source: ./app/ # Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path. target: /app/app - ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated + - /Users/scubbo/Downloads/google_sheets_credentials.json:/app/google_sheets_credentials.json ports: - 8000:8000 diff --git a/requirements.txt b/requirements.txt index b340c49..2307e35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,6 @@ uvicorn Jinja2 pyyaml PyFunctional +google-api-python-client +google-auth-oauthlib +google-auth-httplib2 diff --git a/tests/routers/test_seed.py b/tests/routers/test_seed.py new file mode 100644 index 0000000..4733b8f --- /dev/null +++ b/tests/routers/test_seed.py @@ -0,0 +1,15 @@ +# A test for the hacky workaround class I made that imitates the CSV-reader-interface when passed a list[list[str]] +# as we get from the Google Sheets API + +from app.routers.seed import CSVFacade + + +def test_csv_facade(): + facade = CSVFacade([["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]]) + for i, row in enumerate(facade): + if i == 0: + assert row == {"a": "1", "b": "2", "c": "3"} + elif i == 1: + assert row == {"a": "4", "b": "5", "c": "6"} + else: + raise ValueError(f"Expected 2 rows, but got {i+1}") diff --git a/tests/test_fresh_db_tests.py b/tests/test_fresh_db_tests.py index 1e849e7..3cd84bd 100644 --- a/tests/test_fresh_db_tests.py +++ b/tests/test_fresh_db_tests.py @@ -1,3 +1,4 @@ +import pytest from typing import Mapping import httpx @@ -68,6 +69,13 @@ def test_add_and_retrieve_deck(test_client: TestClient, cleanups): 
cleanups.add_success(success_cleanup) +# Keeping this around because it would be useful to reintroduce it if I factor out data-sourcing +# (I did briefly try doing so, but because the logic of full-seeding involves recreating the iterable, +# and the two different data sources have different object-types, it wasn't obvious how to reinitialize. +# Probably would be possible, but given that I'm likely gonna deprecate the csv-upload model anyway, probably not worth it.) +@pytest.mark.skip( + reason="Moved from an injected-data model, to reading from Google Sheets" +) def test_incremental_add_of_games(test_client: TestClient, cleanups): latest_game_response = _json_get(test_client, "/game/latest_game") assert latest_game_response.status_code == 404