Introduce ability to seed data directly from GSheet

Reasonably hacky, in that I introduce a facade to reuse the data format
previously provided by the `csv` module, rather than using the
`list[list[str]]` directly.

Next, I want to introduce something like Celery to refresh the data continually.

Note that this will require changes to the deployment repo in order to
provide the required secrets.
Jack Jackson 2025-04-17 22:17:26 -07:00
parent 0434ec1e98
commit 9b4e6c3b4d
8 changed files with 157 additions and 12 deletions

View File

@@ -1,3 +1,17 @@
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
This is a score tracker for an EDH ("Commander") group.
# Reading from a Google Sheet
Eventually, I'd like this application to be data-resilient enough to be the primary store of data. For now, though, I'm reading data in from a(n impressive!) Google Sheet maintained by my playgroup.
If you want to enable that syncing, you'll need to:
* Go to the [Google Cloud Console](https://console.cloud.google.com)
* If you haven't got one already, create a Project
* Go to "IAM and admin > Service Accounts"
* Create a new Service Account
* Note the resultant email address, and share the Google Sheet with it
If you haven't used the Google Sheets API in your Project before, you may need to enable it at `console.cloud.google.com/apis/library/sheets.googleapis.com?project=<your_project_slug>`
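To sanity-check the credentials before running a full seed, you can call the service module directly. A minimal sketch, assuming the `SPREADSHEET_ID` and `PATH_TO_GOOGLE_SHEETS_CREDENTIALS` environment variables are set (the same ones the app reads):

```python
# Quick smoke test for the Sheets integration. Assumes SPREADSHEET_ID and
# PATH_TO_GOOGLE_SHEETS_CREDENTIALS are set, as in docker-compose.yml.
from app.services import google_sheets

rows = google_sheets.get_data()
print(f"Fetched {len(rows)} rows; header row: {rows[0]}")
```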

View File

@@ -9,6 +9,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
from fastapi.responses import HTMLResponse
from sqlalchemy.orm import Session
from app.services import google_sheets
from .decks import list_decks
from .games import create_game, latest_game, list_games
from .players import list_players
@@ -86,10 +88,10 @@ def seed_games(file: UploadFile, db: Session = Depends(get_db)):
return "OK!"
@api_router.post("/all_in_one")
def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
file_contents = file.file.read().decode("utf-8").split("\n")
reader = csv.DictReader(file_contents, delimiter=",")
@api_router.post("/from_google_sheets")
def from_google_sheets(db: Session = Depends(get_db)):
data = google_sheets.get_data()
reader = CSVFacade(data)
# Fetch the currently-known-information so that we can avoid recreating existing data
current_player_ids_by_name = {
@@ -184,7 +186,7 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
    # (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a
    # retraversal. I suspect that the overhead of O(2*n) vs. O(n) data-reads is going to be insignificant)
    # ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P ))
    reader = csv.DictReader(file_contents, delimiter=",")
    reader = CSVFacade(data)
    for row in reader:
        # Skip any games created before the date of the latest current game
        # (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If
@@ -205,9 +207,16 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
        if not row["Winning Deck"].startswith("Tie"):
            print(f"DEBUG - checking row {row}")
            try:
                index_of_winning_deck = [
                    row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6)
                ].index(True)
            except ValueError:
                raise HTTPException(
                    status_code=400,
                    detail=f"Error when processing row {row_idx}, game on {row['Date']}: Winning Deck is named {row['Winning Deck']}, but no deck with that name was found",
                )
        created_game = create_game(
            schemas.GameCreate(
                date=date_of_current_row,
@@ -294,6 +303,37 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
return "Ok!"
# Facade class to imitate the CSV-reader-interface when passed a list[list[str]]
# as we get from the Google Sheets API
# dict[str, str], where the keys are the header names and the values are the row values
class CSVFacade:
def __init__(self, data: list[list[str]]):
self.headers = data[0]
self.data = data[1:]
logging.critical(f"Headers: {self.headers}")
logging.critical(f"First row: {self.data[0]}")
def __iter__(self):
self.index = 0
return self
def __next__(self):
if self.index >= len(self.data):
raise StopIteration
result = self.data[self.index]
self.index += 1
if len(self.headers) != len(result):
# If the last column - "notes" - is empty, fill it in with an empty string.
if len(result) == len(self.headers) - 1:
result.append("")
# If there's any other kind of discrepancy, though, that's probably an error
else:
raise ValueError(
f"Row {self.index} has {len(result)} columns, but {len(self.headers)} are expected"
)
return dict(zip(self.headers, result))
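Since `CSVFacade` yields the same `dict[str, str]` rows that `csv.DictReader` would, the downstream seeding logic is unchanged. A quick sketch of the behaviour (the column names match those the seeding code reads; the values are made up):

```python
# The first inner list is treated as the header row; every subsequent list
# becomes a {header: value} dict, just like csv.DictReader.
data = [["Date", "Winning Deck"], ["2025-01-01", "Krenko"]]
for row in CSVFacade(data):
    print(row["Date"], row["Winning Deck"])  # -> 2025-01-01 Krenko
```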
@html_router.get("/")
def main(request: Request, db=Depends(get_db)):
    return jinja_templates.TemplateResponse(

View File

@@ -0,0 +1,63 @@
from google.oauth2 import service_account
from googleapiclient.discovery import build
import os
def get_data() -> list[list[str]]:
    spreadsheet_id = os.getenv("SPREADSHEET_ID")
    path_to_credentials = os.getenv("PATH_TO_GOOGLE_SHEETS_CREDENTIALS")
    resp = []
    # Hardcoding the format (and page names) to the spreadsheet that my group uses, because I'm lazy -
    # should be easy to change those to use environment variables if needed
    for idx, year in enumerate(["2024", "2025"]):
        # Assumes that the headers are the same for all years
        if idx == 0:
            resp = read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)
        else:
            # Drop the headers from this year, because they were already added by the previous year.
            resp.extend(
                read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)[1:]
            )
    return resp
# The scope for Google Sheets API
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
def get_service_account_credentials(credentials_path):
    """Gets service account credentials from a JSON key file.

    Args:
        credentials_path: Path to the service account JSON key file

    Returns:
        Credentials object for the service account
    """
    return service_account.Credentials.from_service_account_file(
        credentials_path, scopes=SCOPES
    )
def read_sheet(spreadsheet_id, range_name, credentials_path):
    """Reads data from a Google Sheet using service account credentials.

    Args:
        spreadsheet_id: The ID of the spreadsheet to read from
        range_name: The A1 notation of the range to read from (e.g., 'Sheet1!A1:D10')
        credentials_path: Path to the service account JSON key file

    Returns:
        A list of lists containing the values from the specified range
    """
    creds = get_service_account_credentials(credentials_path)
    service = build("sheets", "v4", credentials=creds)
    # Call the Sheets API
    sheet = service.spreadsheets()
    result = (
        sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute()
    )
    values = result.get("values", [])
    return values
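For reference, `read_sheet` takes any A1-notation range, and a bare sheet name returns the whole tab. A sketch with placeholder arguments (the `2024 games` tab name matches what `get_data` hardcodes):

```python
# Hypothetical direct call; substitute your own spreadsheet ID and key file.
values = read_sheet(
    spreadsheet_id="<your_spreadsheet_id>",
    range_name="2024 games",  # a bare sheet name reads the entire tab
    credentials_path="/path/to/google_sheets_credentials.json",
)
print(values[0])  # header row
```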

View File

@@ -35,10 +35,8 @@
    </div>
    <hr/>
    <div>
        <form action="/api/seed/all_in_one" method="post" enctype="multipart/form-data">
            <label for="file">Upload All-in-One</label>
            <input type="file" id="file" name="file" accept=".csv"/>
            <button type="submit">Upload</button>
        <form action="/api/seed/from_google_sheets" method="post">
            <button type="submit">Trigger Seed from Google Sheets</button>
        </form>
    </div>
{% endblock %}
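Since the endpoint takes no request body, the form is just a convenience; any HTTP client can trigger the seed. A sketch using `httpx` (already a test dependency), assuming the port mapping from docker-compose.yml:

```python
import httpx

# No payload needed: the endpoint reads everything from the Google Sheet itself.
resp = httpx.post("http://localhost:8000/api/seed/from_google_sheets")
print(resp.status_code, resp.text)  # expect 200 and "Ok!"
```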

View File

@@ -4,12 +4,16 @@ services:
    build:
      context: .
      target: dev
    environment:
      SPREADSHEET_ID: "1ITgXXfq7KaNP8JTQMvoZJSbu7zPpCcfNio_aooULRfc"
      PATH_TO_GOOGLE_SHEETS_CREDENTIALS: "./google_sheets_credentials.json"
    volumes:
      - type: bind
        source: ./app/
        # Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path.
        target: /app/app
      - ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated
      - /Users/scubbo/Downloads/google_sheets_credentials.json:/app/google_sheets_credentials.json
    ports:
      - 8000:8000

View File

@@ -5,3 +5,6 @@ uvicorn
Jinja2
pyyaml
PyFunctional
google-api-python-client
google-auth-oauthlib
google-auth-httplib2

View File

@@ -0,0 +1,15 @@
# A test for the hacky workaround class I made that imitates the CSV-reader-interface when passed a list[list[str]]
# as we get from the Google Sheets API
from app.routers.seed import CSVFacade
def test_csv_facade():
    facade = CSVFacade([["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]])
    for i, row in enumerate(facade):
        if i == 0:
            assert row == {"a": "1", "b": "2", "c": "3"}
        elif i == 1:
            assert row == {"a": "4", "b": "5", "c": "6"}
        else:
            raise ValueError(f"Expected 2 rows, but got {i+1}")

View File

@@ -1,3 +1,4 @@
import pytest
from typing import Mapping
import httpx
@@ -68,6 +69,13 @@ def test_add_and_retrieve_deck(test_client: TestClient, cleanups):
    cleanups.add_success(success_cleanup)
# Keeping this around because it would be useful to reintroduce if I factor out data-sourcing.
# (I did briefly try doing so, but because the logic of full-seeding involves recreating the iterable,
# and the two data sources have different object types, it wasn't obvious how to reinitialize it.
# It's probably possible, but since I'm likely going to deprecate the csv-upload model anyway, it's probably not worth it.)
@pytest.mark.skip(
    reason="Moved from an injected-data model, to reading from Google Sheets"
)
def test_incremental_add_of_games(test_client: TestClient, cleanups):
    latest_game_response = _json_get(test_client, "/game/latest_game")
    assert latest_game_response.status_code == 404