Introduce ability to seed data directly from GSheet
All checks were successful
Publish / build-and-push (push) Successful in 6m14s
All checks were successful
Publish / build-and-push (push) Successful in 6m14s
Reasonably hacky, in that I introduce a facade to reuse the data format previously provided by the `csv` module, rather than using the `list[list[str]]` directly. Next I want to introduce something like Celery to continually refresh. Note that this will require changes to the deployment repo in order to provide the required secrets.
This commit is contained in:
parent
0434ec1e98
commit
9b4e6c3b4d
14
README.md
14
README.md
@ -1,3 +1,17 @@
|
|||||||
[](https://github.com/astral-sh/ruff)
|
[](https://github.com/astral-sh/ruff)
|
||||||
|
|
||||||
This is a score tracker for an EDH ("Commander") group.
|
This is a score tracker for an EDH ("Commander") group.
|
||||||
|
|
||||||
|
# Reading from a Google Sheet
|
||||||
|
|
||||||
|
Eventually, I'd want this application to be data-resilient enough to be the primary store of data. For now, though, I'm reading data in from a(n impressive!) Google Sheet maintained by my playgroup.
|
||||||
|
|
||||||
|
If you want to enable that syncing, you'll need to:
|
||||||
|
|
||||||
|
* Go to the [Google Cloud Console](https://console.cloud.google.com)
|
||||||
|
* If you haven't got one already, create a Project
|
||||||
|
* Go to "IAM and admin > Service Accounts"
|
||||||
|
* Create a new Service Account
|
||||||
|
* Note the resultant email address, and share the Google Sheet with it
|
||||||
|
|
||||||
|
If you haven't used the Google Sheets API in your Project, you may need to enable it at `console.cloud.google.com/apis/library/sheets.googleapis.com?project=<your_project_slug>`.
|
||||||
|
@ -9,6 +9,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
|
|||||||
from fastapi.responses import HTMLResponse
|
from fastapi.responses import HTMLResponse
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.services import google_sheets
|
||||||
|
|
||||||
from .decks import list_decks
|
from .decks import list_decks
|
||||||
from .games import create_game, latest_game, list_games
|
from .games import create_game, latest_game, list_games
|
||||||
from .players import list_players
|
from .players import list_players
|
||||||
@ -86,10 +88,10 @@ def seed_games(file: UploadFile, db: Session = Depends(get_db)):
|
|||||||
return "OK!"
|
return "OK!"
|
||||||
|
|
||||||
|
|
||||||
@api_router.post("/all_in_one")
|
@api_router.post("/from_google_sheets")
|
||||||
def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
|
def from_google_sheets(db: Session = Depends(get_db)):
|
||||||
file_contents = file.file.read().decode("utf-8").split("\n")
|
data = google_sheets.get_data()
|
||||||
reader = csv.DictReader(file_contents, delimiter=",")
|
reader = CSVFacade(data)
|
||||||
|
|
||||||
# Fetch the currently-known-information so that we can avoid recreating existing data
|
# Fetch the currently-known-information so that we can avoid recreating existing data
|
||||||
current_player_ids_by_name = {
|
current_player_ids_by_name = {
|
||||||
@ -184,7 +186,7 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
|
|||||||
# (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a
|
# (Again, if we _really_ cared about efficiency we could have stored this data on the first pass to avoid a
|
||||||
# retraversal. I suspect that the overhead of O(2*n) vs. O(n) data-reads is going to be insignificant)
|
# retraversal. I suspect that the overhead of O(2*n) vs. O(n) data-reads is going to be insignificant)
|
||||||
# ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P ))
|
# ((Yes, I know that's an abuse of Big-O notation, shut up - you knew what I meant :P ))
|
||||||
reader = csv.DictReader(file_contents, delimiter=",")
|
reader = CSVFacade(data)
|
||||||
for row in reader:
|
for row in reader:
|
||||||
# Skip any games created before the date of the latest current game
|
# Skip any games created before the date of the latest current game
|
||||||
# (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If
|
# (Note that this means that the `all_in_one` method cannot be used to backfill any previously-played games. If
|
||||||
@ -205,9 +207,16 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
|
|||||||
|
|
||||||
if not row["Winning Deck"].startswith("Tie"):
|
if not row["Winning Deck"].startswith("Tie"):
|
||||||
print(f"DEBUG - checking row {row}")
|
print(f"DEBUG - checking row {row}")
|
||||||
index_of_winning_deck = [
|
try:
|
||||||
row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6)
|
index_of_winning_deck = [
|
||||||
].index(True)
|
row[f"Deck {i+1}"] == row["Winning Deck"] for i in range(6)
|
||||||
|
].index(True)
|
||||||
|
except ValueError:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"Error when processing row {row_idx}, game on {row['Date']}: Winning Deck is named {row['Winning Deck']}, but no deck with that name was found",
|
||||||
|
)
|
||||||
|
|
||||||
created_game = create_game(
|
created_game = create_game(
|
||||||
schemas.GameCreate(
|
schemas.GameCreate(
|
||||||
date=date_of_current_row,
|
date=date_of_current_row,
|
||||||
@ -294,6 +303,37 @@ def all_in_one(file: UploadFile, db: Session = Depends(get_db)):
|
|||||||
return "Ok!"
|
return "Ok!"
|
||||||
|
|
||||||
|
|
||||||
|
class CSVFacade:
    """Imitate the ``csv.DictReader`` iteration interface over ``list[list[str]]`` data.

    The Google Sheets API hands back a list of rows (each a list of strings)
    whose first row holds the headers. Iterating over this facade yields one
    ``dict[str, str]`` per data row, where the keys are the header names and
    the values are the row values.

    Args:
        data: The header row followed by the data rows. May be empty.
    """

    def __init__(self, data: list[list[str]]):
        # An empty sheet comes back as ``[]``: treat it as "no headers, no rows"
        # rather than failing on ``data[0]``.
        self.headers = data[0] if data else []
        self.data = data[1:]
        # Initialised here too so that ``next(facade)`` works without a prior ``iter()``
        self.index = 0
        # Diagnostics only - these are not failures, so they do not belong at CRITICAL
        logging.debug("Headers: %s", self.headers)
        if self.data:
            logging.debug("First row: %s", self.data[0])

    def __iter__(self):
        # Restart from the first data row so that the same facade can be traversed again
        self.index = 0
        return self

    def __next__(self) -> dict[str, str]:
        """Return the next data row as a header-keyed dict.

        Raises:
            StopIteration: When every data row has been returned.
            ValueError: If the row's length differs from the header count by
                anything other than a single missing trailing cell.
        """
        if self.index >= len(self.data):
            raise StopIteration
        row = self.data[self.index]
        self.index += 1
        shortfall = len(self.headers) - len(row)
        if shortfall == 1:
            # If the last column - "notes" - is empty, fill it in with an empty string.
            # Pad a copy so that the caller's data is left untouched.
            row = [*row, ""]
        elif shortfall != 0:
            # If there's any other kind of discrepancy, though, that's probably an error
            raise ValueError(
                f"Row {self.index} has {len(row)} columns, but {len(self.headers)} are expected"
            )
        return dict(zip(self.headers, row))
|
||||||
|
|
||||||
|
|
||||||
@html_router.get("/")
|
@html_router.get("/")
|
||||||
def main(request: Request, db=Depends(get_db)):
|
def main(request: Request, db=Depends(get_db)):
|
||||||
return jinja_templates.TemplateResponse(
|
return jinja_templates.TemplateResponse(
|
||||||
|
63
app/services/google_sheets.py
Normal file
63
app/services/google_sheets.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from google.oauth2 import service_account
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def get_data() -> list[list[str]]:
    """Fetch all game rows from the configured Google Sheet, preceded by one header row.

    The spreadsheet ID is read from the ``SPREADSHEET_ID`` environment variable
    and the path to the service-account key file from
    ``PATH_TO_GOOGLE_SHEETS_CREDENTIALS``.

    Returns:
        The rows of the "2024 games" page followed by the rows of the
        "2025 games" page, with only the first header row retained.

    Raises:
        RuntimeError: If either required environment variable is unset or empty.
    """
    spreadsheet_id = os.getenv("SPREADSHEET_ID")
    path_to_credentials = os.getenv("PATH_TO_GOOGLE_SHEETS_CREDENTIALS")
    # Fail early with a readable message instead of passing ``None`` into the API client
    missing = [
        name
        for name, value in (
            ("SPREADSHEET_ID", spreadsheet_id),
            ("PATH_TO_GOOGLE_SHEETS_CREDENTIALS", path_to_credentials),
        )
        if not value
    ]
    if missing:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    resp: list[list[str]] = []
    # Hardcoding the format (and page names) to the spreadsheet that my group uses, because I'm lazy -
    # should be easy to change those to using environment variables if needed
    for year in ("2024", "2025"):
        rows = read_sheet(spreadsheet_id, f"{year} games", path_to_credentials)
        # Assumes that the headers are the same for all years: keep the first header row
        # we see, and drop the header row of every later page. Keying this on "have we
        # collected anything yet" (rather than on the page's position) means an empty
        # first page does not cause the next page's headers to be thrown away.
        resp.extend(rows[1:] if resp else rows)
    return resp
|
||||||
|
|
||||||
|
|
||||||
|
# OAuth scope requested for the Google Sheets API (read-only access to spreadsheets)
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]


def get_service_account_credentials(credentials_path):
    """Load service account credentials, limited to ``SCOPES``, from a JSON key file.

    Args:
        credentials_path: Path to the service account JSON key file

    Returns:
        Credentials object for the service account
    """
    credentials = service_account.Credentials.from_service_account_file(
        credentials_path,
        scopes=SCOPES,
    )
    return credentials
|
||||||
|
|
||||||
|
|
||||||
|
def read_sheet(spreadsheet_id, range_name, credentials_path):
    """Reads data from a Google Sheet using service account credentials.

    Args:
        spreadsheet_id: The ID of the spreadsheet to read from
        range_name: The A1 notation of the range to read from (e.g., 'Sheet1!A1:D10')
        credentials_path: Path to the service account JSON key file

    Returns:
        A list of lists containing the values from the specified range, or an
        empty list when the response carries no "values" entry
    """
    creds = get_service_account_credentials(credentials_path)

    # The client returned by `build` holds an HTTP connection; using it as a
    # context manager makes sure that connection is closed once the call is done.
    with build("sheets", "v4", credentials=creds) as service:
        # Call the Sheets API
        result = (
            service.spreadsheets()
            .values()
            .get(spreadsheetId=spreadsheet_id, range=range_name)
            .execute()
        )

    return result.get("values", [])
|
@ -35,10 +35,8 @@
|
|||||||
</div>
|
</div>
|
||||||
<hr/>
|
<hr/>
|
||||||
<div>
|
<div>
|
||||||
<form action="/api/seed/all_in_one" method="post" enctype="multipart/form-data">
|
<form action="/api/seed/from_google_sheets" method="post" enctype="multipart/form-data">
|
||||||
<label for="file">Upload All-in-One</label>
|
<button type="submit">Trigger Seed from Google Sheets</button>
|
||||||
<input type="file" id="file" name="file" accept=".csv"/>
|
|
||||||
<input type="submit">Upload</button>
|
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
@ -4,12 +4,16 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
target: dev
|
target: dev
|
||||||
|
environment:
|
||||||
|
SPREADSHEET_ID: "1ITgXXfq7KaNP8JTQMvoZJSbu7zPpCcfNio_aooULRfc"
|
||||||
|
PATH_TO_GOOGLE_SHEETS_CREDENTIALS: "./google_sheets_credentials.json"
|
||||||
volumes:
|
volumes:
|
||||||
- type: bind
|
- type: bind
|
||||||
source: ./app/
|
source: ./app/
|
||||||
# Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path.
|
# Yes, really - we're using `/app` as the WD within the container, but `uvicorn` requires an import path.
|
||||||
target: /app/app
|
target: /app/app
|
||||||
- ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated
|
- ./local-run-log-config.yaml:/app/local-run-log-config.yaml:delegated
|
||||||
|
- /Users/scubbo/Downloads/google_sheets_credentials.json:/app/google_sheets_credentials.json
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
|
|
||||||
|
@ -5,3 +5,6 @@ uvicorn
|
|||||||
Jinja2
|
Jinja2
|
||||||
pyyaml
|
pyyaml
|
||||||
PyFunctional
|
PyFunctional
|
||||||
|
google-api-python-client
|
||||||
|
google-auth-oauthlib
|
||||||
|
google-auth-httplib2
|
||||||
|
15
tests/routers/test_seed.py
Normal file
15
tests/routers/test_seed.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# A test for the hacky workaround class I made that imitates the CSV-reader-interface when passed a list[list[str]]
|
||||||
|
# as we get from the Google Sheets API
|
||||||
|
|
||||||
|
from app.routers.seed import CSVFacade
|
||||||
|
|
||||||
|
|
||||||
|
def test_csv_facade():
    """Every data row comes back, in order, as a dict keyed by the header row."""
    facade = CSVFacade([["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]])
    # Compare the whole result at once: checking rows by index inside a loop would
    # pass vacuously if the facade yielded fewer rows than expected (or none at all).
    assert list(facade) == [
        {"a": "1", "b": "2", "c": "3"},
        {"a": "4", "b": "5", "c": "6"},
    ]
|
@ -1,3 +1,4 @@
|
|||||||
|
import pytest
|
||||||
from typing import Mapping
|
from typing import Mapping
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@ -68,6 +69,13 @@ def test_add_and_retrieve_deck(test_client: TestClient, cleanups):
|
|||||||
cleanups.add_success(success_cleanup)
|
cleanups.add_success(success_cleanup)
|
||||||
|
|
||||||
|
|
||||||
|
# Keeping this around because it would be useful to reintroduce it if I factor out data-sourcing
|
||||||
|
# (I did briefly try doing so, but because the logic of full-seeding involves recreating the iterable,
|
||||||
|
# and the two different data sources have different object-types, it wasn't obvious how to reinitialize.
|
||||||
|
# Probably would be possible, but given that I'm likely gonna deprecate the csv-upload model anyway, probably not worth it.)
|
||||||
|
@pytest.mark.skip(
|
||||||
|
reason="Moved from an injected-data model, to reading from Google Sheets"
|
||||||
|
)
|
||||||
def test_incremental_add_of_games(test_client: TestClient, cleanups):
|
def test_incremental_add_of_games(test_client: TestClient, cleanups):
|
||||||
latest_game_response = _json_get(test_client, "/game/latest_game")
|
latest_game_response = _json_get(test_client, "/game/latest_game")
|
||||||
assert latest_game_response.status_code == 404
|
assert latest_game_response.status_code == 404
|
||||||
|
Loading…
x
Reference in New Issue
Block a user