Add cursory "biggest movers" stats

Also adds a rudimentary testing framework for seeding a database from a
given `.db` SQLite file. Probably extract this for general use!
Jack Jackson 2024-08-23 06:16:04 -07:00
parent 460467bd0b
commit f120336f1d
7 changed files with 199 additions and 2 deletions

View File

@@ -2,6 +2,7 @@ from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from ..sql import crud
from ..routers.stats import top_movers
from ..templates import jinja_templates, _jsonify
from ..sql.database import get_db
@@ -11,8 +12,9 @@ html_router = APIRouter(include_in_schema=False, default_response_class=HTMLResp
@html_router.get("/")
def main(request: Request, db=Depends(get_db)):
games = crud.get_games(db=db)
movers = top_movers(db=db)
return jinja_templates.TemplateResponse(
request, "/main.html", {"games": _jsonify(games), "top_movers": movers}
)
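For reference, `top_movers` (added in the stats router below) returns a dict shaped roughly as follows, which `main.html` iterates over. The deck names and `diff` values here match the test snapshot further down; the `start`/`end` values are left as placeholders:

```python
# Rough shape of the top_movers return value consumed by main.html.
# Names and diffs mirror the test data below; start/end left as placeholders.
{
    "positive": [
        {"deck_id": 74, "name": "Goose Mother", "start": ..., "end": ..., "diff": 6.758},
        # ...up to number_of_movers entries, biggest positive diff first
    ],
    "negative": [
        {"deck_id": 45, "name": "Ashad", "start": ..., "end": ..., "diff": -6.542},
        # ...up to number_of_movers entries, most negative diff first
    ],
}
```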

View File

@@ -1,5 +1,6 @@
from collections import defaultdict
from datetime import datetime, timedelta, MINYEAR
from heapq import nlargest, nsmallest
from typing import Optional
from fastapi import APIRouter, Depends, Request
@@ -76,6 +77,77 @@ def stats_graph_api(
}
# As with many APIs, this is a candidate for parallelization if desired -
# could key by deck_id, then in parallel get scores over the time period for that deck.
# But performance isn't likely to be a big issue!
@api_router.get("/top_movers")
def top_movers(
lookback_in_days: int = 7,
number_of_movers: int = 3,
db=Depends(get_db),
):
# TODO - this will error-out on an empty database
date_of_latest_game = (
db.query(models.Game.date)
.order_by(models.Game.date.desc())
.limit(1)
.first()
._tuple()[0]
)
beginning_of_lookback = date_of_latest_game - timedelta(days=lookback_in_days)
# TODO - this mostly duplicates logic from `stats_graph_api`. Extract?
row_number_column = (
func.row_number()
.over(
partition_by=[models.Deck.name, models.Game.date],
order_by=models.EloScore.id.desc(),
)
.label("row_number")
)
sub_query = (
db.query(
models.Deck.id, models.Deck.name, models.EloScore.score, models.Game.date
)
.outerjoin(models.EloScore, models.Deck.id == models.EloScore.deck_id)
.join(models.Game, models.EloScore.after_game_id == models.Game.id)
.add_column(row_number_column)
.subquery()
)
scores = (
db.query(sub_query)
.filter(sub_query.c.row_number == 1)
.order_by(sub_query.c.date)
.all()
)
score_tracker = defaultdict(dict)
# First, get the score-per-deck at the start and end of the time period
for score in scores:
if score.date <= beginning_of_lookback:
score_tracker[score.id]["start_score"] = score.score
score_tracker[score.id]["latest_score"] = score.score
# Technically we don't need to _keep_ adding this (as it won't change for a given deck_id) - but, until/unless
# this logic is parallelized, there's no efficient way for the algorithm to know that it's operating on a deck
# that's already been seen once before
score_tracker[score.id]["name"] = score.name
# Then, find biggest movers
calculateds = [
{
"deck_id": deck_id,
"name": score_tracker[deck_id]["name"],
"start": score_tracker[deck_id]["start_score"],
"end": score_tracker[deck_id]["latest_score"],
"diff": score_tracker[deck_id]["latest_score"]
- score_tracker[deck_id]["start_score"],
}
for deck_id in score_tracker
]
return {
"positive": nlargest(number_of_movers, calculateds, key=lambda x: x["diff"]),
"negative": nsmallest(number_of_movers, calculateds, key=lambda x: x["diff"]),
}
@html_router.get("/graph") @html_router.get("/graph")
def stats_graph(request: Request, db=Depends(get_db)): def stats_graph(request: Request, db=Depends(get_db)):
return jinja_templates.TemplateResponse(request, "stats/graph.html") return jinja_templates.TemplateResponse(request, "stats/graph.html")
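One possible way to handle the empty-database TODO in `top_movers` above (a sketch, not part of this commit): `Query.first()` returns `None` when there are no rows, so the latest-game lookup can be guarded before it is dereferenced:

```python
# Hypothetical guard for the empty-database case flagged in the TODO above.
# .first() returns None when the games table is empty, so return empty mover
# lists instead of raising AttributeError on ._tuple().
latest = (
    db.query(models.Game.date)
    .order_by(models.Game.date.desc())
    .limit(1)
    .first()
)
if latest is None:
    return {"positive": [], "negative": []}
date_of_latest_game = latest._tuple()[0]
```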

View File

@@ -4,8 +4,58 @@
{% block head %}
{{ super() }}
<!-- https://www.w3schools.com/css/css_tooltip.asp -->
<style>
h2 {
display: inline-block;
}
.tooltip {
position: relative;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}
/* Tooltip text */
.tooltip .tooltiptext {
visibility: hidden;
width: 480px;
background-color: black;
color: #fff;
text-align: center;
padding: 5px 0;
border-radius: 6px;
/* Position the tooltip text - see examples below! */
position: absolute;
z-index: 1;
}
/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
visibility: visible;
}
</style>
{% endblock %}
{% block content %}
<p>Welcome to EDH ELO! Click "Games" above to see the list of Games, or "Record New Game" in the top-right to record a game</p>
<div>
<h2>Biggest recent movers</h2><div class="tooltip">?<span class="tooltiptext">Logic:<br/>Find the date of the latest game<br/>Look back a period of 7 days from that date<br/>Calculate score differential between those two dates for all decks<br/>Rank by that<br/><br/>TODO - add a dedicated "biggest movers" page under "Stats" where anchor dates and number-of-top-movers can be specified</span></span></div>
<h3>Positive</h3>
<ol>
{% for positive_mover in top_movers['positive'] %}
<li><strong>{{ positive_mover['name'] }}</strong> - +{{ positive_mover['diff'] }} ({{ positive_mover['start'] }} -> {{ positive_mover['end'] }})</li>
{% endfor %}
</ol>
<h3>Negative</h3>
<ol>
{% for negative_mover in top_movers['negative'] %}
<li><strong>{{ negative_mover['name'] }}</strong> - {{ negative_mover['diff'] }} ({{ negative_mover['start'] }} -> {{ negative_mover['end'] }})</li>
{% endfor %}
</ol>
</div>
{% endblock %}
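The tooltip's TODO mentions a dedicated "biggest movers" page with configurable parameters; note that the JSON endpoint added above already accepts both knobs as query parameters, so (relative to wherever `api_router` is mounted) it can be called directly, e.g.:

```
GET /top_movers?lookback_in_days=14&number_of_movers=5
```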

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,72 @@
import pathlib
import pytest
import random
import shutil
import string
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from app.sql.models import Base
from app.routers.stats import top_movers
# TODO - this is almost a copy-paste of `isolated_database` from `tests/sql/test_crud` -
# consider unifying and extracting.
@pytest.mark.parametrize("isolated_database", [["populated_db.db"]], indirect=True)
def test_initialization(isolated_database):
response = top_movers(db=isolated_database)
biggest_positive = response["positive"][0]
assert biggest_positive["deck_id"] == 74
assert biggest_positive["name"] == "Goose Mother"
assert float(biggest_positive["diff"]) == 6.758
biggest_negative = response["negative"][0]
assert biggest_negative["deck_id"] == 45
assert biggest_negative["name"] == "Ashad"
assert float(biggest_negative["diff"]) == -6.542
# This fixture expects a parameter representing the filename within `test-data/sqlite-database-snapshots` that should be
# used to initialize the database.
# See http://stackoverflow.com/a/33879151
@pytest.fixture(scope="function")
def isolated_database(request, cleanups):
database_dir = "database"
db_dir_path = pathlib.Path(database_dir)
if not db_dir_path.exists():
db_dir_path.mkdir()
db_dir_path.chmod(0o777)
isolated_db_name = f"isolated_database_{''.join([random.choice(string.ascii_lowercase) for _ in range(5)])}.db"
isolated_db_path = db_dir_path.joinpath(isolated_db_name)
seedPath = pathlib.Path("test-data").joinpath(
"sqlite-database-snapshots", request.param[0]
)
if not seedPath.exists():
raise Exception(
f"Cannot initialize a database from {seedPath} - does not exist"
)
shutil.copy(str(seedPath), isolated_db_path.absolute())
engine = create_engine(f"sqlite:///{isolated_db_path.absolute()}")
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base.metadata.create_all(bind=engine)
def success_cleanup():
isolated_db_path.unlink()
def failure_cleanup():
print(
f"Isolated database {isolated_db_path.absolute()}, used in test `{request.node.name}` at path `{request.node.path.absolute()}`, has been preserved for debugging"
)
cleanups.add_success(success_cleanup)
cleanups.add_failure(failure_cleanup)
yield SessionLocal()
# yield isolated_db_path
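Following up on the commit message and the two TODOs about the near-duplicate `isolated_database` fixtures, here is a rough sketch (not part of this commit) of what an extracted, shared version could look like in a `tests/conftest.py`, using pytest's built-in `tmp_path` instead of a hand-managed `database/` directory. The `seeded_session` name and file layout are illustrative assumptions:

```python
# Hypothetical shared fixture (tests/conftest.py) unifying the two isolated_database
# fixtures. The seed filename is still supplied via indirect parametrization, e.g.
# @pytest.mark.parametrize("seeded_session", [["populated_db.db"]], indirect=True).
import pathlib
import shutil

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from app.sql.models import Base


@pytest.fixture(scope="function")
def seeded_session(request, tmp_path):
    seed_path = pathlib.Path("test-data").joinpath(
        "sqlite-database-snapshots", request.param[0]
    )
    if not seed_path.exists():
        raise FileNotFoundError(f"Cannot initialize a database from {seed_path}")
    # Copy the snapshot into pytest's per-test temporary directory, so cleanup is automatic.
    db_path = tmp_path / "seeded.db"
    shutil.copy(seed_path, db_path)
    engine = create_engine(f"sqlite:///{db_path}")
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base.metadata.create_all(bind=engine)
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
```

One trade-off: this drops the keep-the-database-on-failure debugging behaviour of the `cleanups`-based fixtures above, so that mechanism may still be worth retaining.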

View File

@@ -38,6 +38,7 @@ def _test_create_and_retrieve(db, name: str):
assert get_player.name == name
# TODO - there's an almost-duplicate of this in `tests/routers/test_stats` - consider extracting.
@pytest.fixture(scope="function")
def isolated_database(request, cleanups):
database_dir = "database"