Add cursory "biggest movers" stats

Also adds a rudimentary testing framework for seeding a database from a
given `.db` SQLite file. Probably worth extracting this for general use!
Jack Jackson 2024-08-23 06:16:04 -07:00
parent 460467bd0b
commit f120336f1d
7 changed files with 199 additions and 2 deletions
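As a rough sketch of how that seeding helper could be extracted for general use (the `tests/conftest.py` location, the `seeded_database` name, and the use of pytest's built-in `tmp_path` are assumptions, not something this commit does):

# tests/conftest.py (sketch only, not part of this commit)
import pathlib
import random
import shutil
import string

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from app.sql.models import Base

SNAPSHOT_DIR = pathlib.Path("test-data").joinpath("sqlite-database-snapshots")


@pytest.fixture(scope="function")
def seeded_database(request, tmp_path):
    # `request.param` names the snapshot file to copy into an isolated working database.
    seed_path = SNAPSHOT_DIR.joinpath(request.param)
    if not seed_path.exists():
        raise FileNotFoundError(f"Cannot initialize a database from {seed_path}")

    suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(5))
    db_path = tmp_path / f"isolated_database_{suffix}.db"
    shutil.copy(seed_path, db_path)

    engine = create_engine(f"sqlite:///{db_path}")
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base.metadata.create_all(bind=engine)

    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()

Tests would then opt in with `@pytest.mark.parametrize("seeded_database", ["populated_db.db"], indirect=True)`; using `tmp_path` trades the preserve-on-failure behaviour of the fixture in this diff for automatic cleanup.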

View File

@@ -2,6 +2,7 @@ from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from ..sql import crud
from ..routers.stats import top_movers
from ..templates import jinja_templates, _jsonify
from ..sql.database import get_db
@@ -11,8 +12,9 @@ html_router = APIRouter(include_in_schema=False, default_response_class=HTMLResp
@html_router.get("/")
def main(request: Request, db=Depends(get_db)):
    games = crud.get_games(db=db)
    movers = top_movers(db=db)
    return jinja_templates.TemplateResponse(
        request, "/main.html", {"games": _jsonify(games)}
        request, "/main.html", {"games": _jsonify(games), "top_movers": movers}
    )

View File

@@ -1,5 +1,6 @@
from collections import defaultdict
from datetime import datetime, MINYEAR
from datetime import datetime, timedelta, MINYEAR
from heapq import nlargest, nsmallest
from typing import Optional
from fastapi import APIRouter, Depends, Request
@@ -76,6 +77,77 @@ def stats_graph_api(
    }


# As with many APIs, this is a candidate for parallelization if desired -
# could key by deck_id, then in parallel get scores over the time period for that deck.
# But performance isn't likely to be a big issue!
@api_router.get("/top_movers")
def top_movers(
    lookback_in_days: int = 7,
    number_of_movers: int = 3,
    db=Depends(get_db),
):
    # TODO - this will error-out on an empty database
    date_of_latest_game = (
        db.query(models.Game.date)
        .order_by(models.Game.date.desc())
        .limit(1)
        .first()
        ._tuple()[0]
    )
    beginning_of_lookback = date_of_latest_game - timedelta(days=lookback_in_days)

    # TODO - this mostly duplicates logic from `stats_graph_api`. Extract?
    row_number_column = (
        func.row_number()
        .over(
            partition_by=[models.Deck.name, models.Game.date],
            order_by=models.EloScore.id.desc(),
        )
        .label("row_number")
    )
    sub_query = (
        db.query(
            models.Deck.id, models.Deck.name, models.EloScore.score, models.Game.date
        )
        .outerjoin(models.EloScore, models.Deck.id == models.EloScore.deck_id)
        .join(models.Game, models.EloScore.after_game_id == models.Game.id)
        .add_column(row_number_column)
        .subquery()
    )
    scores = (
        db.query(sub_query)
        .filter(sub_query.c.row_number == 1)
        .order_by(sub_query.c.date)
        .all()
    )

    score_tracker = defaultdict(dict)
    # First, get the score-per-deck at the start and end of the time period
    for score in scores:
        if score.date <= beginning_of_lookback:
            score_tracker[score.id]["start_score"] = score.score
        score_tracker[score.id]["latest_score"] = score.score
        # Technically we don't need to _keep_ adding this (as it won't change for a given deck_id) - but, until/unless
        # this logic is parallelized, there's no efficient way for the algorithm to know that it's operating on a deck
        # that's already been seen once before
        score_tracker[score.id]["name"] = score.name

    # Then, find biggest movers
    calculateds = [
        {
            "deck_id": deck_id,
            "name": score_tracker[deck_id]["name"],
            "start": score_tracker[deck_id]["start_score"],
            "end": score_tracker[deck_id]["latest_score"],
            "diff": score_tracker[deck_id]["latest_score"]
            - score_tracker[deck_id]["start_score"],
        }
        for deck_id in score_tracker
    ]
    return {
        "positive": nlargest(number_of_movers, calculateds, key=lambda x: x["diff"]),
        "negative": nsmallest(number_of_movers, calculateds, key=lambda x: x["diff"]),
    }


@html_router.get("/graph")
def stats_graph(request: Request, db=Depends(get_db)):
    return jinja_templates.TemplateResponse(request, "stats/graph.html")
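The TODO in `top_movers` above notes that it will error out on an empty database: `.first()` returns None there, so the chained `._tuple()[0]` raises. A minimal sketch of a guard for the top of that function, reusing the `db` and `models` names already in scope in this module (the empty `positive`/`negative` payload shape is an assumption, not part of the commit):

    # Sketch only: look up the latest game defensively before dereferencing it.
    latest_game_row = (
        db.query(models.Game.date)
        .order_by(models.Game.date.desc())
        .limit(1)
        .first()
    )
    if latest_game_row is None:
        # No games recorded yet, so nothing can have "moved".
        return {"positive": [], "negative": []}
    date_of_latest_game = latest_game_row[0]

An alternative would be raising a `fastapi.HTTPException`, but returning empty lists keeps the main page rendering when no games exist.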

View File

@@ -4,8 +4,58 @@
{% block head %}
{{ super() }}
<!-- https://www.w3schools.com/css/css_tooltip.asp -->
<style>
  h2 {
    display: inline-block;
  }
  .tooltip {
    position: relative;
    display: inline-block;
    border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
  }
  /* Tooltip text */
  .tooltip .tooltiptext {
    visibility: hidden;
    width: 480px;
    background-color: black;
    color: #fff;
    text-align: center;
    padding: 5px 0;
    border-radius: 6px;
    /* Position the tooltip text - see examples below! */
    position: absolute;
    z-index: 1;
  }
  /* Show the tooltip text when you mouse over the tooltip container */
  .tooltip:hover .tooltiptext {
    visibility: visible;
  }
</style>
{% endblock %}
{% block content %}
<p>Welcome to EDH ELO! Click "Games" above to see the list of Games, or "Record New Game" in the top-right to record a game</p>
<div>
  <h2>Biggest recent movers</h2><div class="tooltip">?<span class="tooltiptext">Logic:<br/>Find the date of the latest game<br/>Look back a period of 7 days from that date<br/>Calculate score differential between those two dates for all decks<br/>Rank by that<br/><br/>TODO - add a dedicated "biggest movers" page under "Stats" where anchor dates and number-of-top-movers can be specified</span></div>
  <h3>Positive</h3>
  <ol>
    {% for positive_mover in top_movers['positive'] %}
    <li><strong>{{ positive_mover['name'] }}</strong> - +{{ positive_mover['diff'] }} ({{ positive_mover['start'] }} -> {{ positive_mover['end'] }})</li>
    {% endfor %}
  </ol>
  <h3>Negative</h3>
  <ol>
    {% for negative_mover in top_movers['negative'] %}
    <li><strong>{{ negative_mover['name'] }}</strong> - {{ negative_mover['diff'] }} ({{ negative_mover['start'] }} -> {{ negative_mover['end'] }})</li>
    {% endfor %}
  </ol>
</div>
{% endblock %}
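The tooltip text above is the only prose description of the ranking: take each deck's score at the start and end of the lookback window and rank decks by the difference. As a purely illustrative, self-contained example (the deck names and scores below are made up, not real data), the selection step reduces to the same `nlargest`/`nsmallest` calls used in `top_movers`:

from heapq import nlargest, nsmallest

# Hypothetical per-deck results over a 7-day lookback window.
calculateds = [
    {"name": "Deck A", "start": 1000.0, "end": 1006.0, "diff": 6.0},
    {"name": "Deck B", "start": 1000.0, "end": 995.0, "diff": -5.0},
    {"name": "Deck C", "start": 1000.0, "end": 1001.0, "diff": 1.0},
]

# The template's "Positive" list would show Deck A first: "Deck A - +6.0 (1000.0 -> 1006.0)".
print(nlargest(3, calculateds, key=lambda x: x["diff"]))   # Deck A, Deck C, Deck B
print(nsmallest(3, calculateds, key=lambda x: x["diff"]))  # Deck B, Deck C, Deck A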

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,72 @@
import pathlib
import pytest
import random
import shutil
import string

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from app.sql.models import Base
from app.routers.stats import top_movers


# TODO - this is almost a copy-paste of `isolated_database` from `tests/sql/test_crud` -
# consider unifying and extracting.
@pytest.mark.parametrize("isolated_database", [["populated_db.db"]], indirect=True)
def test_initialization(isolated_database):
    response = top_movers(db=isolated_database)

    biggest_positive = response["positive"][0]
    assert biggest_positive["deck_id"] == 74
    assert biggest_positive["name"] == "Goose Mother"
    assert float(biggest_positive["diff"]) == 6.758

    biggest_negative = response["negative"][0]
    assert biggest_negative["deck_id"] == 45
    assert biggest_negative["name"] == "Ashad"
    assert float(biggest_negative["diff"]) == -6.542


# This fixture expects a parameter representing the filename within `test-data/sqlite-database-snapshots` that should be
# used to initialize the database.
# See http://stackoverflow.com/a/33879151
@pytest.fixture(scope="function")
def isolated_database(request, cleanups):
    database_dir = "database"
    db_dir_path = pathlib.Path(database_dir)
    if not db_dir_path.exists():
        db_dir_path.mkdir()
        db_dir_path.chmod(0o777)

    isolated_db_name = f"isolated_database_{''.join([random.choice(string.ascii_lowercase) for _ in range(5)])}.db"
    isolated_db_path = db_dir_path.joinpath(isolated_db_name)

    seedPath = pathlib.Path("test-data").joinpath(
        "sqlite-database-snapshots", request.param[0]
    )
    if not seedPath.exists():
        raise Exception(
            f"Cannot initialize a database from {seedPath} - does not exist"
        )
    shutil.copy(str(seedPath), isolated_db_path.absolute())

    engine = create_engine(f"sqlite:///{isolated_db_path.absolute()}")
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base.metadata.create_all(bind=engine)

    def success_cleanup():
        isolated_db_path.unlink()

    def failure_cleanup():
        print(
            f"Isolated database {isolated_db_path.absolute()}, used in test `{request.node.name}` at path `{request.node.path.absolute()}`, has been preserved for debugging"
        )

    cleanups.add_success(success_cleanup)
    cleanups.add_failure(failure_cleanup)

    yield SessionLocal()
    # yield isolated_db_path
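The `isolated_database` fixture above depends on a `cleanups` fixture that is not part of this diff. Purely as an illustration of the interface it appears to expect (success callbacks run after a passing test, failure callbacks after a failing one), here is a hedged sketch using the standard pytest report-attaching hook; the class name and attribute names are assumptions:

# conftest.py (sketch only, not part of this commit; the hook must live in a conftest to be registered)
import pytest


@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    # Attach each phase's report to the test item so fixtures can see whether the test failed.
    outcome = yield
    report = outcome.get_result()
    setattr(item, "rep_" + report.when, report)


class _Cleanups:
    def __init__(self):
        self._on_success = []
        self._on_failure = []

    def add_success(self, fn):
        self._on_success.append(fn)

    def add_failure(self, fn):
        self._on_failure.append(fn)


@pytest.fixture(scope="function")
def cleanups(request):
    holder = _Cleanups()
    yield holder
    # After the test body has run, choose which callbacks to invoke based on its outcome.
    call_report = getattr(request.node, "rep_call", None)
    failed = call_report is not None and call_report.failed
    for fn in (holder._on_failure if failed else holder._on_success):
        fn()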

View File

@@ -38,6 +38,7 @@ def _test_create_and_retrieve(db, name: str):
    assert get_player.name == name


# TODO - there's an almost-duplicate of this in `tests/routers/test_stats` - consider extracting.
@pytest.fixture(scope="function")
def isolated_database(request, cleanups):
    database_dir = "database"