Add testing with cool dataset

This commit is contained in:
2025-11-05 16:06:44 +01:00
parent 4b6ed51cfc
commit 4ff6f4841c
5 changed files with 124 additions and 970 deletions

View File

@@ -3,21 +3,24 @@
import os
import sys
from pathlib import Path
from typing import TypedDict
import orjson
sys.path.append(os.path.join(os.path.dirname(__file__), "..")) # noqa: PTH120, PTH118
import tempfile
from pathlib import Path
from typing import TypedDict
import pytest
from typer.testing import CliRunner
from src.__main__ import app
data_path = Path(__file__).parent / "esc_data.json"
DATASET_BASE = Path(__file__).parent / "dataset" / "data"
DATASET_PATH = DATASET_BASE / "senior"
YEARS_TO_TEST = list(range(2016, 2026))
class ESCData(TypedDict):
@@ -26,12 +29,116 @@ class ESCData(TypedDict):
jury: dict[str, int]
televote: dict[str, int]
winner: str
participating_countries: int
# Country-code -> full-name lookup table, read once at import time from the
# dataset's countries.json (raw bytes are handed straight to orjson).
with (DATASET_BASE / "countries.json").open(mode="rb") as f:
    COUNTRY_NAMES: dict[str, str] = orjson.loads(f.read())
def get_country_name(country_code: str) -> str:
    """Return the full country name for ``country_code``.

    The lookup in ``COUNTRY_NAMES`` is done on the upper-cased code. When the
    code is not present, the argument is returned unchanged (not upper-cased).
    """
    try:
        return COUNTRY_NAMES[country_code.upper()]
    except KeyError:
        return country_code
with data_path.open("r", encoding="utf-8") as f:
data = orjson.loads(f.read())
def get_country_mapping(year: int, *, dataset_path: "Path | None" = None) -> dict[int, str]:
    """Map contestant IDs to country codes by reading the contestants directory.

    Every sub-directory of ``<dataset root>/<year>/contestants`` named
    ``N_XX`` contributes one entry, where ``N`` is the integer contestant ID
    and ``XX`` (everything after the first underscore) is the country code.

    Args:
        year: Contest year; selects the ``<year>/contestants`` folder.
        dataset_path: Dataset root to read from. Defaults to the module-level
            ``DATASET_PATH``.

    Returns:
        ``{contestant_id: country_code}`` with upper-cased country codes.
        Empty when the contestants folder is missing or is not a directory.
    """
    root = DATASET_PATH if dataset_path is None else dataset_path
    contestants_dir = root / str(year) / "contestants"

    # is_dir() rather than exists(): a stray regular file at this path would
    # otherwise make iterdir() raise NotADirectoryError.
    if not contestants_dir.is_dir():
        return {}

    country_mapping: dict[int, str] = {}
    for item in contestants_dir.iterdir():
        if not item.is_dir():
            continue

        # Split on the first underscore only, so the code part may itself
        # contain underscores.
        contestant_id_str, separator, country_code = item.name.partition("_")
        if not separator:
            continue

        try:
            contestant_id = int(contestant_id_str)
        except ValueError:
            # Deliberate best-effort: a directory whose prefix is not an
            # integer is not a contestant entry, so it is skipped.
            continue

        country_mapping[contestant_id] = country_code.upper()

    return country_mapping
def count_participating_countries(year: int, *, dataset_path: "Path | None" = None) -> int:
    """Count the number of directories in the contestants folder.

    Args:
        year: Contest year; selects ``<dataset root>/<year>/contestants``.
        dataset_path: Dataset root to read from. Defaults to the module-level
            ``DATASET_PATH``.

    Returns:
        The number of sub-directories of the contestants folder (regular
        files are not counted), or 0 when that folder is missing or is not a
        directory.
    """
    root = DATASET_PATH if dataset_path is None else dataset_path
    contestants_dir = root / str(year) / "contestants"

    # is_dir() rather than exists(): a stray regular file at this path would
    # otherwise make iterdir() raise NotADirectoryError.
    if not contestants_dir.is_dir():
        return 0

    return sum(1 for item in contestants_dir.iterdir() if item.is_dir())
def parse_year_data(year: int) -> ESCData:
    """Assemble the expected grand-final results for one year from the dataset.

    Reads ``DATASET_PATH/<year>/rounds/final.json``. For every performance
    whose ``contestantId`` resolves to a country code, the points of the
    scores named ``"jury"`` and ``"public"`` are stored under the country's
    full name. Performances that cannot be resolved are ignored.

    Calls ``pytest.skip`` when ``final.json`` does not exist for ``year``.

    Returns:
        A dict with the ``jury`` and ``televote`` score tables (ordered by
        points, highest first), the ``winner`` (the country whose ``place`` is
        1, or ``"Unknown"`` when there is none) and the
        ``participating_countries`` count.
    """
    final_json_path = DATASET_PATH / str(year) / "rounds" / "final.json"
    if not final_json_path.exists():
        pytest.skip(f"Data not found for year {year}")

    with final_json_path.open("rb") as fh:
        final_round = orjson.loads(fh.read())

    code_by_contestant = get_country_mapping(year)
    participating_countries = count_participating_countries(year)

    jury_points = {}
    public_points = {}
    winner = None

    for performance in final_round.get("performances", []):
        contestant_id = performance.get("contestantId")
        place = performance.get("place")

        country_code = code_by_contestant.get(contestant_id)
        if not country_code:
            # No contestant directory matches this performance.
            continue

        country_name = get_country_name(country_code)
        if place == 1:
            winner = country_name

        for score in performance.get("scores", []):
            score_name = score.get("name")
            points = score.get("points", 0)
            if score_name == "public":
                public_points[country_name] = points
            elif score_name == "jury":
                jury_points[country_name] = points

    def ordered_by_points(table):
        """Return a copy of ``table`` ordered by points, highest first."""
        ranked = sorted(table.items(), key=lambda entry: entry[1], reverse=True)
        return dict(ranked)

    jury_points = ordered_by_points(jury_points)
    public_points = ordered_by_points(public_points)

    return {
        "jury": jury_points,
        "televote": public_points,
        "winner": winner or "Unknown",
        "participating_countries": participating_countries,
    }
# Parse the data of every year under test once, at import time, so the
# results are available when the tests are collected.
ESC_DATA: dict[int, ESCData] = {}
for year in YEARS_TO_TEST:
    try:
        ESC_DATA[year] = parse_year_data(year)
    # parse_year_data() reports missing data through pytest.skip(). Its outcome
    # exception derives from BaseException, so `except Exception` alone lets it
    # escape at import time and pytest then fails to collect the whole module.
    # Catch it explicitly so a missing year only produces the warning below.
    except (Exception, pytest.skip.Exception) as e:
        print(f"Warning: Could not parse data for year {year}: {e}")  # noqa: T201
TADA = "🎉"
@@ -43,6 +150,7 @@ def test_esc_grand_final(year: int, data: ESCData) -> None:
jury_scores: dict[str, int] = data["jury"]
televote_scores: dict[str, int] = data["televote"]
expected_winner: str = data["winner"]
participating_countries: int = data["participating_countries"]
with tempfile.NamedTemporaryFile("w", delete=False, encoding="utf-8") as f:
for country, score in jury_scores.items():
@@ -54,11 +162,15 @@ def test_esc_grand_final(year: int, data: ESCData) -> None:
inputs.append("y") # to confirm the winner
runner = CliRunner()
result = runner.invoke(app, ["--jury-path", f.name], input="\n".join(inputs))
result = runner.invoke(
app,
["--jury-path", f.name, "--participating-countries", str(participating_countries)],
input="\n".join(inputs),
)
try:
actual = result.output.split(TADA)[1].strip().split()[0]
except Exception:
pytest.fail(f"Could not parse winner from output:\n{result.output}", pytrace=False)
assert actual == expected_winner, f"For {year}, expected winner {expected_winner} but got {actual!r}"
assert actual == expected_winner, f"For {year}, expected winner {expected_winner} but got {actual!r}"