✨ Add testing with cool dataset

2026-03-02 18:44:54 +00:00 · 2025-11-05 16:06:44 +01:00
parent 4b6ed51cfc
commit 4ff6f4841c
5 changed files with 124 additions and 970 deletions
--- a/tests/dataset
+++ b/tests/dataset
--- a/tests/esc_data.json
+++ b/tests/esc_data.json
@@ -1,118 +0,0 @@
-{
-  "2025": {
-    "jury": {
-      "Austria": 258,
-      "Switzerland": 214,
-      "France": 180,
-      "Italy": 159,
-      "Netherlands": 133,
-      "Sweden": 126,
-      "Latvia": 116,
-      "Greece": 105,
-      "Estonia": 98,
-      "United Kingdom": 88,
-      "Finland": 88,
-      "Malta": 83,
-      "Germany": 77,
-      "Ukraine": 60,
-      "Israel": 60,
-      "Albania": 45,
-      "Denmark": 45,
-      "Armenia": 42,
-      "Portugal": 37,
-      "Lithuania": 34,
-      "Spain": 27,
-      "Luxembourg": 23,
-      "Norway": 22,
-      "Poland": 17,
-      "San Marino": 9,
-      "Iceland": 0
-    },
-    "televote": {
-      "Israel": 297,
-      "Estonia": 258,
-      "Sweden": 195,
-      "Austria": 178,
-      "Albania": 173,
-      "Ukraine": 158,
-      "Poland": 139,
-      "Greece": 126,
-      "Finland": 108,
-      "Italy": 97,
-      "Germany": 74,
-      "Norway": 67,
-      "Lithuania": 62,
-      "France": 50,
-      "Latvia": 42,
-      "Netherlands": 42,
-      "Iceland": 33,
-      "Armenia": 30,
-      "Luxembourg": 24,
-      "San Marino": 18,
-      "Portugal": 13,
-      "Spain": 10,
-      "Malta": 8,
-      "Denmark": 2,
-      "United Kingdom": 0,
-      "Switzerland": 0
-    },
-    "winner": "Austria"
-  },
-  "2024": {
-    "jury": {
-      "Switzerland": 365,
-      "France": 218,
-      "Croatia": 210,
-      "Italy": 164,
-      "Ukraine": 146,
-      "Ireland": 142,
-      "Portugal": 139,
-      "Sweden": 125,
-      "Armenia": 101,
-      "Germany": 99,
-      "Luxembourg": 83,
-      "Israel": 52,
-      "United Kingdom": 46,
-      "Greece": 41,
-      "Latvia": 36,
-      "Cyprus": 34,
-      "Lithuania": 32,
-      "Serbia": 22,
-      "Spain": 19,
-      "Austria": 19,
-      "Georgia": 15,
-      "Slovenia": 15,
-      "Norway": 12,
-      "Finland": 7,
-      "Estonia": 4
-    },
-    "televote": {
-      "Croatia": 337,
-      "Israel": 323,
-      "Ukraine": 307,
-      "France": 227,
-      "Switzerland": 226,
-      "Ireland": 136,
-      "Italy": 104,
-      "Greece": 85,
-      "Armenia": 82,
-      "Lithuania": 58,
-      "Sweden": 49,
-      "Cyprus": 44,
-      "Estonia": 33,
-      "Serbia": 32,
-      "Finland": 31,
-      "Latvia": 28,
-      "Luxembourg": 20,
-      "Georgia": 19,
-      "Germany": 18,
-      "Portugal": 13,
-      "Slovenia": 12,
-      "Spain": 11,
-      "Austria": 5,
-      "Norway": 4,
-      "United Kingdom": 0
-    },
-    "winner": "Switzerland"
-  }
-}
--- a/tests/test_esc.py
+++ b/tests/test_esc.py
@@ -3,21 +3,24 @@

 import os
 import sys
+from pathlib import Path
+from typing import TypedDict

 import orjson

 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))  # noqa: PTH120, PTH118

 import tempfile
-from pathlib import Path
-from typing import TypedDict

 import pytest
 from typer.testing import CliRunner

 from src.__main__ import app

-data_path = Path(__file__).parent / "esc_data.json"
+DATASET_BASE = Path(__file__).parent / "dataset" / "data"
+DATASET_PATH = DATASET_BASE / "senior"
+
+YEARS_TO_TEST = list(range(2016, 2026))


 class ESCData(TypedDict):
@@ -26,12 +29,116 @@ class ESCData(TypedDict):
    jury: dict[str, int]
    televote: dict[str, int]
    winner: str
+    participating_countries: int
+
+with (DATASET_BASE / "countries.json").open("rb") as f:  # Path.open, as for final.json
+    COUNTRY_NAMES: dict[str, str] = orjson.loads(f.read())  # looked up by get_country_name
+
+def get_country_name(country_code: str) -> str:
+    """Return the full name for a country code, or the code unchanged if unknown."""
+    lookup_key = country_code.upper()
+    return COUNTRY_NAMES.get(lookup_key, country_code)


-with data_path.open("r", encoding="utf-8") as f:
-    data = orjson.loads(f.read())
+def get_country_mapping(year: int) -> dict[int, str]:
+    """
+    Map contestant IDs to upper-cased country codes taken from the contestants directory names.
+    Returns a dict: {contestant_id: country_code}; it is empty if the directory does not exist.
+    """
+    contestants_dir = DATASET_PATH / str(year) / "contestants"
+    country_mapping: dict[int, str] = {}

-ESC_DATA: dict[int, "ESCData"] = {int(year): value for year, value in data.items()}
+    if not contestants_dir.exists():
+        return country_mapping
+
+    for item in contestants_dir.iterdir():
+        if item.is_dir():
+            # Directory name format: N_XX (N = integer contestant ID, XX = country code)
+            dir_name = item.name
+            if "_" in dir_name:
+                contestant_id_str, country_code = dir_name.split("_", 1)
+                try:
+                    contestant_id = int(contestant_id_str)
+                    country_mapping[contestant_id] = country_code.upper()
+                except ValueError:
+                    pass  # ID part is not an integer: leave this directory out of the mapping
+
+    return country_mapping
+
+
+def count_participating_countries(year: int) -> int:
+    """Return how many sub-directories the year's contestants folder holds (0 if it is absent)."""
+    contestants_dir = DATASET_PATH / str(year) / "contestants"
+    if contestants_dir.exists():
+        return len([entry for entry in contestants_dir.iterdir() if entry.is_dir()])
+    return 0
+
+
+def parse_year_data(year: int) -> ESCData:
+    """Parse one year's grand-final results from the dataset.
+
+    Reads ``rounds/final.json`` for the year and returns the "jury" and "public"
+    points per country name (each sorted by points, descending), the name of the
+    country whose performance has place 1 ("Unknown" if none), and the number of
+    contestant directories. Performances whose contestant ID has no directory
+    mapping are ignored.
+
+    Raises:
+        FileNotFoundError: if the year's final.json does not exist. A regular
+            exception is used instead of pytest.skip(), whose outcome exception
+            derives from BaseException: raised while the module is imported it
+            bypasses ``except Exception`` and fails collection.
+    """
+    final_json_path = DATASET_PATH / str(year) / "rounds" / "final.json"
+    if not final_json_path.exists():
+        raise FileNotFoundError(f"Data not found for year {year}")
+
+    data = orjson.loads(final_json_path.read_bytes())
+    country_mapping = get_country_mapping(year)
+    participating_countries = count_participating_countries(year)
+
+    jury_scores: dict[str, int] = {}
+    televote_scores: dict[str, int] = {}
+    winner = None
+
+    # Entries without a known contestant directory are skipped.
+    for performance in data.get("performances", []):
+        country_code = country_mapping.get(performance.get("contestantId"))
+        if not country_code:
+            continue
+
+        country_name = get_country_name(country_code)
+        if performance.get("place") == 1:
+            winner = country_name
+
+        for score in performance.get("scores", []):
+            score_name = score.get("name")
+            points = score.get("points", 0)
+            if score_name == "jury":
+                jury_scores[country_name] = points
+            elif score_name == "public":
+                # "public" points are stored as the televote result.
+                televote_scores[country_name] = points
+
+    # Highest score first; sorted() is stable, so ties keep their original order.
+    jury_scores = dict(sorted(jury_scores.items(), key=lambda x: x[1], reverse=True))
+    televote_scores = dict(sorted(televote_scores.items(), key=lambda x: x[1], reverse=True))
+
+    return {
+        "jury": jury_scores,
+        "televote": televote_scores,
+        "winner": winner if winner else "Unknown",
+        "participating_countries": participating_countries,
+    }
+
+
+# Parse every year in YEARS_TO_TEST; a year whose parsing raises an Exception is reported and left out.
+ESC_DATA: dict[int, ESCData] = {}
+for year in YEARS_TO_TEST:
+    try:
+        ESC_DATA[year] = parse_year_data(year)
+    except Exception as e:
+        print(f"Warning: Could not parse data for year {year}: {e}")  # noqa: T201


 TADA = "🎉"
@@ -43,6 +150,7 @@ def test_esc_grand_final(year: int, data: ESCData) -> None:
    jury_scores: dict[str, int] = data["jury"]
    televote_scores: dict[str, int] = data["televote"]
    expected_winner: str = data["winner"]
+    participating_countries: int = data["participating_countries"]

    with tempfile.NamedTemporaryFile("w", delete=False, encoding="utf-8") as f:
        for country, score in jury_scores.items():
@@ -54,11 +162,15 @@ def test_esc_grand_final(year: int, data: ESCData) -> None:
    inputs.append("y")  # to confirm the winner

    runner = CliRunner()
-    result = runner.invoke(app, ["--jury-path", f.name], input="\n".join(inputs))
+    result = runner.invoke(
+        app,
+        ["--jury-path", f.name, "--participating-countries", str(participating_countries)],
+        input="\n".join(inputs),
+    )

    try:
        actual = result.output.split(TADA)[1].strip().split()[0]
    except Exception:
        pytest.fail(f"Could not parse winner from output:\n{result.output}", pytrace=False)

-    assert actual == expected_winner, f"For {year}, expected winner {expected_winner} but got {actual!r}"
+    assert actual == expected_winner, f"For {year}, expected winner {expected_winner} but got {actual!r}"