Refactor code structure and enhance documentation in main modules

- Updated `pyproject.toml` to include configuration for `ruff` linting tool.
- Improved `download_build.py` by adding a docstring to the `dowload` function.
- Refactored `__main__.py` for clarity and consistency in path handling.
- Enhanced `extract.py` with detailed docstrings for error classes and functions.
2026-03-02 22:14:54 +00:00 · 2025-05-03 18:13:58 +02:00
parent d6df9c76d0
commit 1749dbffc7
4 changed files with 47 additions and 18 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,33 @@ dev = [
 ]
 [tool.basedpyright]
 pythonVersion = "3.13"
 typeCheckingMode = "all"
 reportUnusedCallResult = false
 reportAny = false
 [tool.ruff]
 target-version = "py313"
 line-length = 120
 indent-width = 4
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false
 line-ending = "auto"
 docstring-code-format = false
 docstring-code-line-length = "dynamic"
 [tool.ruff.lint]
 select = ["ALL"]
 extend-ignore = [
    "D203",
    "D213",
    "COM812",
    "EM101",
    "TRY003",
    "T201",
    "D100",
    "D400"
 ]
--- a/src/main.py
+++ b/src/main.py
@@ -1,20 +1,19 @@
 # Copyright (c) Paillat-dev
 # SPDX-License-Identifier: MIT
 import os
 import pathlib
 import hashlib
 import json
-
+import pathlib
 import sys
 from typing import Any
 from extract import extract_emojis_from_str
 from download_build import dowload
 from extract import extract_emojis_from_str
 def main() -> None:
-    build_path = pathlib.Path(os.getcwd()) / "build"
+    """Download the latest discord build and extract emojis."""
    build_path = pathlib.Path.cwd() / "build"
    build_path.mkdir(exist_ok=True)
    out_path = build_path / "emojis.json"
--- a/src/download_build.py
+++ b/src/download_build.py
@@ -7,7 +7,11 @@ URL = "https://raw.githubusercontent.com/Discord-Datamining/Discord-Datamining/r
 def dowload() -> str:
    """Download the latest discord build from the datamining repository.
    Returns the content of the file as a string.
    """
    print("Downloading the latest discord build")
-    response = requests.get(URL)
+    response = requests.get(URL, timeout=10)
    response.raise_for_status()
    return response.text
--- a/src/extract.py
+++ b/src/extract.py
@@ -3,11 +3,11 @@
 import json
 import re
 from warnings import warn
 import json5
 from typing import Any
 from collections.abc import Mapping, Sequence
 from typing import Any
 from warnings import warn
 import json5
 type AnyDict = dict[Any, Any]  # pyright: ignore[reportExplicitAny]
 type AnyList = list[Any]  # pyright: ignore[reportExplicitAny]
@@ -17,34 +17,32 @@ PATTERN = re.compile(r"""(?<=\(')(\{"emojis".*?\})(?='\))""")
 class ExtractError(Exception):
-    pass
+    """Base class for all extract errors."""
 class NotFoundError(ExtractError):
-    pass
+    """No matches found in the build."""
 class MultipleFoundError(ExtractError):
-    pass
+    """Multiple matches found in the build."""
 _SUR = re.compile(r"[\uD800-\uDFFF]")
 def report_surrogates(node: AnyDict | AnyList | AnyTuple | str, path: str = "") -> None:
-    """
+    r"""Recursively walk *node* (dict / list / tuple / str) and print the location and code-point of every UTF-16 surrogate half it encounters.
    Recursively walk *node* (dict / list / tuple / str) and print the location
    and code-point of every UTF-16 surrogate half it encounters.
    >>> data = {"a": "OK", "b": ["\\uD83D", {"c": "x\\uDE00y"}]}
    >>> report_surrogates(data)
    b[0] : U+D83D
    b[1].c : U+DE00
-    """
+    """  # noqa: E501
    if isinstance(node, str):
        for m in _SUR.finditer(node):
            cp = ord(m.group())
-            warn(f"Surrogate found at {path or '<root>'} : U+{cp:04X}")
+            warn(f"Surrogate found at {path or '<root>'} : U+{cp:04X}", SyntaxWarning, 2)
        return
    if isinstance(node, Mapping):
@@ -59,12 +57,13 @@ def report_surrogates(node: AnyDict | AnyList | AnyTuple | str, path: str = "")
 def extract_emojis_from_str(content: str) -> AnyDict:
    """Extract emojis from a string containing the discord build."""
    print("Searching for emojis...")
    matches: list[str] = PATTERN.findall(content)
    if len(matches) == 0:
        raise NotFoundError("No matches found")
-    elif len(matches) > 1:
+    if len(matches) > 1:
        raise MultipleFoundError("Multiple matches found")
    match: str = matches[0]