From 1749dbffc7912b78d0d1c748663b9123c90cd97d Mon Sep 17 00:00:00 2001 From: Paillat Date: Sat, 3 May 2025 18:13:58 +0200 Subject: [PATCH] Refactor code structure and enhance documentation in main modules - Updated `pyproject.toml` to include configuration for `ruff` linting tool. - Improved `download_build.py` by adding a docstring to the `dowload` function. - Refactored `__main__.py` for clarity and consistency in path handling. - Enhanced `extract.py` with detailed docstrings for error classes and functions. --- pyproject.toml | 27 +++++++++++++++++++++++++++ src/__main__.py | 9 ++++----- src/download_build.py | 6 +++++- src/extract.py | 23 +++++++++++------------ 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 25b6a61..a530a0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,33 @@ dev = [ ] [tool.basedpyright] +pythonVersion = "3.13" typeCheckingMode = "all" reportUnusedCallResult = false reportAny = false + +[tool.ruff] +target-version = "py313" +line-length = 120 +indent-width = 4 + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = false +docstring-code-line-length = "dynamic" + +[tool.ruff.lint] +select = ["ALL"] +extend-ignore = [ + "D203", + "D213", + "COM812", + "EM101", + "TRY003", + "T201", + "D100", + "D400" +] \ No newline at end of file diff --git a/src/__main__.py b/src/__main__.py index 950dce7..dc98d15 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -1,20 +1,19 @@ # Copyright (c) Paillat-dev # SPDX-License-Identifier: MIT -import os -import pathlib import hashlib import json - +import pathlib import sys from typing import Any -from extract import extract_emojis_from_str from download_build import dowload +from extract import extract_emojis_from_str def main() -> None: - build_path = pathlib.Path(os.getcwd()) / "build" + """Download the latest discord build and extract emojis.""" + build_path = pathlib.Path.cwd() / "build" build_path.mkdir(exist_ok=True) out_path = build_path / "emojis.json" diff --git a/src/download_build.py b/src/download_build.py index 8ec3e22..f7a5330 100644 --- a/src/download_build.py +++ b/src/download_build.py @@ -7,7 +7,11 @@ URL = "https://raw.githubusercontent.com/Discord-Datamining/Discord-Datamining/r def dowload() -> str: + """Download the latest discord build from the datamining repository. + + Returns the content of the file as a string. + """ print("Downloading the latest discord build") - response = requests.get(URL) + response = requests.get(URL, timeout=10) response.raise_for_status() return response.text diff --git a/src/extract.py b/src/extract.py index 51251b4..7274674 100644 --- a/src/extract.py +++ b/src/extract.py @@ -3,11 +3,11 @@ import json import re -from warnings import warn -import json5 -from typing import Any from collections.abc import Mapping, Sequence +from typing import Any +from warnings import warn +import json5 type AnyDict = dict[Any, Any] # pyright: ignore[reportExplicitAny] type AnyList = list[Any] # pyright: ignore[reportExplicitAny] @@ -17,34 +17,32 @@ PATTERN = re.compile(r"""(?<=\(')(\{"emojis".*?\})(?='\))""") class ExtractError(Exception): - pass + """Base class for all extract errors.""" class NotFoundError(ExtractError): - pass + """No matches found in the build.""" class MultipleFoundError(ExtractError): - pass + """Multiple matches found in the build.""" _SUR = re.compile(r"[\uD800-\uDFFF]") def report_surrogates(node: AnyDict | AnyList | AnyTuple | str, path: str = "") -> None: - """ - Recursively walk *node* (dict / list / tuple / str) and print the location - and code-point of every UTF-16 surrogate half it encounters. + r"""Recursively walk *node* (dict / list / tuple / str) and print the location and code-point of every UTF-16 surrogate half it encounters. >>> data = {"a": "OK", "b": ["\\uD83D", {"c": "x\\uDE00y"}]} >>> report_surrogates(data) b[0] : U+D83D b[1].c : U+DE00 - """ + """ # noqa: E501 if isinstance(node, str): for m in _SUR.finditer(node): cp = ord(m.group()) - warn(f"Surrogate found at {path or ''} : U+{cp:04X}") + warn(f"Surrogate found at {path or ''} : U+{cp:04X}", SyntaxWarning, 2) return if isinstance(node, Mapping): @@ -59,12 +57,13 @@ def report_surrogates(node: AnyDict | AnyList | AnyTuple | str, path: str = "") def extract_emojis_from_str(content: str) -> AnyDict: + """Extract emojis from a string containing the discord build.""" print("Searching for emojis...") matches: list[str] = PATTERN.findall(content) if len(matches) == 0: raise NotFoundError("No matches found") - elif len(matches) > 1: + if len(matches) > 1: raise MultipleFoundError("Multiple matches found") match: str = matches[0]