mirror of
https://github.com/Paillat-dev/discord-emojis.git
synced 2026-01-02 00:56:19 +00:00
First commit
This commit is contained in:
47
src/__main__.py
Normal file
47
src/__main__.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
import pathlib
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from extract import extract_emojis_from_str
|
||||
from download_build import dowload
|
||||
|
||||
|
||||
def main() -> None:
    """Regenerate ``build/emojis.json`` when the extracted emoji data changed.

    Downloads the current build via ``dowload()``, extracts the emoji table
    with ``extract_emojis_from_str()`` and serialises it as indented UTF-8
    JSON. The SHA-256 hex digest of those bytes is compared with the digest
    stored in ``build/hash.txt``. Both files are rewritten only when the
    digest differs or ``emojis.json`` does not exist yet.

    Exits the process with status 2 (after printing "No changes") when the
    existing output is already up to date.
    """
    build_path = pathlib.Path(os.getcwd()) / "build"
    build_path.mkdir(exist_ok=True)

    out_path = build_path / "emojis.json"
    hash_path = build_path / "hash.txt"

    # The output file is intentionally not created up front: if the download
    # or the extraction below raises, no empty (invalid JSON) emojis.json is
    # left behind.
    if hash_path.exists():
        # Strip so a trailing newline added by an editor or a VCS filter does
        # not make an otherwise identical digest compare unequal.
        current_hash = hash_path.read_text(encoding="utf-8").strip()
    else:
        current_hash = ""

    new: dict[Any, Any] = extract_emojis_from_str(dowload())  # pyright: ignore[reportExplicitAny]
    new_dump = json.dumps(new, indent=4, ensure_ascii=False).encode("utf-8")
    # The data is passed positionally; that form is accepted by every Python
    # version, unlike the ``string=`` keyword.
    new_hash = hashlib.sha256(new_dump).hexdigest()

    # A matching digest only means "up to date" if the output really exists;
    # a stale hash.txt next to a missing emojis.json must trigger a rewrite.
    if out_path.exists() and current_hash == new_hash:
        print("No changes")
        sys.exit(2)  # No changes

    out_path.write_bytes(new_dump)
    hash_path.write_text(new_hash, encoding="utf-8")

    print("Updated emojis.json")


if __name__ == "__main__":
    main()
|
||||
10
src/download_build.py
Normal file
10
src/download_build.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import requests
|
||||
|
||||
# Raw URL of the script that ``dowload()`` fetches.
URL = "https://raw.githubusercontent.com/Discord-Datamining/Discord-Datamining/refs/heads/master/current.js"


def dowload(timeout: float = 60.0) -> str:
    """Fetch the document at ``URL`` and return its body as text.

    The misspelled name is kept because other modules import it as-is.

    Args:
        timeout: Seconds ``requests`` waits for the connection and for data
            from the server before giving up. Without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        The decoded response body.

    Raises:
        requests.exceptions.HTTPError: The server answered with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: The server did not respond within
            *timeout* seconds.
    """
    print("Downloading the latest discord build")
    response = requests.get(URL, timeout=timeout)
    response.raise_for_status()
    return response.text
|
||||
78
src/extract.py
Normal file
78
src/extract.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import json
|
||||
import re
|
||||
from warnings import warn
|
||||
import json5
|
||||
from typing import Any
|
||||
from collections.abc import Mapping, Sequence
|
||||
|
||||
|
||||
# Loose aliases for untyped containers, declared with ``type`` statements.
type AnyDict = dict[Any, Any] # pyright: ignore[reportExplicitAny]
type AnyList = list[Any] # pyright: ignore[reportExplicitAny]
type AnyTuple = tuple[Any, ...] # pyright: ignore[reportExplicitAny]

# Captures a ``{"emojis" ... }`` object literal that sits directly between
# ``('`` and ``')``. The look-behind/look-ahead keep the quotes and the
# parentheses out of the captured group, and the lazy ``.*?`` stops at the
# first ``}')``. ``.`` does not match newlines here (no DOTALL flag), so the
# literal must be on a single line.
# NOTE(review): presumably this targets a single-quoted JSON string passed to
# a call in the downloaded script -- confirm against the actual input.
PATTERN = re.compile(r"""(?<=\(')(\{"emojis".*?\})(?='\))""")
|
||||
|
||||
|
||||
class ExtractError(Exception):
    """Base class for errors raised while extracting the emoji data."""
|
||||
|
||||
|
||||
class NotFoundError(ExtractError):
    """Raised when the emoji pattern does not match anything in the input."""
|
||||
|
||||
|
||||
class MultipleFoundError(ExtractError):
    """Raised when the emoji pattern matches more than once in the input."""
|
||||
|
||||
|
||||
# Matches one surrogate code point (U+D800-U+DFFF), high or low half.
_SUR = re.compile(r"[\uD800-\uDFFF]")


def report_surrogates(node: AnyDict | AnyList | AnyTuple | str, path: str = "") -> None:
    """Warn about every surrogate code point found anywhere inside *node*.

    Recursively walks mappings and sequences. For each surrogate found in a
    string -- including string keys of mappings -- a warning is issued through
    ``warnings.warn`` (nothing is printed to stdout) that names the location
    and the code point.

    Locations are built as ``key``, ``outer.inner`` and ``name[index]``;
    a surrogate inside a mapping key is reported at ``<location> (key)`` and
    a bare string at the top level is reported as ``<root>``.

    Example: for ``{"b": ["\\ud83d", {"c": "x\\ude00y"}]}`` two warnings are
    issued, ``Surrogate found at b[0] : U+D83D`` and
    ``Surrogate found at b[1].c : U+DE00``.

    Args:
        node: The value to inspect (mapping, sequence or string). Values of
            any other type are ignored.
        path: Location prefix for *node*; callers normally leave it empty.
    """
    if isinstance(node, str):
        for hit in _SUR.finditer(node):
            cp = ord(hit.group())
            warn(f"Surrogate found at {path or '<root>'} : U+{cp:04X}")
        return

    if isinstance(node, Mapping):
        for key, value in node.items():
            sub = f"{path}.{key}" if path else str(key)
            # Keys end up in the serialised output just like values do, so a
            # surrogate hiding in a key is reported as well.
            if isinstance(key, str):
                report_surrogates(key, f"{sub} (key)")
            report_surrogates(value, sub)
        return

    # Byte strings are sequences too, but they cannot contain code points.
    if isinstance(node, Sequence) and not isinstance(node, (str, bytes, bytearray)):  # pyright: ignore[reportUnnecessaryIsInstance]
        for index, item in enumerate(node):
            report_surrogates(item, f"{path}[{index}]")
|
||||
|
||||
|
||||
def extract_emojis_from_str(content: str) -> AnyDict:
    """Find the single emoji object literal in *content* and return it parsed.

    Args:
        content: Text to search with ``PATTERN``.

    Returns:
        The decoded data structure of the one matching literal.

    Raises:
        NotFoundError: ``PATTERN`` matched nothing.
        MultipleFoundError: ``PATTERN`` matched more than once.
    """
    print("Searching for emojis...")
    candidates: list[str] = PATTERN.findall(content)

    if not candidates:
        raise NotFoundError("No matches found")
    if len(candidates) > 1:
        raise MultipleFoundError("Multiple matches found")

    (raw,) = candidates

    print("Found emojis!")
    print("Parsing json")

    # Two-stage decode: json5 understands the \x escapes that the strict json
    # module rejects, but it leaves surrogate pairs as two separate halves.
    # Dumping the result with json (which escapes non-ASCII by default) and
    # loading it again merges each pair into a single code point.
    lenient = json5.loads(raw)
    escaped = json.dumps(lenient)
    parsed = json.loads(escaped)
    report_surrogates(parsed)

    return parsed
|
||||
Reference in New Issue
Block a user