diff --git a/README.md b/README.md index 2d13401..9078bbb 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A Python tool that automatically fetches and extracts the latest emoji data from ## Overview -This project automatically downloads the latest Discord build from the [Discord-Datamining](https://github.com/Discord-Datamining/Discord-Datamining) repository, extracts emoji data, and saves it in a structured JSON format. It runs as a GitHub Actions workflow twice a week to keep the emoji data up-to-date without manual intervention. +This project automatically downloads the latest Discord build from the [Discord-Datamining](https://github.com/Discord-Datamining/Discord-Datamining) repository, extracts emoji data, and saves it in a structured JSON format. It runs as a GitHub Actions workflow twice a week and opens a pull-request to keep the emoji data up-to-date, without manual intervention. ## How It Works @@ -13,7 +13,7 @@ This project automatically downloads the latest Discord build from the [Discord- - Extracts emoji information - Saves data in a standardized JSON format - Tracks changes using hash comparison to avoid unnecessary updates -- Detects and reports UTF-16 surrogate pairs +- Detects and reports unhandled UTF-16 surrogate pairs ## Technical Details @@ -21,26 +21,75 @@ The project uses: - Python 3.13+ - Dependencies: - json5 - - orjson - requests ## Output The emoji data is saved in `build/emojis.json` in the following format: + ```json { "emojis": [ { - "name": "emoji_name", - "id": "emoji_id", - ... + "names": [ + "grinning", + "grinning_face" + ], + "surrogates": "😀", + "unicodeVersion": 6.1, + "spriteIndex": 0 }, - ... - ] + // More emoji entries... + ], + "emojisByCategory": { + "people": [ + 0, + 509 + ], + // More categories... + }, + "nameToEmoji": { + "100": 1410, + "1234": 1488, + "grinning": 0, + // More name mappings... + }, + "surrogateToEmoji": { + "😀": 0, + "😃": 1, + "😄": 2, + // More surrogate mappings... + }, + "numDiversitySprites": 310, + "numNonDiversitySprites": 1614 } ``` -The main emojis.json file in the root directory is the updated version that consumers can access via GitHub raw URLs or by cloning this repository. +### Format Explanation + +- **emojis**: Array of emoji objects containing: + - **names**: Array of names/aliases for the emoji + - **surrogates**: Unicode representation of the emoji + - **unicodeVersion**: Version where the emoji was introduced + - **spriteIndex**: Index in Discord's sprite sheet + +- **emojisByCategory**: Object mapping category names to arrays of starting and ending indices in the emoji array + +- **nameToEmoji**: Mapping of emoji names to their index in the emoji array (used for quick lookups) + +- **surrogateToEmoji**: Mapping of emoji unicode characters to their index in the emoji array (used for quick lookups) + +- **numDiversitySprites**: Number of skin tone modifier sprites available (e.g., different skin tones for hand gestures) + +- **numNonDiversitySprites**: Number of standard emoji sprites that don't have skin tone modifiers + +## Easy Access + +The easiest way to access the emojis data is via the direct raw GitHub URL: + +``` +https://raw.githubusercontent.com/Paillat-dev/discord-emojis/refs/heads/master/build/emojis.json +``` ## Development diff --git a/emojis.json b/emojis.json deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml index d301394..c1fa15d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,6 @@ version = "0.1.0" requires-python = ">=3.13" dependencies = [ "json5>=0.12.0", - "orjson>=3.10.18", "requests>=2.32.3", ] diff --git a/uv.lock b/uv.lock index d59c17d..d8a11aa 100644 --- a/uv.lock +++ b/uv.lock @@ -39,7 +39,6 @@ version = "0.1.0" source = { virtual = "." } dependencies = [ { name = "json5" }, - { name = "orjson" }, { name = "requests" }, ] @@ -51,7 +50,6 @@ dev = [ [package.metadata] requires-dist = [ { name = "json5", specifier = ">=0.12.0" }, - { name = "orjson", specifier = ">=3.10.18" }, { name = "requests", specifier = ">=2.32.3" }, ] @@ -76,29 +74,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/9f/3500910d5a98549e3098807493851eeef2b89cdd3032227558a104dfe926/json5-0.12.0-py3-none-any.whl", hash = "sha256:6d37aa6c08b0609f16e1ec5ff94697e2cbbfbad5ac112afa05794da9ab7810db", size = 36079 }, ] -[[package]] -name = "orjson" -version = "3.10.18" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/81/0b/fea456a3ffe74e70ba30e01ec183a9b26bec4d497f61dcfce1b601059c60/orjson-3.10.18.tar.gz", hash = "sha256:e8da3947d92123eda795b68228cafe2724815621fe35e8e320a9e9593a4bcd53", size = 5422810 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/f0/8aedb6574b68096f3be8f74c0b56d36fd94bcf47e6c7ed47a7bd1474aaa8/orjson-3.10.18-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:69c34b9441b863175cc6a01f2935de994025e773f814412030f269da4f7be147", size = 249087 }, - { url = "https://files.pythonhosted.org/packages/bc/f7/7118f965541aeac6844fcb18d6988e111ac0d349c9b80cda53583e758908/orjson-3.10.18-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:1ebeda919725f9dbdb269f59bc94f861afbe2a27dce5608cdba2d92772364d1c", size = 133273 }, - { url = "https://files.pythonhosted.org/packages/fb/d9/839637cc06eaf528dd8127b36004247bf56e064501f68df9ee6fd56a88ee/orjson-3.10.18-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5adf5f4eed520a4959d29ea80192fa626ab9a20b2ea13f8f6dc58644f6927103", size = 136779 }, - { url = "https://files.pythonhosted.org/packages/2b/6d/f226ecfef31a1f0e7d6bf9a31a0bbaf384c7cbe3fce49cc9c2acc51f902a/orjson-3.10.18-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7592bb48a214e18cd670974f289520f12b7aed1fa0b2e2616b8ed9e069e08595", size = 132811 }, - { url = "https://files.pythonhosted.org/packages/73/2d/371513d04143c85b681cf8f3bce743656eb5b640cb1f461dad750ac4b4d4/orjson-3.10.18-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f872bef9f042734110642b7a11937440797ace8c87527de25e0c53558b579ccc", size = 137018 }, - { url = "https://files.pythonhosted.org/packages/69/cb/a4d37a30507b7a59bdc484e4a3253c8141bf756d4e13fcc1da760a0b00cb/orjson-3.10.18-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0315317601149c244cb3ecef246ef5861a64824ccbcb8018d32c66a60a84ffbc", size = 138368 }, - { url = "https://files.pythonhosted.org/packages/1e/ae/cd10883c48d912d216d541eb3db8b2433415fde67f620afe6f311f5cd2ca/orjson-3.10.18-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0da26957e77e9e55a6c2ce2e7182a36a6f6b180ab7189315cb0995ec362e049", size = 142840 }, - { url = "https://files.pythonhosted.org/packages/6d/4c/2bda09855c6b5f2c055034c9eda1529967b042ff8d81a05005115c4e6772/orjson-3.10.18-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb70d489bc79b7519e5803e2cc4c72343c9dc1154258adf2f8925d0b60da7c58", size = 133135 }, - { url = "https://files.pythonhosted.org/packages/13/4a/35971fd809a8896731930a80dfff0b8ff48eeb5d8b57bb4d0d525160017f/orjson-3.10.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e9e86a6af31b92299b00736c89caf63816f70a4001e750bda179e15564d7a034", size = 134810 }, - { url = "https://files.pythonhosted.org/packages/99/70/0fa9e6310cda98365629182486ff37a1c6578e34c33992df271a476ea1cd/orjson-3.10.18-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:c382a5c0b5931a5fc5405053d36c1ce3fd561694738626c77ae0b1dfc0242ca1", size = 413491 }, - { url = "https://files.pythonhosted.org/packages/32/cb/990a0e88498babddb74fb97855ae4fbd22a82960e9b06eab5775cac435da/orjson-3.10.18-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8e4b2ae732431127171b875cb2668f883e1234711d3c147ffd69fe5be51a8012", size = 153277 }, - { url = "https://files.pythonhosted.org/packages/92/44/473248c3305bf782a384ed50dd8bc2d3cde1543d107138fd99b707480ca1/orjson-3.10.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d808e34ddb24fc29a4d4041dcfafbae13e129c93509b847b14432717d94b44f", size = 137367 }, - { url = "https://files.pythonhosted.org/packages/ad/fd/7f1d3edd4ffcd944a6a40e9f88af2197b619c931ac4d3cfba4798d4d3815/orjson-3.10.18-cp313-cp313-win32.whl", hash = "sha256:ad8eacbb5d904d5591f27dee4031e2c1db43d559edb8f91778efd642d70e6bea", size = 142687 }, - { url = "https://files.pythonhosted.org/packages/4b/03/c75c6ad46be41c16f4cfe0352a2d1450546f3c09ad2c9d341110cd87b025/orjson-3.10.18-cp313-cp313-win_amd64.whl", hash = "sha256:aed411bcb68bf62e85588f2a7e03a6082cc42e5a2796e06e72a962d7c6310b52", size = 134794 }, - { url = "https://files.pythonhosted.org/packages/c2/28/f53038a5a72cc4fd0b56c1eafb4ef64aec9685460d5ac34de98ca78b6e29/orjson-3.10.18-cp313-cp313-win_arm64.whl", hash = "sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3", size = 131186 }, -] - [[package]] name = "requests" version = "2.32.3"