Implement demojize function for bidirectional emoji conversion (#16)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
2025-05-29 13:38:56 +02:00
committed by GitHub
parent a1d2592ee1
commit ad98dd9a58
3 changed files with 82 additions and 6 deletions

View File

@@ -12,7 +12,8 @@
<!-- end badges -->
A Python library for converting Discord emoji names to their Unicode equivalents.
A Python library for converting Discord emoji names to their Unicode equivalents and
vice versa.
</div>
@@ -31,9 +32,9 @@ A Python library for converting Discord emoji names to their Unicode equivalents
## Overview
Dismoji is a lightweight Python library that provides a simple way to convert Discord
emoji names to their Unicode equivalents. With just a single function call, you can
transform text containing Discord-style emoji codes (like `:smile:`) into text with
actual Unicode emoji characters (like "😄").
emoji names to their Unicode equivalents and vice versa. With a single function call in
each direction, you can transform text containing Discord-style emoji codes (like
`:smile:`) into text with actual Unicode emoji characters (like "😄") and back again.
This library uses
[Paillat-dev/discord-emojis](https://github.com/Paillat-dev/discord-emojis) as the
@@ -56,16 +57,23 @@ import dismoji
text = "Hello, :wave: I'm excited! :partying_face:"
converted_text = dismoji.emojize(text)
print(converted_text) # Output: "Hello, 👋 I'm excited! 🥳"
# Convert Unicode emojis back to Discord emoji names
emoji_text = "Hello, 👋 I'm excited! 🥳"
named_text = dismoji.demojize(emoji_text)
print(named_text) # Output: "Hello, :wave: I'm excited! :partying_face:"
```
## Features
- **Simple API**: Just one function to remember - `dismoji.emojize()`
- **Simple API**: Just two functions to remember - `dismoji.emojize()` and
`dismoji.demojize()`
- **Discord Compatible**: Supports Discord's emoji naming conventions
- **Comprehensive**: Includes all standard emojis available on Discord
- **Type Safe**: Fully type-annotated for better IDE integration
- **Zero Dependencies**: Lightweight with no external dependencies
- **Fast**: Optimized for quick emoji replacement
- **Bidirectional**: Convert between emoji names and characters in both directions
## Getting Help

View File

@@ -14,10 +14,21 @@ with EMOJIS_PATH.open("r", encoding="utf-8") as f:
# Maps each key of EMOJIS["nameToEmoji"] to the "surrogates" string of the emoji entry it points to.
EMOJI_MAPPING: dict[str, str] = {k: EMOJIS["emojis"][v]["surrogates"] for k, v in EMOJIS["nameToEmoji"].items()}

# Reverse mapping for demojizing (emoji string to name).
# Entries are inserted longest emoji string first. EMOJI_CHARS_PATTERN below is built from the keys in this
# order, and `re` tries alternatives left to right, so a long sequence is tried before any shorter emoji that
# it starts with.
REVERSE_EMOJI_MAPPING: dict[str, str] = {}
for surrogates, emoji_index in sorted(EMOJIS["surrogateToEmoji"].items(), key=lambda x: len(x[0]), reverse=True):
    emoji_entry = EMOJIS["emojis"][emoji_index]
    # The first name in the list is the preferred name, except when the emoji has multiple diversity parents:
    # then the last name is used because it is the most specific one,
    # e.g. :handshake_light_skin_tone_dark_skin_tone: vs :handshake_tone1_tone5:
    REVERSE_EMOJI_MAPPING[surrogates] = emoji_entry["names"][-1 if emoji_entry.get("hasMultiDiversityParent") else 0]

del EMOJIS  # Clean up to save memory

# Matches a colon-delimited emoji name such as ":smile:"; group 1 is the name without the colons.
EMOJI_PATTERN = re.compile(r":([a-zA-Z0-9_-]+):")
# Matches any key of REVERSE_EMOJI_MAPPING literally (each key is escaped), in the mapping's insertion order.
EMOJI_CHARS_PATTERN = re.compile("|".join(map(re.escape, REVERSE_EMOJI_MAPPING)))
def emojize(s: str) -> str:
"""Convert a string with emoji names to a string with emoji characters.
@@ -37,3 +48,21 @@ def emojize(s: str) -> str:
return match.group(0)
return EMOJI_PATTERN.sub(replace, s)
def demojize(s: str) -> str:
    """Replace every emoji found in ``s`` with its colon-delimited name.

    Only emoji strings that are keys of ``REVERSE_EMOJI_MAPPING`` are replaced;
    all other text is returned as is.

    Args:
        s (str): The input string, possibly containing emoji characters.

    Returns:
        str: A copy of ``s`` in which each matched emoji is replaced by ``:name:``.
    """

    def to_name(found: re.Match[str]) -> str:
        # EMOJI_CHARS_PATTERN is compiled from the keys of REVERSE_EMOJI_MAPPING,
        # so the matched text is looked up directly.
        name = REVERSE_EMOJI_MAPPING[found.group(0)]
        return f":{name}:"

    return EMOJI_CHARS_PATTERN.sub(to_name, s)

View File

@@ -1,7 +1,7 @@
# Copyright (c) Paillat-dev
# SPDX-License-Identifier: MIT
from dismoji import emojize
from dismoji import REVERSE_EMOJI_MAPPING, demojize, emojize
def test_basic() -> None:
@@ -68,3 +68,42 @@ def test_emoji_with_special_characters() -> None:
]
for input_str, expected_output in special_char_tests:
assert emojize(input_str) == expected_output
def test_demojize_basic() -> None:
    """A single emoji embedded in plain text is replaced by its name."""
    converted = demojize("Hello 😄")
    assert converted == "Hello :smile:"
def test_demojize_no_match() -> None:
    """Text without any emoji comes back unchanged."""
    plain_text = "Hello world"
    assert demojize(plain_text) == plain_text
def test_demojize_multiple_emojis() -> None:
    """Several emojis in one string are each replaced by their name."""
    source = "😄 👋"
    expected = ":smile: :wave:"
    assert demojize(source) == expected
def test_demojize_complex_sentence() -> None:
    """Emojis mixed with punctuation, including a repeated emoji, are all converted."""
    sentence = "Hello 👋, what's up? 😄 ✅ 😄"
    expected = "Hello :wave:, what's up? :smile: :white_check_mark: :smile:"
    assert demojize(sentence) == expected
def test_demojize_surrogate() -> None:
    """Handshake emojis with two skin tones map to their skin-tone-specific names."""
    expected_names = {
        "🫱🏻‍🫲🏿": ":handshake_light_skin_tone_dark_skin_tone:",
        "🫱🏿‍🫲🏻": ":handshake_dark_skin_tone_light_skin_tone:",
        "🫱🏽‍🫲🏻": ":handshake_medium_skin_tone_light_skin_tone:",
        "🫱🏼‍🫲🏿": ":handshake_medium_light_skin_tone_dark_skin_tone:",
        "🫱🏾‍🫲🏻": ":handshake_medium_dark_skin_tone_light_skin_tone:",
    }
    for sequence, expected in expected_names.items():
        assert demojize(sequence) == expected
def test_demojize_all() -> None:
    """Every key of REVERSE_EMOJI_MAPPING, passed on its own, converts to its mapped name."""
    for emoji_chars, mapped_name in REVERSE_EMOJI_MAPPING.items():
        expected = f":{mapped_name}:"
        assert demojize(emoji_chars) == expected