mirror of
https://github.com/Paillat-dev/Botator.git
synced 2026-01-02 09:16:19 +00:00
[GENERAL] Code reformatting using Black standard
This commit is contained in:
173
code/toxicity.py
173
code/toxicity.py
@@ -1,7 +1,19 @@
|
||||
from googleapiclient import discovery
|
||||
from config import perspective_api_key
|
||||
import re
|
||||
toxicity_names = ["toxicity", "severe_toxicity", "identity_attack", "insult", "profanity", "threat", "sexually_explicit", "flirtation", "obscene", "spam"]
|
||||
|
||||
toxicity_names = [
|
||||
"toxicity",
|
||||
"severe_toxicity",
|
||||
"identity_attack",
|
||||
"insult",
|
||||
"profanity",
|
||||
"threat",
|
||||
"sexually_explicit",
|
||||
"flirtation",
|
||||
"obscene",
|
||||
"spam",
|
||||
]
|
||||
toxicity_definitions = [
|
||||
"A rude, disrespectful, or unreasonable message that is likely to make people leave a discussion.",
|
||||
"A very hateful, aggressive, disrespectful message or otherwise very likely to make a user leave a discussion or give up on sharing their perspective. This attribute is much less sensitive to more mild forms of toxicity, such as messages that include positive uses of curse words.",
|
||||
@@ -12,66 +24,133 @@ toxicity_definitions = [
|
||||
"Contains references to sexual acts, body parts, or other lewd content. \n **English only**",
|
||||
"Pickup lines, complimenting appearance, subtle sexual innuendos, etc. \n **English only**",
|
||||
"Obscene or vulgar language such as cursing. \n **English only**",
|
||||
"Irrelevant and unsolicited commercial content. \n **English only**"
|
||||
"Irrelevant and unsolicited commercial content. \n **English only**",
|
||||
]
|
||||
|
||||
|
||||
|
||||
client = discovery.build("commentanalyzer",
|
||||
"v1alpha1",
|
||||
developerKey=perspective_api_key,
|
||||
discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
|
||||
static_discovery=False,
|
||||
)
|
||||
client = discovery.build(
|
||||
"commentanalyzer",
|
||||
"v1alpha1",
|
||||
developerKey=perspective_api_key,
|
||||
discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
|
||||
static_discovery=False,
|
||||
)
|
||||
|
||||
analyze_request = {
|
||||
'comment': {'text': ''}, # The text to analyze
|
||||
#we will ask the following attributes to google: TOXICITY, SEVERE_TOXICITY, IDENTITY_ATTACK, INSULT, PRPFANITY, THREAT, SEXUALLY_EXPLICIT, FLIRTATION, OBSCENE, SPAM
|
||||
'requestedAttributes': {'TOXICITY': {}, 'SEVERE_TOXICITY': {}, 'IDENTITY_ATTACK': {}, 'INSULT': {}, 'PROFANITY': {}, 'THREAT': {}, 'SEXUALLY_EXPLICIT': {}, 'FLIRTATION': {}, 'OBSCENE': {}, 'SPAM': {}},
|
||||
#we will analyze the text in any language automatically detected by google
|
||||
'languages': [],
|
||||
'doNotStore': 'true' # We don't want google to store the data because of privacy reasons & the GDPR (General Data Protection Regulation, an EU law that protects the privacy of EU citizens and residents for data privacy and security purposes https://gdpr-info.eu/)
|
||||
"comment": {"text": ""}, # The text to analyze
|
||||
# we will ask the following attributes to google: TOXICITY, SEVERE_TOXICITY, IDENTITY_ATTACK, INSULT, PRPFANITY, THREAT, SEXUALLY_EXPLICIT, FLIRTATION, OBSCENE, SPAM
|
||||
"requestedAttributes": {
|
||||
"TOXICITY": {},
|
||||
"SEVERE_TOXICITY": {},
|
||||
"IDENTITY_ATTACK": {},
|
||||
"INSULT": {},
|
||||
"PROFANITY": {},
|
||||
"THREAT": {},
|
||||
"SEXUALLY_EXPLICIT": {},
|
||||
"FLIRTATION": {},
|
||||
"OBSCENE": {},
|
||||
"SPAM": {},
|
||||
},
|
||||
# we will analyze the text in any language automatically detected by google
|
||||
"languages": [],
|
||||
"doNotStore": "true", # We don't want google to store the data because of privacy reasons & the GDPR (General Data Protection Regulation, an EU law that protects the privacy of EU citizens and residents for data privacy and security purposes https://gdpr-info.eu/)
|
||||
}
|
||||
analyze_request_not_en = {
|
||||
'comment': {'text': ''}, # The text to analyze
|
||||
#we will ask the following attributes to google: TOXICITY, SEVERE_TOXICITY, IDENTITY_ATTACK, INSULT, PRPFANITY, THREAT, SEXUALLY_EXPLICIT, FLIRTATION, OBSCENE, SPAM
|
||||
'requestedAttributes': {'TOXICITY': {}, 'SEVERE_TOXICITY': {}, 'IDENTITY_ATTACK': {}, 'INSULT': {}, 'PROFANITY': {}, 'THREAT': {}},
|
||||
#we will analyze the text in any language automatically detected by google
|
||||
'languages': [],
|
||||
'doNotStore': 'true' # We don't want google to store the data because of privacy reasons & the GDPR (General Data Protection Regulation, an EU law that protects the privacy of EU citizens and residents for data privacy and security purposes https://gdpr-info.eu/)
|
||||
}
|
||||
"comment": {"text": ""}, # The text to analyze
|
||||
# we will ask the following attributes to google: TOXICITY, SEVERE_TOXICITY, IDENTITY_ATTACK, INSULT, PRPFANITY, THREAT, SEXUALLY_EXPLICIT, FLIRTATION, OBSCENE, SPAM
|
||||
"requestedAttributes": {
|
||||
"TOXICITY": {},
|
||||
"SEVERE_TOXICITY": {},
|
||||
"IDENTITY_ATTACK": {},
|
||||
"INSULT": {},
|
||||
"PROFANITY": {},
|
||||
"THREAT": {},
|
||||
},
|
||||
# we will analyze the text in any language automatically detected by google
|
||||
"languages": [],
|
||||
"doNotStore": "true", # We don't want google to store the data because of privacy reasons & the GDPR (General Data Protection Regulation, an EU law that protects the privacy of EU citizens and residents for data privacy and security purposes https://gdpr-info.eu/)
|
||||
}
|
||||
|
||||
|
||||
def get_toxicity(message: str):
|
||||
#we first remove all kind of markdown from the message to avoid exploits
|
||||
message = re.sub(r'\*([^*]+)\*', r'\1', message)
|
||||
message = re.sub(r'\_([^_]+)\_', r'\1', message)
|
||||
message = re.sub(r'\*\*([^*]+)\*\*', r'\1', message)
|
||||
message = re.sub(r'\_\_([^_]+)\_\_', r'\1', message)
|
||||
message = re.sub(r'\|\|([^|]+)\|\|', r'\1', message)
|
||||
message = re.sub(r'\~([^~]+)\~', r'\1', message)
|
||||
message = re.sub(r'\~\~([^~]+)\~\~', r'\1', message)
|
||||
message = re.sub(r'\`([^`]+)\`', r'\1', message)
|
||||
message = re.sub(r'\`\`\`([^`]+)\`\`\`', r'\1', message)
|
||||
|
||||
#we try doing the request in english, but if we get 'errorType': 'LANGUAGE_NOT_SUPPORTED_BY_ATTRIBUTE' we try again with the analyze_request_not_en
|
||||
# we first remove all kind of markdown from the message to avoid exploits
|
||||
message = re.sub(r"\*([^*]+)\*", r"\1", message)
|
||||
message = re.sub(r"\_([^_]+)\_", r"\1", message)
|
||||
message = re.sub(r"\*\*([^*]+)\*\*", r"\1", message)
|
||||
message = re.sub(r"\_\_([^_]+)\_\_", r"\1", message)
|
||||
message = re.sub(r"\|\|([^|]+)\|\|", r"\1", message)
|
||||
message = re.sub(r"\~([^~]+)\~", r"\1", message)
|
||||
message = re.sub(r"\~\~([^~]+)\~\~", r"\1", message)
|
||||
message = re.sub(r"\`([^`]+)\`", r"\1", message)
|
||||
message = re.sub(r"\`\`\`([^`]+)\`\`\`", r"\1", message)
|
||||
|
||||
# we try doing the request in english, but if we get 'errorType': 'LANGUAGE_NOT_SUPPORTED_BY_ATTRIBUTE' we try again with the analyze_request_not_en
|
||||
try:
|
||||
analyze_request['comment']['text'] = message
|
||||
analyze_request["comment"]["text"] = message
|
||||
response = client.comments().analyze(body=analyze_request).execute()
|
||||
except:
|
||||
analyze_request_not_en['comment']['text'] = message
|
||||
analyze_request_not_en["comment"]["text"] = message
|
||||
response = client.comments().analyze(body=analyze_request_not_en).execute()
|
||||
try: return [float(response['attributeScores']['TOXICITY']['summaryScore']['value']), float(response['attributeScores']['SEVERE_TOXICITY']['summaryScore']['value']), float(response['attributeScores']['IDENTITY_ATTACK']['summaryScore']['value']), float(response['attributeScores']['INSULT']['summaryScore']['value']), float(response['attributeScores']['PROFANITY']['summaryScore']['value']), float(response['attributeScores']['THREAT']['summaryScore']['value']), float(response['attributeScores']['SEXUALLY_EXPLICIT']['summaryScore']['value']), float(response['attributeScores']['FLIRTATION']['summaryScore']['value']), float(response['attributeScores']['OBSCENE']['summaryScore']['value']), float(response['attributeScores']['SPAM']['summaryScore']['value'])]
|
||||
except: return [float(response['attributeScores']['TOXICITY']['summaryScore']['value']), float(response['attributeScores']['SEVERE_TOXICITY']['summaryScore']['value']), float(response['attributeScores']['IDENTITY_ATTACK']['summaryScore']['value']), float(response['attributeScores']['INSULT']['summaryScore']['value']), float(response['attributeScores']['PROFANITY']['summaryScore']['value']), float(response['attributeScores']['THREAT']['summaryScore']['value'])]
|
||||
try:
|
||||
return [
|
||||
float(response["attributeScores"]["TOXICITY"]["summaryScore"]["value"]),
|
||||
float(
|
||||
response["attributeScores"]["SEVERE_TOXICITY"]["summaryScore"]["value"]
|
||||
),
|
||||
float(
|
||||
response["attributeScores"]["IDENTITY_ATTACK"]["summaryScore"]["value"]
|
||||
),
|
||||
float(response["attributeScores"]["INSULT"]["summaryScore"]["value"]),
|
||||
float(response["attributeScores"]["PROFANITY"]["summaryScore"]["value"]),
|
||||
float(response["attributeScores"]["THREAT"]["summaryScore"]["value"]),
|
||||
float(
|
||||
response["attributeScores"]["SEXUALLY_EXPLICIT"]["summaryScore"][
|
||||
"value"
|
||||
]
|
||||
),
|
||||
float(response["attributeScores"]["FLIRTATION"]["summaryScore"]["value"]),
|
||||
float(response["attributeScores"]["OBSCENE"]["summaryScore"]["value"]),
|
||||
float(response["attributeScores"]["SPAM"]["summaryScore"]["value"]),
|
||||
]
|
||||
except:
|
||||
return [
|
||||
float(response["attributeScores"]["TOXICITY"]["summaryScore"]["value"]),
|
||||
float(
|
||||
response["attributeScores"]["SEVERE_TOXICITY"]["summaryScore"]["value"]
|
||||
),
|
||||
float(
|
||||
response["attributeScores"]["IDENTITY_ATTACK"]["summaryScore"]["value"]
|
||||
),
|
||||
float(response["attributeScores"]["INSULT"]["summaryScore"]["value"]),
|
||||
float(response["attributeScores"]["PROFANITY"]["summaryScore"]["value"]),
|
||||
float(response["attributeScores"]["THREAT"]["summaryScore"]["value"]),
|
||||
]
|
||||
|
||||
#test part
|
||||
|
||||
# test part
|
||||
def test():
|
||||
print("Testing toxicity.py...")
|
||||
print("Hello world:")
|
||||
result = get_toxicity('Hello world')
|
||||
try: print(f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}; SEXUALLY EXPLICIT: {result[6]}; FLIRTATION: {result[7]}; OBSCENE: {result[8]}; SPAM: {result[9]}")
|
||||
except: print(f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}")
|
||||
result = get_toxicity("Hello world")
|
||||
try:
|
||||
print(
|
||||
f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}; SEXUALLY EXPLICIT: {result[6]}; FLIRTATION: {result[7]}; OBSCENE: {result[8]}; SPAM: {result[9]}"
|
||||
)
|
||||
except:
|
||||
print(
|
||||
f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}"
|
||||
)
|
||||
print("HELLO WORLD GET ABSOLUTELY BUY MY NEW MERCH OMGGGGGGG:")
|
||||
result = get_toxicity('HELLO WORLD GET ABSOLUTELY BUY MY NEW MERCH OMGGGGGGG')
|
||||
try: print(f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}; SEXUALLY EXPLICIT: {result[6]}; FLIRTATION: {result[7]}; OBSCENE: {result[8]}; SPAM: {result[9]}")
|
||||
except: print(f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}")
|
||||
#uncomment the following line to test the code
|
||||
#test()
|
||||
result = get_toxicity("HELLO WORLD GET ABSOLUTELY BUY MY NEW MERCH OMGGGGGGG")
|
||||
try:
|
||||
print(
|
||||
f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}; SEXUALLY EXPLICIT: {result[6]}; FLIRTATION: {result[7]}; OBSCENE: {result[8]}; SPAM: {result[9]}"
|
||||
)
|
||||
except:
|
||||
print(
|
||||
f"TOXICITY: {result[0]}; SEVERE_TOXICITY: {result[1]}; IDENTITY ATTACK: {result[2]}; INSULT: {result[3]}; PROFANITY: {result[4]}; THREAT: {result[5]}"
|
||||
)
|
||||
|
||||
|
||||
# uncomment the following line to test the code
|
||||
# test()
|
||||
|
||||
Reference in New Issue
Block a user