Skip to content
Snippets Groups Projects
Commit 4e17d567 authored by Ruben Bimberg's avatar Ruben Bimberg 💻
Browse files

Remove celadon

parent 63b689b9
No related merge requests found
Pipeline #144137 failed with stages
in 38 seconds
...@@ -5,5 +5,4 @@ __pycache__ ...@@ -5,5 +5,4 @@ __pycache__
/test /test
/train/distilbert-token-swearword /train/distilbert-token-swearword
/train/celadon /train/celadon
/app/celadon
/.key /.key
\ No newline at end of file
from fastapi import APIRouter, Body, Request from fastapi import APIRouter, Body, Request
from app.security.oauth2 import ROOM_DEPENDENCIES, per_req_config_modifier from app.security.oauth2 import ROOM_DEPENDENCIES, per_req_config_modifier
from app.celadon.model import MultiHeadDebertaForSequenceClassification
from transformers import AutoTokenizer
from detoxify import Detoxify from detoxify import Detoxify
from openai import OpenAI from openai import OpenAI
from more_itertools import chunked from more_itertools import chunked
from transformers import pipeline from transformers import pipeline
celadon_tokenizer = AutoTokenizer.from_pretrained("PleIAs/celadon")
celadon = MultiHeadDebertaForSequenceClassification.from_pretrained("PleIAs/celadon")
celadon.eval()
detox = Detoxify("multilingual") detox = Detoxify("multilingual")
sentiment_pipeline = pipeline( sentiment_pipeline = pipeline(
...@@ -169,42 +163,6 @@ def run_detox(text_list): ...@@ -169,42 +163,6 @@ def run_detox(text_list):
return [{k: v[i] for (k, v) in result_dict.items()} for i in range(length)] return [{k: v[i] for (k, v) in result_dict.items()} for i in range(length)]
def run_celadon(text_list):
    """Classify each text in *text_list* with the celadon model.

    Returns one dict per input text mapping the five toxicity
    categories to scores normalised into [0, 1], plus a "Flagged"
    key set to "Toxic", "Mild", or "No" based on the summed and
    maximum category scores.
    """
    LABEL_MAX = 3  # raw class labels are 0..3; divide to normalise to [0, 1]
    encoded = celadon_tokenizer(
        text_list, return_tensors="pt", padding=True, truncation=True
    )
    logits = celadon(
        input_ids=encoded["input_ids"], attention_mask=encoded["attention_mask"]
    )
    labels = logits.argmax(dim=-1).squeeze()
    if len(labels.size()) == 1:
        # squeeze() collapsed the batch axis for a single input;
        # restore a batch-like sequence so the loop below is uniform.
        rows = [labels]
    else:
        rows = [labels[i] for i in range(labels.size()[0])]
    categories = ("Race/Origin", "Gender/Sex", "Religion", "Ability", "Violence")
    scores = [
        {name: row[idx].item() / LABEL_MAX for idx, name in enumerate(categories)}
        for row in rows
    ]
    for entry in scores:
        # "Flagged" is added after the sum/max are taken over the five
        # category scores only.
        total = sum(entry.values())
        if total > 2:
            entry["Flagged"] = "Toxic"
        elif total > 1 or max(entry.values()) == 1:
            entry["Flagged"] = "Mild"
        else:
            entry["Flagged"] = "No"
    return scores
def run_moderate(text_list, api_key): def run_moderate(text_list, api_key):
result = {} result = {}
...@@ -213,11 +171,6 @@ def run_moderate(text_list, api_key): ...@@ -213,11 +171,6 @@ def run_moderate(text_list, api_key):
result_list.extend(run_detox(texts)) result_list.extend(run_detox(texts))
result["detoxify"] = result_list result["detoxify"] = result_list
result_list = []
for texts in chunked(text_list, 2):
result_list.extend(run_celadon(texts))
result["celadon"] = result_list
if api_key is not None: if api_key is not None:
result_list = [] result_list = []
for texts in chunked(text_list, 32): for texts in chunked(text_list, 32):
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment