Spaces:
Runtime error
Runtime error
File size: 7,058 Bytes
3a52501 4829b64 258c872 3a52501 d1d936a a09bb13 d1d936a 3a52501 53b0cab 258c872 4829b64 258c872 3a52501 638d345 258c872 4829b64 258c872 4829b64 258c872 4829b64 819654b 4829b64 3a52501 4829b64 3a52501 4829b64 3a52501 4829b64 3a52501 4829b64 3a52501 4829b64 3a52501 4829b64 3a52501 4829b64 819654b 258c872 4829b64 258c872 d1d936a 4829b64 258c872 4829b64 8aa124c 4829b64 258c872 819654b 5f2ec77 a09bb13 258c872 819654b 258c872 d1d936a 6286028 d1d936a 258c872 53b0cab 258c872 53b0cab 258c872 53b0cab 258c872 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
#!/usr/bin/env python
# coding: utf-8
import gradio as gr
import numpy as np
from transformers import (
AutoModelForSequenceClassification,
AutoTokenizer,
TextClassificationPipeline,
pipeline,
)
from sklearn import preprocessing
from langdetect import detect
from matplotlib import pyplot as plt
import imageio
import logging
import warnings
# Set the root logger to INFO so the logging.info() calls in textclassification are emitted.
logging.getLogger().setLevel(logging.INFO)
# Bilingual (German, then French) disclaimer rendered in the UI header below the titles.
DESCRIPTION = """Diese Anwendung teilt Vorstösse an das federführende Departement zu und
macht einen Vorschlag für das zuständige Amt. Der Vorschlag der Anwendung ist nicht
100% richtig. Der Zuteilungsvorschlag muss von einer Fachperson geprüft und die
effektive Zuteilung muss nach eigenem Ermessen erfolgen. \n\n
Cette application attribue les interventions au département chef de file et fait une
proposition à l'office compétent. La proposition de l'application n'est pas correcte
à 100%. La proposition d'attribution doit être vérifiée par un spécialiste et l'attribution
effective doit être faite à la discrétion de l'utilisateur."""
# Page titles (German and French), both rendered as level-1 headings in the UI header.
TITLE_DE = (
"Automatische Zuteilung von Vorstössen an das federführende Departement bzw. Amt"
)
TITLE_FR = "Où aller ? Classification des départements & bureaux"
# Returned as the first output when the detected language is neither "fr" nor "de".
UNKNOWN_LANG_TEXT = (
"The language is not recognized, it must be either in German or in French."
)
# Placeholder shown inside the input textbox.
PLACEHOLDER_TEXT = "Geben Sie bitte den Titel und den 'Submitted Text' des Vorstoss ein.\nVeuillez entrer le titre et le 'Submitted Text' de la requête."
# Prefixes of the output text when the highest score is below ML_MODEL_SURE;
# show_chosen_category picks the French one for language == "fr", otherwise the German one.
UNSURE_DE_TEXT = "Das ML-Modell ist nicht sicher. Die Zuteilung könnte sein: \n\n"
UNSURE_FR_TEXT = "Le modèle ML n'est pas sûr. L'allocation pourrait être: \n\n"
# Confidence threshold: a prediction counts as "sure" when its highest score is >= this value.
ML_MODEL_SURE = 0.6
# Bar labels for the department prediction when the detected language is "fr".
# textclassification pairs these with the department model's scores by position,
# so the order presumably mirrors that model's class order — verify before reordering.
BARS_DEP_FR = (
"DDPS",
"DFI",
"AS-MPC",
"DFJP",
"DEFR",
"DETEC",
"DFAE",
"Parl",
"ChF",
"DFF",
"AF",
"TF",
)
# Bar labels for the department prediction for every other accepted input (i.e. "de").
# Same length as BARS_DEP_FR; entries look like the positional counterparts of the
# French labels — TODO confirm against the model's label mapping.
BARS_DEP_DE = (
"VBS",
"EDI",
"AB-BA",
"EJPD",
"WBF",
"UVEK",
"EDA",
"Parl",
"BK",
"EFD",
"BV",
"BGer",
)
def load_model(modelFolder):
    """Build a text-classification pipeline from a saved model location.

    Args:
        modelFolder: Value handed to ``from_pretrained`` for both the
            sequence-classification model and its tokenizer.

    Returns:
        A ``TextClassificationPipeline`` wrapping the loaded model and tokenizer.
    """
    # The model is loaded first, then the tokenizer, both from the same location.
    classifier = AutoModelForSequenceClassification.from_pretrained(modelFolder)
    return TextClassificationPipeline(
        model=classifier,
        tokenizer=AutoTokenizer.from_pretrained(modelFolder),
    )
# Lazily created French->German translation pipeline, shared by all calls.
_FR_DE_TRANSLATOR = None


def _get_fr_de_translator():
    """Return the shared French-to-German translation pipeline, creating it on first use."""
    global _FR_DE_TRANSLATOR
    if _FR_DE_TRANSLATOR is None:
        # Building the pipeline loads the translation model, so do it only once
        # instead of on every request.
        _FR_DE_TRANSLATOR = pipeline(
            "translation", model="Helsinki-NLP/opus-mt-fr-de"
        )
    return _FR_DE_TRANSLATOR


def translate_to_de(SubmittedText):
    """Translate French user input to German before classification.

    Only the first 1000 characters of the input are translated.

    Args:
        SubmittedText: The French text entered by the user.

    Returns:
        The German translation of the (truncated) input as a string.
    """
    translated = _get_fr_de_translator()(SubmittedText[:1000])
    # The pipeline returns a list with one dict per input text.
    return translated[0]["translation_text"]
def create_bar_plot(rates, barnames):
    """Draw a horizontal bar chart on the current pyplot figure and return it as an image.

    The chart is drawn with the global ``pyplot`` state and the figure is not
    cleared here; the caller is expected to clear it beforehand
    (``textclassification`` calls ``plt.clf()`` before each plot).

    Args:
        rates: One bar length per entry in ``barnames``.
        barnames: Labels for the bars, used as y-axis tick labels.

    Returns:
        A tuple ``(image, barnames)``: the rendered chart as loaded by
        ``imageio.v2.imread``, and ``barnames`` unchanged.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    positions = np.arange(len(barnames))
    plt.barh(positions, rates)
    plt.yticks(positions, barnames)
    # Save the bar chart as png and load it (enables better display).
    # A private temporary directory replaces the fixed "rates.png" in the working
    # directory, so concurrent requests cannot overwrite each other's file and a
    # read-only working directory does not break plotting.
    with tempfile.TemporaryDirectory() as tmp_dir:
        png_path = os.path.join(tmp_dir, "rates.png")
        plt.savefig(png_path)
        image = imageio.v2.imread(png_path)
    return image, barnames
def show_chosen_category(barnames, rates, language):
    """Create the output text for one prediction.

    If the highest score is at least ``ML_MODEL_SURE``, only that category is
    shown. Otherwise the text starts with a "model is not sure" disclaimer
    (French for ``language == "fr"``, German otherwise) followed by one line
    per category whose score is >= 10%, highest score first.

    Scores are rendered as rounded whole percentages (0.5 -> "50%",
    1.0 -> "100%"). The ``rates`` argument is not modified.

    Args:
        barnames: Category labels, indexable by position.
        rates: One score per label, expected as fractions in [0, 1].
        language: Detected language code; only "fr" selects the French text.

    Returns:
        The formatted output string.

    Raises:
        ValueError: If ``rates`` is empty.
    """
    # Work on an array copy so the caller's list is never mutated.
    scores = np.array(rates, dtype=float)
    best = int(np.argmax(scores))
    distance = "\t\t\t\t\t"

    def as_percent(score):
        # Proper percent formatting; slicing str(score) breaks for 0.5, 1.0 or 1e-05.
        return f"{score:.0%}"

    # ML model pretty sure, show only one department
    if not scores[best] < ML_MODEL_SURE:
        return as_percent(scores[best]) + distance + barnames[best]

    # ML model not sure: list each category that has a probability >= 10%
    parts = [UNSURE_FR_TEXT if language == "fr" else UNSURE_DE_TEXT]
    # Stable sort on the negated scores: descending order, ties keep index order.
    for idx in np.argsort(-scores, kind="stable"):
        if scores[idx] < 0.1:
            break
        parts.append(
            "\t" + as_percent(scores[idx]) + distance + barnames[idx] + "\n"
        )
    return "".join(parts)
# Build both classification pipelines once at import time (see load_model):
# pipeDep yields the department prediction, pipeOffice the office ("Amt") prediction.
pipeDep = load_model("saved_model_dep")
pipeOffice = load_model("saved_model_office")
# Label encoder whose classes_ are read from classes_office.npy; textclassification
# uses these classes as the bar labels for the office model's scores.
labelencoderOffice = preprocessing.LabelEncoder()
labelencoderOffice.classes_ = np.load("classes_office.npy")
def textclassification(SubmittedText):
    """Classify a submitted text by department and by office.

    The language is detected first. French input is translated to German,
    German input is used as is, and anything else (including empty input or
    text whose language cannot be detected) yields the "language not
    recognized" message with no images. Both models then score the first
    1000 characters of the (possibly translated) text.

    Args:
        SubmittedText: The text entered by the user.

    Returns:
        A 4-tuple ``(department_text, department_image, office_text,
        office_image)``. For unusable input it is
        ``(UNKNOWN_LANG_TEXT, None, None, None)``.
    """
    no_result = (UNKNOWN_LANG_TEXT, None, None, None)
    # langdetect raises on empty / feature-less text; treat that as unrecognized
    # language instead of letting the request fail.
    if not SubmittedText or not SubmittedText.strip():
        return no_result
    # Function-scope import keeps this block self-contained.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        language = detect(SubmittedText)
    except LangDetectException:
        logging.info("SubmittedText received, but no language could be detected.")
        return no_result
    logging.info(
        f"SubmittedText received. Detected language: {language}. SubmittedText: {SubmittedText}"
    )
    # Translate the input to german if necessary
    if language == "fr":
        SubmittedText = translate_to_de(SubmittedText)
    elif language != "de":
        return no_result
    # Make the prediction with the 1000 first characters
    images = []
    chosenCategoryTexts = []
    labelsDep = BARS_DEP_FR if language == "fr" else BARS_DEP_DE
    labelsOffice = labelencoderOffice.classes_
    for pipe, barnames in zip((pipeDep, pipeOffice), (labelsDep, labelsOffice)):
        # Start each chart from an empty figure; create_bar_plot draws on the current one.
        plt.clf()
        # catch deprecation warning, as new functionality following the deprecated way
        # sorts results the wrong way and cannot be easily fixed
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            prediction = pipe(SubmittedText[0:1000], return_all_scores=True)
        # One score per class for the single input text.
        rates = [row["score"] for row in prediction[0]]
        # Create barplot & output text
        im, barnames = create_bar_plot(rates, barnames)
        images.append(im)
        chosenCategoryText = show_chosen_category(barnames, rates, language)
        chosenCategoryTexts.append(chosenCategoryText)
    # return chosenCategoryText & image for both predictions
    logging.info(
        f"Prediction Department: {chosenCategoryTexts[0]}\n\nPrediction Amt: {chosenCategoryTexts[1]}"
    )
    return chosenCategoryTexts[0], images[0], chosenCategoryTexts[1], images[1]
# Launch UI
# Theme matching BK CH, built up front and handed to the Blocks container.
_bk_theme = gr.themes.Monochrome(
    primary_hue="red",
    secondary_hue="red",
    font=[gr.themes.GoogleFont("Inter"), "Arial", "sans-serif"],
)
with gr.Blocks(theme=_bk_theme) as demo:
    # Header: both titles as level-1 headings, followed by the bilingual disclaimer.
    gr.Markdown(f"# {TITLE_DE}\n # {TITLE_FR}\n\n {DESCRIPTION}")
    # Three equally scaled columns: input, department prediction, office prediction.
    with gr.Row():
        with gr.Column(scale=2):
            vorstoss_input = gr.Textbox(
                label="Vorstosstext:",
                lines=28,
                placeholder=PLACEHOLDER_TEXT,
            )
            predict_btn = gr.Button("Submit | Soumettre")
        with gr.Column(scale=2):
            output_text_dep = gr.Textbox(label="Vorschlag Departement:")
            output_image_dep = gr.Image(label="Departement")
        with gr.Column(scale=2):
            output_text_office = gr.Textbox(label="Vorschlag Amt:")
            output_image_office = gr.Image(label="Amt")
    # Output components in the same order as the tuple returned by textclassification.
    prediction_outputs = [
        output_text_dep,
        output_image_dep,
        output_text_office,
        output_image_office,
    ]
    predict_btn.click(
        fn=textclassification,
        inputs=vorstoss_input,
        outputs=prediction_outputs,
        api_name="predict",
    )
demo.launch()
|