zeynepgulhan committed on
Commit
27c69e9
·
verified ·
1 Parent(s): 30bc46c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ import gradio as gr
4
+ import numpy as np
5
+ import torch
6
+ # Load model directly
7
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
8
+
9
# Run on the first CUDA GPU when one is available; otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
10
+
11
+
12
def get_model(model_name="TURKCELL/gibberish-detection-model-tr"):
    """Load the sequence-classification model and its tokenizer.

    Args:
        model_name: Hugging Face Hub id (or local path) passed to both
            ``from_pretrained`` calls. Defaults to the Turkish gibberish
            detection checkpoint, so existing no-argument callers behave
            exactly as before.

    Returns:
        A ``(tokenizer, model)`` tuple. The model has been moved to the
        module-level ``device`` and put in evaluation mode.
    """
    start_time = time.time()
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        do_lower_case=True,
        use_fast=True,
    )
    model.to(device)
    # This app only runs inference; make evaluation mode explicit so dropout
    # and similar layers are disabled regardless of library defaults.
    model.eval()
    print(f'bert model loading time {time.time() - start_time}')
    return tokenizer, model
20
+
21
+
22
# Load the tokenizer and model once at import time so that every request
# handled by the UI reuses the same instances.
tokenizer, model = get_model()
23
+
24
+
25
def get_result_for_one_sample(model, tokenizer, device, sample):
    """Classify a single text sample as ``'gibberish'`` or ``'real'``.

    Args:
        model: Callable classifier; invoked as ``model(**encoded)`` and
            expected to return an object with a ``logits`` tensor.
        tokenizer: Callable tokenizer; invoked with a one-element list and
            expected to return a mapping that supports ``.to(device)``.
        device: Device the encoded inputs are moved to before the forward pass.
        sample: The text to classify.

    Returns:
        ``'gibberish'`` when the highest-scoring class index is 1,
        ``'real'`` when it is 0.

    Raises:
        KeyError: If the predicted class index is neither 0 nor 1.
    """
    label_by_class = {
        1: 'gibberish',
        0: 'real',
    }
    encoded = tokenizer(
        [sample],
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors='pt',
    ).to(device)
    # Inference only: skip autograd bookkeeping so no gradient graph is built
    # (and kept alive) for every request.
    with torch.no_grad():
        output = model(**encoded)
    # The batch holds exactly one sample; take its arg-max class as a plain int.
    predicted_class = int(output.logits.argmax(dim=1)[0])
    return label_by_class[predicted_class]
35
+
36
+
37
def process_sentence_with_bert(sentence):
    """Classify ``sentence`` with the module-level model and log the elapsed time.

    Prints a start message, delegates to ``get_result_for_one_sample`` using
    the module-level ``model``, ``tokenizer`` and ``device``, prints how long
    the call took, and returns the label it produced.
    """
    print('processing text with bert')
    start = time.time()
    # The actual classification is provided by get_result_for_one_sample.
    result = get_result_for_one_sample(model, tokenizer, device, sentence)
    print(f'bert processing time {time.time() - start}')
    return result
44
+
45
+
46
def classify_gibberish(sentence, ignore_words_file):
    """Gradio callback: return the gibberish/real label for ``sentence``.

    Args:
        sentence: Text entered in the UI.
        ignore_words_file: Value of the uploaded-file input. Currently
            accepted but not used.

    Returns:
        The label returned by ``process_sentence_with_bert``.
    """
    # TODO: ignore_words_file still needs to be processed (see Gradio's
    # file-upload examples); for now only the sentence is classified.
    return process_sentence_with_bert(sentence)
50
+
51
+
52
# Build the web UI: a sentence textbox plus a file upload as inputs, and a
# single textbox showing the classification result.
iface = gr.Interface(
    fn=classify_gibberish,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter Sentence Here..."),
        gr.File(label="Upload Ignore Words File"),
    ],
    outputs=gr.Textbox(label="Gibberish Detection Result"),
    title="Simple Gibberish Text Detection For Turkish",
    description="""Simple gibberish text detection given text like
adsfdnsfnıunf
sasdlsöefls.""",
)

# Start serving the interface.
iface.launch()