Spaces:

VGG11
/

armenian_chatbot_bert_multilingual

Runtime error

Mary12 committed on Aug 15, 2023

Commit

622bf47

1 Parent(s): 9b19172

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,11 +31,26 @@ def extract_text_from_pdf(file_path):
     pdf_document.close()
     return text
 def model(model_name):
   tokenizer = AutoTokenizer.from_pretrained(model_name)
   model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
-  model_pipeline = pipeline(
     "question-answering",
     model = model,
     tokenizer = tokenizer
@@ -48,12 +63,18 @@ pipe = model(model_name)
 def qa_result( context, question, file, pipe = pipe):
     if file is not None:
-        allowed_types = [".pdf", ".csv", ".doc"]
         extension = "." + file.name.split(".")[-1].lower()
         if not extension in allowed_types:
-            text = "Խնդրում եմ ներբեռնել .pdf, .csv, կամ .doc տիպի ֆայլեր։"
         else:
-            context = extract_text_from_pdf(file.name)
             result = pipe(question=question, context=context)
             answered = result['answer']
             text = remove_references(answered)

     pdf_document.close()
     return text
+def extract_text_from_txt(file_path):
+    """Return the entire contents of the text file at file_path.
+
+    The file is opened in text mode and decoded as UTF-8.
+    """
+    with open(file_path, mode="r", encoding="utf-8") as source:
+        return source.read()
+def extract_text_from_doc(file_path):
+    """Return the text of every paragraph in a Word document.
+
+    Opens file_path with docx.Document and concatenates the text of each
+    paragraph, terminating every paragraph with a newline. A document with
+    no paragraphs yields an empty string.
+    """
+    # NOTE(review): if `docx` is the python-docx package it reads .docx
+    # files, not the legacy binary .doc format - confirm against callers.
+    doc = docx.Document(file_path)
+    # Build the result in one pass and return the accumulated string; the
+    # previous code never initialised the accumulator (`text - ""`) and
+    # returned the loop variable instead of the collected text.
+    return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
 def model(model_name):
   tokenizer = AutoTokenizer.from_pretrained(model_name)
   model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
+  model_pipeline = pipeline4
     "question-answering",
     model = model,
     tokenizer = tokenizer
 def qa_result( context, question, file, pipe = pipe):
     if file is not None:
+        allowed_types = [".pdf", ".txt", ".doc"]
         extension = "." + file.name.split(".")[-1].lower()
         if not extension in allowed_types:
+            text = "Խնդրում եմ ներբեռնել .pdf, .txt, կամ .doc տիպի ֆայլեր։"
         else:
+            if extension is allowed_types[0]:
+                context = extract_text_from_pdf(file.name)
+            elif extension is allowed_types[1]:
+                context = extract_text_from_txt(file.name)
+            else:
+                context = extract_text_from_doc(file.name)
             result = pipe(question=question, context=context)
             answered = result['answer']
             text = remove_references(answered)