Mary12 committed on
Commit
622bf47
·
1 Parent(s): 9b19172

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -31,11 +31,26 @@ def extract_text_from_pdf(file_path):
31
  pdf_document.close()
32
  return text
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def model(model_name):
36
  tokenizer = AutoTokenizer.from_pretrained(model_name)
37
  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
38
- model_pipeline = pipeline(
39
  "question-answering",
40
  model = model,
41
  tokenizer = tokenizer
@@ -48,12 +63,18 @@ pipe = model(model_name)
48
 
49
  def qa_result( context, question, file, pipe = pipe):
50
  if file is not None:
51
- allowed_types = [".pdf", ".csv", ".doc"]
52
  extension = "." + file.name.split(".")[-1].lower()
53
  if not extension in allowed_types:
54
- text = "Խնդրում եմ ներբեռնել .pdf, .csv, կամ .doc տիպի ֆայլեր։"
55
  else:
56
- context = extract_text_from_pdf(file.name)
 
 
 
 
 
 
57
  result = pipe(question=question, context=context)
58
  answered = result['answer']
59
  text = remove_references(answered)
 
31
  pdf_document.close()
32
  return text
33
 
34
def extract_text_from_txt(file_path):
    """Return the entire contents of the text file at *file_path*.

    The file is opened in text mode and decoded as UTF-8.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return handle.read()
39
+
40
def extract_text_from_doc(file_path):
    """Extract the text of a Word document, one paragraph per line.

    Args:
        file_path: Path of the document; passed unchanged to
            ``docx.Document``.

    Returns:
        A single string containing the text of every paragraph in
        ``doc.paragraphs``, each followed by a newline. The result is an
        empty string when the document has no paragraphs.

    NOTE(review): python-docx is documented for .docx files; a legacy
    binary .doc file will probably fail to open here -- confirm which
    extensions the caller actually routes to this function.
    """
    doc = docx.Document(file_path)
    # Return the accumulated text, not the loop variable: the previous
    # version never initialised the accumulator (``text - ""``) and
    # returned the last paragraph object instead of the joined string.
    return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
47
+
48
+
49
 
50
  def model(model_name):
51
  tokenizer = AutoTokenizer.from_pretrained(model_name)
52
  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
53
+ model_pipeline = pipeline4
54
  "question-answering",
55
  model = model,
56
  tokenizer = tokenizer
 
63
 
64
  def qa_result( context, question, file, pipe = pipe):
65
  if file is not None:
66
+ allowed_types = [".pdf", ".txt", ".doc"]
67
  extension = "." + file.name.split(".")[-1].lower()
68
  if not extension in allowed_types:
69
+ text = "Խնդրում եմ ներբեռնել .pdf, .txt, կամ .doc տիպի ֆայլեր։"
70
  else:
71
+ if extension is allowed_types[0]:
72
+ context = extract_text_from_pdf(file.name)
73
+ elif extension is allowed_types[1]:
74
+ context = extract_text_from_txt(file.name)
75
+ else:
76
+ context = extract_text_from_doc(file.name)
77
+
78
  result = pipe(question=question, context=context)
79
  answered = result['answer']
80
  text = remove_references(answered)