Spaces:
Sleeping
Sleeping
from haystack.document_stores import FAISSDocumentStore | |
from haystack.nodes import DensePassageRetriever, FARMReader | |
from haystack.pipelines import ExtractiveQAPipeline, DocumentSearchPipeline | |
from haystack.utils import clean_wiki_text, print_answers | |
from sentence_transformers import SentenceTransformer | |
from haystack.nodes import EmbeddingRetriever | |
import streamlit as st | |
import os | |
import pandas as pd | |
from datetime import datetime | |
import json | |
import re | |
def intializeFAISS(docs):
    """Index ``docs`` in a FAISS document store and cache the QA components.

    The ``FARMReader`` and the ``EmbeddingRetriever`` are created only once
    and kept in ``st.session_state`` (keys ``reader`` and ``retriever``).
    The document store itself is built on every call: existing documents are
    deleted, ``docs`` are written, their embeddings are computed with the
    cached retriever and the index is saved under ``faiss_index``.

    Args:
        docs: Documents handed unchanged to ``document_store.write_documents``.

    Returns:
        None. Any exception is reported on stdout and not re-raised.
    """
    db_file_name = 'faiss_document_store.db'
    faiss_index_file = 'faiss_index'
    try:
        if 'reader' not in st.session_state:
            st.session_state.reader = FARMReader(
                model_name_or_path="deepset/roberta-base-squad2",
                use_gpu=False,
            )

        if os.path.exists(faiss_index_file):
            # NOTE(review): ``index`` receives the saved index file name here;
            # confirm against the FAISSDocumentStore docs whether loading the
            # saved index (``FAISSDocumentStore.load``) was intended instead.
            document_store = FAISSDocumentStore(
                sql_url=f"sqlite:///{db_file_name}",
                index=faiss_index_file,
                embedding_dim=384,
            )
        else:
            document_store = FAISSDocumentStore(
                embedding_dim=384,
                faiss_index_factory_str="Flat",
            )

        if 'retriever' not in st.session_state:
            st.session_state.retriever = EmbeddingRetriever(
                document_store=document_store,
                embedding_model="sentence-transformers/all-MiniLM-L6-v2",
                model_format="sentence_transformers",
                use_gpu=False,
            )
        else:
            # The cached retriever still points at the store created on an
            # earlier call; rebind it so queries hit the store filled below.
            st.session_state.retriever.document_store = document_store

        document_store.delete_all_documents()
        document_store.write_documents(docs)
        document_store.update_embeddings(st.session_state.retriever)
        document_store.save(index_path=faiss_index_file)
    except Exception as ex:
        print('--error--')
        print('--intialization method--')
        print(ex)
def dummy(r):
    """Return a new empty dict; ``r`` is accepted but never used."""
    return dict()
def prepare_doc(r):
    """Turn one employee record into a ``{'content', 'meta'}`` document dict.

    Args:
        r: Mapping from column name to value for a single employee. Every
            key read below must be present, otherwise ``KeyError`` is raised.

    Returns:
        A dict with ``content`` (one descriptive sentence block built from
        the record) and ``meta`` (the selected fields copied verbatim).
    """
    # Adjacent f-string literals are concatenated into a single string.
    summary = (
        f"{r['Salutation']} {r['Initial']} {r['Name']}, "
        f"Employee Id is {r['Employee Id']} and user id is {r['User Id']}, "
        f"is from country {r['Country']}, "
        f"Supervisor / Manager name is {r['Supervisor / Manager']} "
        f"and Contracting Company is {r['Contracting Company']}, "
        f"Primary Industry is {r['Primary Industry']} "
        f"and Secondary Industry is {r['Secondary Industry']}, "
        f"there Sector is {r['Sector']}, "
        f"they are expertise in {r['Expertise']}, "
        f"there role is {r['Industry Role']}. "
        f"There last Last Promotion Date is {r['Last Promotion Date']} "
        f"and {r['Last Promotional Level']}, "
        f"There Job Title is {r['Job Title']}, "
        f"they are working here since {r['Professional Since']}, "
        f"there hired date is {r['Hired Date']}, "
        f"there Relevant is {r['Relevant']}, "
        f"Employee Sponser is {r['Employee Sponser']}, "
        f"Job Description is {r['Job Description']}, "
        f"Emergency Contact Name is {r['Emergency Contact Name']} "
        f"and Emergency Contact Number is {r['Emergency Contact Number']}, "
        f"Regional Supervisor is {r['Regional Supervisor']} "
        f"Office Supervisor is {r['Office Supervisor']} "
        f"Engagement Supervisor is {r['Engagement Supervisor']} "
    )

    # Fields copied into the metadata, in the order they are stored.
    meta_fields = (
        "Salutation",
        "Initial",
        "User Id",
        "Name",
        "Employee Id",
        "Country",
        "Supervisor / Manager",
        "Contracting Company",
        "Primary Industry",
        "Secondary Industry",
        "Sector",
        "Expertise",
        "Industry Role",
        "Designation",
        "Grade",
        "Target Chargeability %",
        "Charge Out Rate",
        "Last Promotion Date",
        "Last Promotional Level",
        "Job Title",
        "Professional Since",
        "Hired Date",
        "Relevant",
        "Employee Sponser",
        "Job Description",
        "Emergency Contact Name",
        "Emergency Contact Number",
        "Regional Supervisor",
        "Office Supervisor",
        "Engagement Supervisor",
    )
    metadata = {field: r[field] for field in meta_fields}

    return {'content': summary, 'meta': metadata}
def on_submission(question):
    """Answer ``question`` with the cached reader/retriever and show the result.

    An ``ExtractiveQAPipeline`` is built from ``st.session_state.reader`` and
    ``st.session_state.retriever`` and run with ``top_k`` 5 for the retriever
    and 1 for the reader. The top answer is rendered in the app.

    Args:
        question: The query string passed to the pipeline.

    Returns:
        The top answer converted with ``str``, or ``None`` when the
        components are not initialised, no answer is produced, or an error
        occurs (errors are printed to stdout and not re-raised).
    """
    try:
        # Both components are only created once a file has been indexed.
        if 'reader' not in st.session_state or 'retriever' not in st.session_state:
            st.warning('Please upload a CSV file before asking a question.')
            return None

        print('reader')
        print(st.session_state.reader)
        print('retriever')
        print(st.session_state.retriever)

        qa_pipeline = ExtractiveQAPipeline(
            reader=st.session_state.reader,
            retriever=st.session_state.retriever,
        )
        prediction = qa_pipeline.run(
            query=question,
            params={
                "Retriever": {"top_k": 5},
                "Reader": {"top_k": 1},
            },
        )

        answers = prediction.get('answers') if prediction else None
        if not answers:
            # Previously an empty answer list raised IndexError and the user
            # saw nothing at all.
            st.info('No answer found.')
            return None

        text = str(answers[0])
        # The answer used to be computed and then dropped; surface it.
        st.write(text)
        return text
    except Exception as ex:
        print('--error--')
        print('--on_submission method--')
        print(ex)
        return None
# Let the user upload a CSV of employee records and index its rows.
uploaded_file = st.file_uploader(label='please upload your file', type=['csv'])
if uploaded_file is not None:
    # NOTE(review): this branch runs on every Streamlit rerun while a file is
    # selected, so the documents are re-indexed each time — confirm intended.
    try:
        df = pd.read_csv(uploaded_file)
        # Round-trip through JSON so each row becomes a plain dict.
        employee_list = df.to_json(orient='records')
        json_emp = json.loads(employee_list)
        doc = [prepare_doc(e) for e in json_emp]
        intializeFAISS(docs=doc)
    except Exception as ex:
        print('--uploaded_file method--')
        print(ex)
# Initialize the question form.
with st.form(key='workforce_management'):
    st.title('workforce management')
    question = st.text_input(label='please ask your question.')
    submitted = st.form_submit_button(label='submit')

    # Query only when the form was submitted with a non-empty question.
    if submitted and question:
        on_submission(question)