from io import BytesIO
from pathlib import Path
import pymupdf
import streamlit as st
from structured_qa.model_loaders import load_llama_cpp_model
from structured_qa.preprocessing import document_to_sections_dir
from structured_qa.workflow import find_retrieve_answer
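

# Cache the loaded model so it is reused across reruns instead of being reloaded.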
@st.cache_resource
def load_model():
    return load_llama_cpp_model(
        "MaziyarPanahi/SmolTulu-1.7b-Reinforced-GGUF/SmolTulu-1.7b-Reinforced.fp16.gguf"
    )
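

# Cache the conversion of the uploaded file into one file per section.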
@st.cache_resource
def convert_to_sections(uploaded_file, output_dir):
    document_to_sections_dir(
        # Open the in-memory upload, using the file extension as the filetype hint.
        pymupdf.open(
            stream=BytesIO(uploaded_file.read()),
            filetype=Path(uploaded_file.name).suffix,
        ),
        output_dir,
    )
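

# App flow: upload a document, split it into sections, then ask questions about it.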
st.title("Structured QA")

st.header("Uploading Data")

uploaded_file = st.file_uploader(
    "Choose a file", type=["pdf", "html", "txt", "docx", "md"]
)

if uploaded_file is not None:
    st.divider()
    st.header("Loading and converting to sections")
    st.markdown("[Docs for this Step]()")
    st.divider()

    # Split the uploaded document into section files on disk and show their names.
    convert_to_sections(uploaded_file, f"example_outputs/{uploaded_file.name}")
    sections = [f.stem for f in Path(f"example_outputs/{uploaded_file.name}").iterdir()]
    st.json(sections)

    # Load the cached model and answer the question against the extracted sections.
    model = load_model()
    question = st.text_input("Enter a question:")
    if question:
        with st.spinner("Answering..."):
            answer, sections_checked = find_retrieve_answer(
                model=model,
                sections_dir=f"example_outputs/{uploaded_file.name}",
                question=question,
            )
            st.text("Sections checked:")
            st.json(sections_checked)
            st.text("Answer:")
            st.text(answer)