Spaces:

mozilla-ai
/

structured-qa

Sleeping

App Files Files Community

github-actions[bot] committed 14 days ago

Commit

dfc596b

1 Parent(s): 3e1f373

Sync with https://github.com/mozilla-ai/structured-qa

Browse files

Files changed (2) hide show

Dockerfile +26 -0
app.py +58 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+# CUDA 12.2 development base image; the llama-cpp-python wheel installed below is the cu122 build.
+FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+# Install the build toolchain, Python 3.10 and git, then drop the apt lists to keep the layer small.
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  build-essential \
+  python3.10 \
+  python3.10-dev \
+  python3-pip \
+  git \
+  && apt-get clean && rm -rf /var/lib/apt/lists/*
+# Create an unprivileged user (UID 1000) and run everything that follows as that user.
+RUN useradd -m -u 1000 user
+USER user
+# pip installs for this user place console scripts (e.g. streamlit) in ~/.local/bin.
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Prebuilt llama-cpp-python wheel (CUDA 12.2, CPython 3.10, x86_64).
+# --no-cache-dir keeps pip's download cache out of the image layers.
+RUN pip3 install --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
+RUN pip3 install --no-cache-dir structured-qa
+COPY --chown=user . $HOME/app
+# Port the Streamlit server is expected to listen on.
+EXPOSE 8501
+# NOTE(review): XSRF protection is turned off here; presumably required for file uploads
+# when the app is served from the hosting platform's proxy/iframe - confirm before reuse elsewhere.
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.enableXsrfProtection", "false"]

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from io import BytesIO
+from pathlib import Path
+import pymupdf
+import streamlit as st
+from structured_qa.model_loaders import load_llama_cpp_model
+from structured_qa.preprocessing import document_to_sections_dir
+from structured_qa.workflow import find_retrieve_answer
+@st.cache_resource
+def load_model():
+    return load_llama_cpp_model(
+        "MaziyarPanahi/SmolTulu-1.7b-Reinforced-GGUF/SmolTulu-1.7b-Reinforced.fp16.gguf"
+    )
+@st.cache_resource
+def convert_to_sections(uploaded_file, output_dir):
+    document_to_sections_dir(
+        pymupdf.open("type", BytesIO(uploaded_file.read())),
+        output_dir,
+    )
+st.title("Structured QA")
+st.header("Uploading Data")
+uploaded_file = st.file_uploader(
+    "Choose a file", type=["pdf", "html", "txt", "docx", "md"]
+)
+if uploaded_file is not None:
+    st.divider()
+    st.header("Loading and converting to sections")
+    st.markdown("[Docs for this Step]()")
+    st.divider()
+    convert_to_sections(uploaded_file, f"example_outputs/{uploaded_file.name}")
+    sections = [f.stem for f in Path(f"example_outputs/{uploaded_file.name}").iterdir()]
+    st.json(sections)
+    model = load_model()
+    question = st.text_input("Enter a question:")
+    if question:
+        with st.spinner("Answering..."):
+            answer, sections_checked = find_retrieve_answer(
+                model=model,
+                sections_dir=f"example_outputs/{uploaded_file.name}",
+                question=question,
+            )
+            st.text("Sections checked:")
+            st.json(sections_checked)
+            st.text("Answer:")
+            st.text(answer)