github-actions[bot] committed on
Commit
dfc596b
·
1 Parent(s): 3e1f373

Sync with https://github.com/mozilla-ai/structured-qa

Browse files
Files changed (2) hide show
  1. Dockerfile +26 -0
  2. app.py +58 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA 12.2 + cuDNN 8 development image on Ubuntu 22.04. The CUDA version
# matches the cu122 llama-cpp-python wheel installed further down.
FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04

# Build toolchain, Python 3.10 (the wheel below is tagged cp310) and git.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install --no-install-recommends -y \
    build-essential \
    python3.10 \
    python3.10-dev \
    python3-pip \
    git \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Run everything below as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user

USER user

# User-level pip installs put console scripts (e.g. streamlit) in
# ~/.local/bin, so that directory has to be on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# --no-cache-dir keeps pip's download cache out of the image layers.
# The prebuilt CUDA wheel is installed first, in its own layer.
RUN pip3 install --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
RUN pip3 install --no-cache-dir structured-qa

COPY --chown=user . $HOME/app

# 8501 is Streamlit's default server port.
EXPOSE 8501
# NOTE(review): XSRF protection is switched off here, presumably so file
# uploads work behind the hosting proxy - confirm before reusing this image
# in another deployment.
ENTRYPOINT ["streamlit", "run", "app.py", "--server.enableXsrfProtection", "false"]
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ from pathlib import Path
3
+
4
+ import pymupdf
5
+ import streamlit as st
6
+
7
+ from structured_qa.model_loaders import load_llama_cpp_model
8
+ from structured_qa.preprocessing import document_to_sections_dir
9
+ from structured_qa.workflow import find_retrieve_answer
10
+
11
+
12
@st.cache_resource
def load_model():
    """Load the llama.cpp model that answers questions in this demo.

    The function is wrapped in ``st.cache_resource``, so the value returned
    by ``load_llama_cpp_model`` is reused instead of being rebuilt on every
    call.

    Returns:
        Whatever ``load_llama_cpp_model`` returns for the hard-coded model
        identifier below.
    """
    model_id = (
        "MaziyarPanahi/SmolTulu-1.7b-Reinforced-GGUF/SmolTulu-1.7b-Reinforced.fp16.gguf"
    )
    return load_llama_cpp_model(model_id)
17
+
18
+
19
@st.cache_resource
def convert_to_sections(uploaded_file, output_dir):
    """Split an uploaded document into sections written under ``output_dir``.

    The upload is opened in memory with PyMuPDF and handed to
    ``document_to_sections_dir``. The function is wrapped in
    ``st.cache_resource``, so repeated calls with the same arguments do not
    redo the conversion.

    Args:
        uploaded_file: File object returned by ``st.file_uploader``
            (a ``BytesIO`` subclass).
        output_dir: Directory passed to ``document_to_sections_dir`` as the
            destination for the extracted sections.
    """
    # getvalue() returns the whole buffer regardless of the current read
    # position and does not move it, unlike read(), which yields nothing once
    # the upload has already been consumed.
    document_bytes = uploaded_file.getvalue()
    document_to_sections_dir(
        pymupdf.open("type", BytesIO(document_bytes)),
        output_dir,
    )
25
+
26
+
27
# --- Streamlit page: upload a document, split it into sections, then answer
# --- questions about it. Streamlit re-executes this script on every
# --- interaction, so each step is guarded by the state it depends on.
st.title("Structured QA")

st.header("Uploading Data")

uploaded_file = st.file_uploader(
    "Choose a file", type=["pdf", "html", "txt", "docx", "md"]
)

if uploaded_file is not None:
    st.divider()
    st.header("Loading and converting to sections")
    st.markdown("[Docs for this Step]()")
    st.divider()

    # The file name is supplied by the client: keep only its final path
    # component so directory parts cannot redirect the output elsewhere.
    # Computed once and reused by every step below.
    sections_dir = f"example_outputs/{Path(uploaded_file.name).name}"

    convert_to_sections(uploaded_file, sections_dir)

    # iterdir() yields entries in arbitrary order; sort for a stable listing.
    sections = sorted(f.stem for f in Path(sections_dir).iterdir())
    st.json(sections)

    model = load_model()
    question = st.text_input("Enter a question:")
    if question:
        with st.spinner("Answering..."):
            answer, sections_checked = find_retrieve_answer(
                model=model,
                sections_dir=sections_dir,
                question=question,
            )
        st.text("Sections checked:")
        st.json(sections_checked)
        st.text("Answer:")
        st.text(answer)