Spaces:
Runtime error
Runtime error
nasirbloch323
committed on
Upload 13 files
Browse files- Dockerfile +18 -0
- README.md +12 -8
- __init__.py +0 -0
- _app.py +30 -0
- app.py +151 -0
- contract.pdf +0 -0
- css.css +157 -0
- gradio_pdf-0.0.2-py3-none-any.whl +0 -0
- gradio_pdf-0.0.3-py3-none-any.whl +0 -0
- invoice_2.pdf +0 -0
- requirements.txt +5 -0
- sample_invoice.pdf +0 -0
Dockerfile
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Container image for the gradio_pdf demo Space.
FROM python:3.9

WORKDIR /code

# System packages: poppler-utils backs pdf2image and tesseract-ocr backs
# pytesseract. The apt package index is removed in the same layer so it does
# not end up in the final image.
RUN apt-get update \
    && apt-get install -y poppler-utils tesseract-ocr chromium \
    && rm -rf /var/lib/apt/lists/*

# World-writable cache directory used for downloaded model files.
RUN mkdir -p /tmp/cache/ && chmod a+rwx -R /tmp/cache/
ENV TRANSFORMERS_CACHE=/tmp/cache/

# Install Python dependencies before copying the rest of the sources so that
# this layer is rebuilt only when requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --link --chown=1000 . .

ENV PYTHONUNBUFFERED=1 GRADIO_ALLOW_FLAGGING=never GRADIO_NUM_PORTS=1 GRADIO_SERVER_NAME=0.0.0.0 GRADIO_SERVER_PORT=7860 SYSTEM=spaces

CMD ["python", "app.py"]
|
18 |
+
|
README.md
CHANGED
@@ -1,13 +1,17 @@
|
|
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk:
|
7 |
-
sdk_version: 4.24.0
|
8 |
-
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
---
|
3 |
+
tags: [gradio-custom-component,Documents,PDF,Document QA,gradio,gradio-template-Fallback]
|
4 |
+
title: gradio_pdf V0.0.3
|
5 |
+
colorFrom: green
|
6 |
+
colorTo: pink
|
7 |
+
sdk: docker
|
|
|
|
|
8 |
pinned: false
|
9 |
license: apache-2.0
|
10 |
---
|
11 |
|
12 |
+
|
13 |
+
# Name: gradio_pdf
|
14 |
+
|
15 |
+
Description: Easily display PDFs in Gradio
|
16 |
+
|
17 |
+
Install with: pip install gradio_pdf
|
__init__.py
ADDED
File without changes
|
_app.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
from gradio_pdf import PDF
|
4 |
+
from pdf2image import convert_from_path
|
5 |
+
from transformers import pipeline
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
dir_ = Path(__file__).parent
|
9 |
+
|
10 |
+
p = pipeline(
|
11 |
+
"document-question-answering",
|
12 |
+
model="impira/layoutlm-document-qa",
|
13 |
+
)
|
14 |
+
|
15 |
+
def qa(question: str, doc: str) -> str:
    """Answer ``question`` about the first page of the PDF at ``doc``.

    Args:
        question: Natural-language question to ask about the document.
        doc: Filesystem path of the PDF file.

    Returns:
        The ``answer`` text of the highest-scoring candidate returned by the
        document-question-answering pipeline ``p``.
    """
    # Only the first page is used, so render just that page instead of
    # rasterizing the entire document.
    img = convert_from_path(doc, first_page=1, last_page=1)[0]
    output = p(img, question)
    # max() returns the first highest-scoring candidate, which is the same
    # element a stable descending sort followed by [0] would select.
    return max(output, key=lambda x: x["score"])["answer"]
|
19 |
+
|
20 |
+
|
21 |
+
demo = gr.Interface(
|
22 |
+
qa,
|
23 |
+
[gr.Textbox(label="Question"), PDF(label="Document")],
|
24 |
+
gr.Textbox(),
|
25 |
+
examples=[["What is the total gross worth?", str(dir_ / "invoice_2.pdf")],
|
26 |
+
["Whos is being invoiced?", str(dir_ / "sample_invoice.pdf")]]
|
27 |
+
)
|
28 |
+
|
29 |
+
if __name__ == "__main__":
|
30 |
+
demo.launch()
|
app.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
from _app import demo as app
|
4 |
+
import os
|
5 |
+
|
6 |
+
_docs = {'PDF': {'description': 'A base class for defining methods that all input/output components should have.', 'members': {'__init__': {'value': {'type': 'Any', 'default': 'None', 'description': None}, 'height': {'type': 'int | None', 'default': 'None', 'description': None}, 'label': {'type': 'str | None', 'default': 'None', 'description': None}, 'info': {'type': 'str | None', 'default': 'None', 'description': None}, 'show_label': {'type': 'bool | None', 'default': 'None', 'description': None}, 'container': {'type': 'bool', 'default': 'True', 'description': None}, 'scale': {'type': 'int | None', 'default': 'None', 'description': None}, 'min_width': {'type': 'int | None', 'default': 'None', 'description': None}, 'interactive': {'type': 'bool | None', 'default': 'None', 'description': None}, 'visible': {'type': 'bool', 'default': 'True', 'description': None}, 'elem_id': {'type': 'str | None', 'default': 'None', 'description': None}, 'elem_classes': {'type': 'list[str] | str | None', 'default': 'None', 'description': None}, 'render': {'type': 'bool', 'default': 'True', 'description': None}, 'load_fn': {'type': 'Callable[..., Any] | None', 'default': 'None', 'description': None}, 'every': {'type': 'float | None', 'default': 'None', 'description': None}}, 'postprocess': {'value': {'type': 'str | None', 'description': None}}, 'preprocess': {'return': {'type': 'str', 'description': None}, 'value': None}}, 'events': {'change': {'type': None, 'default': None, 'description': ''}, 'upload': {'type': None, 'default': None, 'description': ''}}}, '__meta__': {'additional_interfaces': {}, 'user_fn_refs': {'PDF': []}}}
|
7 |
+
|
8 |
+
abs_path = os.path.join(os.path.dirname(__file__), "css.css")
|
9 |
+
|
10 |
+
with gr.Blocks(
|
11 |
+
css=abs_path,
|
12 |
+
theme=gr.themes.Default(
|
13 |
+
font_mono=[
|
14 |
+
gr.themes.GoogleFont("Inconsolata"),
|
15 |
+
"monospace",
|
16 |
+
],
|
17 |
+
),
|
18 |
+
) as demo:
|
19 |
+
gr.Markdown(
|
20 |
+
"""
|
21 |
+
# `gradio_pdf`
|
22 |
+
|
23 |
+
<div style="display: flex; gap: 7px;">
|
24 |
+
<a href="https://pypi.org/project/gradio_pdf/" target="_blank"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/gradio_pdf"></a> <a href="https://github.com/freddyaboulton/gradio-pdf/issues" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/Issues-white?logo=github&logoColor=black"></a> <a href="https://huggingface.co/spaces/freddyaboulton/gradio_pdf/discussions" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/%F0%9F%A4%97%20Discuss-%23097EFF?style=flat&logoColor=black"></a>
|
25 |
+
</div>
|
26 |
+
|
27 |
+
Easily display PDFs in Gradio
|
28 |
+
""", elem_classes=["md-custom"], header_links=True)
|
29 |
+
app.render()
|
30 |
+
gr.Markdown(
|
31 |
+
"""
|
32 |
+
## Installation
|
33 |
+
|
34 |
+
```bash
|
35 |
+
pip install gradio_pdf
|
36 |
+
```
|
37 |
+
|
38 |
+
## Usage
|
39 |
+
|
40 |
+
```python
|
41 |
+
|
42 |
+
import gradio as gr
|
43 |
+
from gradio_pdf import PDF
|
44 |
+
from pdf2image import convert_from_path
|
45 |
+
from transformers import pipeline
|
46 |
+
from pathlib import Path
|
47 |
+
|
48 |
+
dir_ = Path(__file__).parent
|
49 |
+
|
50 |
+
p = pipeline(
|
51 |
+
"document-question-answering",
|
52 |
+
model="impira/layoutlm-document-qa",
|
53 |
+
)
|
54 |
+
|
55 |
+
def qa(question: str, doc: str) -> str:
|
56 |
+
img = convert_from_path(doc)[0]
|
57 |
+
output = p(img, question)
|
58 |
+
return sorted(output, key=lambda x: x["score"], reverse=True)[0]['answer']
|
59 |
+
|
60 |
+
|
61 |
+
demo = gr.Interface(
|
62 |
+
qa,
|
63 |
+
[gr.Textbox(label="Question"), PDF(label="Document")],
|
64 |
+
gr.Textbox(),
|
65 |
+
examples=[["What is the total gross worth?", str(dir_ / "invoice_2.pdf")],
|
66 |
+
["Whos is being invoiced?", str(dir_ / "sample_invoice.pdf")]]
|
67 |
+
)
|
68 |
+
|
69 |
+
if __name__ == "__main__":
|
70 |
+
demo.launch()
|
71 |
+
|
72 |
+
```
|
73 |
+
""", elem_classes=["md-custom"], header_links=True)
|
74 |
+
|
75 |
+
|
76 |
+
gr.Markdown("""
|
77 |
+
## `PDF`
|
78 |
+
|
79 |
+
### Initialization
|
80 |
+
""", elem_classes=["md-custom"], header_links=True)
|
81 |
+
|
82 |
+
gr.ParamViewer(value=_docs["PDF"]["members"]["__init__"], linkify=[])
|
83 |
+
|
84 |
+
|
85 |
+
gr.Markdown("### Events")
|
86 |
+
gr.ParamViewer(value=_docs["PDF"]["events"], linkify=['Event'])
|
87 |
+
|
88 |
+
|
89 |
+
|
90 |
+
|
91 |
+
gr.Markdown("""
|
92 |
+
|
93 |
+
### User function
|
94 |
+
|
95 |
+
The impact on the users predict function varies depending on whether the component is used as an input or output for an event (or both).
|
96 |
+
|
97 |
+
- When used as an Input, the component only impacts the input signature of the user function.
|
98 |
+
- When used as an output, the component only impacts the return signature of the user function.
|
99 |
+
|
100 |
+
The code snippet below is accurate in cases where the component is used as both an input and an output.
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
+
```python
|
105 |
+
def predict(
|
106 |
+
value: str
|
107 |
+
) -> str | None:
|
108 |
+
return value
|
109 |
+
```
|
110 |
+
""", elem_classes=["md-custom", "PDF-user-fn"], header_links=True)
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
+
|
115 |
+
demo.load(None, js=r"""function() {
|
116 |
+
const refs = {};
|
117 |
+
const user_fn_refs = {
|
118 |
+
PDF: [], };
|
119 |
+
requestAnimationFrame(() => {
|
120 |
+
|
121 |
+
Object.entries(user_fn_refs).forEach(([key, refs]) => {
|
122 |
+
if (refs.length > 0) {
|
123 |
+
const el = document.querySelector(`.${key}-user-fn`);
|
124 |
+
if (!el) return;
|
125 |
+
refs.forEach(ref => {
|
126 |
+
el.innerHTML = el.innerHTML.replace(
|
127 |
+
new RegExp("\\b"+ref+"\\b", "g"),
|
128 |
+
`<a href="#h-${ref.toLowerCase()}">${ref}</a>`
|
129 |
+
);
|
130 |
+
})
|
131 |
+
}
|
132 |
+
})
|
133 |
+
|
134 |
+
Object.entries(refs).forEach(([key, refs]) => {
|
135 |
+
if (refs.length > 0) {
|
136 |
+
const el = document.querySelector(`.${key}`);
|
137 |
+
if (!el) return;
|
138 |
+
refs.forEach(ref => {
|
139 |
+
el.innerHTML = el.innerHTML.replace(
|
140 |
+
new RegExp("\\b"+ref+"\\b", "g"),
|
141 |
+
`<a href="#h-${ref.toLowerCase()}">${ref}</a>`
|
142 |
+
);
|
143 |
+
})
|
144 |
+
}
|
145 |
+
})
|
146 |
+
})
|
147 |
+
}
|
148 |
+
|
149 |
+
""")
|
150 |
+
|
151 |
+
demo.launch()
|
contract.pdf
ADDED
Binary file (128 kB). View file
|
|
css.css
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
html {
|
2 |
+
font-family: Inter;
|
3 |
+
font-size: 16px;
|
4 |
+
font-weight: 400;
|
5 |
+
line-height: 1.5;
|
6 |
+
-webkit-text-size-adjust: 100%;
|
7 |
+
background: #fff;
|
8 |
+
color: #323232;
|
9 |
+
-webkit-font-smoothing: antialiased;
|
10 |
+
-moz-osx-font-smoothing: grayscale;
|
11 |
+
text-rendering: optimizeLegibility;
|
12 |
+
}
|
13 |
+
|
14 |
+
:root {
|
15 |
+
--space: 1;
|
16 |
+
--vspace: calc(var(--space) * 1rem);
|
17 |
+
--vspace-0: calc(3 * var(--space) * 1rem);
|
18 |
+
--vspace-1: calc(2 * var(--space) * 1rem);
|
19 |
+
--vspace-2: calc(1.5 * var(--space) * 1rem);
|
20 |
+
--vspace-3: calc(0.5 * var(--space) * 1rem);
|
21 |
+
}
|
22 |
+
|
23 |
+
.app {
|
24 |
+
max-width: 748px !important;
|
25 |
+
}
|
26 |
+
|
27 |
+
/* Paragraph rhythm inside rendered Markdown. */
.prose p {
  margin: var(--vspace) 0;
  /* var() only accepts a custom-property name (plus optional fallback);
     arithmetic has to be done in calc(), otherwise the declaration is
     invalid and ignored. */
  line-height: calc(var(--vspace) * 2);
  font-size: 1rem;
}
|
32 |
+
|
33 |
+
/* Inline and block code. */
code {
  /* Fall back to a generic monospace face (not sans-serif) when Inconsolata
     is unavailable, so code keeps fixed-width alignment. */
  font-family: "Inconsolata", monospace;
  font-size: 16px;
}
|
37 |
+
|
38 |
+
h1,
|
39 |
+
h1 code {
|
40 |
+
font-weight: 400;
|
41 |
+
line-height: calc(2.5 / var(--space) * var(--vspace));
|
42 |
+
}
|
43 |
+
|
44 |
+
h1 code {
|
45 |
+
background: none;
|
46 |
+
border: none;
|
47 |
+
letter-spacing: 0.05em;
|
48 |
+
padding-bottom: 5px;
|
49 |
+
position: relative;
|
50 |
+
padding: 0;
|
51 |
+
}
|
52 |
+
|
53 |
+
h2 {
|
54 |
+
margin: var(--vspace-1) 0 var(--vspace-2) 0;
|
55 |
+
line-height: 1em;
|
56 |
+
}
|
57 |
+
|
58 |
+
h3,
|
59 |
+
h3 code {
|
60 |
+
margin: var(--vspace-1) 0 var(--vspace-2) 0;
|
61 |
+
line-height: 1em;
|
62 |
+
}
|
63 |
+
|
64 |
+
h4,
|
65 |
+
h5,
|
66 |
+
h6 {
|
67 |
+
margin: var(--vspace-3) 0 var(--vspace-3) 0;
|
68 |
+
line-height: var(--vspace);
|
69 |
+
}
|
70 |
+
|
71 |
+
.bigtitle,
|
72 |
+
h1,
|
73 |
+
h1 code {
|
74 |
+
font-size: calc(8px * 4.5);
|
75 |
+
word-break: break-word;
|
76 |
+
}
|
77 |
+
|
78 |
+
.title,
|
79 |
+
h2,
|
80 |
+
h2 code {
|
81 |
+
font-size: calc(8px * 3.375);
|
82 |
+
font-weight: lighter;
|
83 |
+
word-break: break-word;
|
84 |
+
border: none;
|
85 |
+
background: none;
|
86 |
+
}
|
87 |
+
|
88 |
+
.subheading1,
|
89 |
+
h3,
|
90 |
+
h3 code {
|
91 |
+
font-size: calc(8px * 1.8);
|
92 |
+
font-weight: 600;
|
93 |
+
border: none;
|
94 |
+
background: none;
|
95 |
+
letter-spacing: 0.1em;
|
96 |
+
text-transform: uppercase;
|
97 |
+
}
|
98 |
+
|
99 |
+
h2 code {
|
100 |
+
padding: 0;
|
101 |
+
position: relative;
|
102 |
+
letter-spacing: 0.05em;
|
103 |
+
}
|
104 |
+
|
105 |
+
blockquote {
|
106 |
+
font-size: calc(8px * 1.1667);
|
107 |
+
font-style: italic;
|
108 |
+
line-height: calc(1.1667 * var(--vspace));
|
109 |
+
margin: var(--vspace-2) var(--vspace-2);
|
110 |
+
}
|
111 |
+
|
112 |
+
.subheading2,
|
113 |
+
h4 {
|
114 |
+
font-size: calc(8px * 1.4292);
|
115 |
+
text-transform: uppercase;
|
116 |
+
font-weight: 600;
|
117 |
+
}
|
118 |
+
|
119 |
+
.subheading3,
|
120 |
+
h5 {
|
121 |
+
font-size: calc(8px * 1.2917);
|
122 |
+
line-height: calc(1.2917 * var(--vspace));
|
123 |
+
|
124 |
+
font-weight: lighter;
|
125 |
+
text-transform: uppercase;
|
126 |
+
letter-spacing: 0.15em;
|
127 |
+
}
|
128 |
+
|
129 |
+
h6 {
|
130 |
+
font-size: calc(8px * 1.1667);
|
131 |
+
font-size: 1.1667em;
|
132 |
+
font-weight: normal;
|
133 |
+
font-style: italic;
|
134 |
+
font-family: "le-monde-livre-classic-byol", serif !important;
|
135 |
+
letter-spacing: 0px !important;
|
136 |
+
}
|
137 |
+
|
138 |
+
#start .md > *:first-child {
|
139 |
+
margin-top: 0;
|
140 |
+
}
|
141 |
+
|
142 |
+
h2 + h3 {
|
143 |
+
margin-top: 0;
|
144 |
+
}
|
145 |
+
|
146 |
+
.md hr {
|
147 |
+
border: none;
|
148 |
+
border-top: 1px solid var(--block-border-color);
|
149 |
+
margin: var(--vspace-2) 0 var(--vspace-2) 0;
|
150 |
+
}
|
151 |
+
.prose ul {
|
152 |
+
margin: var(--vspace-2) 0 var(--vspace-1) 0;
|
153 |
+
}
|
154 |
+
|
155 |
+
.gap {
|
156 |
+
gap: 0;
|
157 |
+
}
|
gradio_pdf-0.0.2-py3-none-any.whl
ADDED
Binary file (304 kB). View file
|
|
gradio_pdf-0.0.3-py3-none-any.whl
ADDED
Binary file (306 kB). View file
|
|
invoice_2.pdf
ADDED
Binary file (372 kB). View file
|
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
transformers
|
3 |
+
pdf2image
|
4 |
+
pytesseract
|
5 |
+
gradio_pdf==0.0.5
|
sample_invoice.pdf
ADDED
Binary file (34.7 kB). View file
|
|