nasirbloch323 committed on
Commit
e46ebf6
·
verified ·
1 Parent(s): 742afb2

Upload 13 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Container image for the gradio_pdf demo Space.
FROM python:3.9

WORKDIR /code

# System tools needed at runtime: poppler-utils backs pdf2image and
# tesseract-ocr backs pytesseract (both listed in requirements.txt).
# The apt package lists are removed in the same layer so they are not
# shipped in the final image.
RUN apt-get update \
    && apt-get install -y poppler-utils tesseract-ocr chromium \
    && rm -rf /var/lib/apt/lists/*

# World-writable model cache so weights can be downloaded regardless of the
# UID the container runs as.
# NOTE(review): newer transformers releases prefer HF_HOME over
# TRANSFORMERS_CACHE -- confirm against the pinned version before switching.
RUN mkdir -p /tmp/cache/ && chmod a+rwx -R /tmp/cache/
ENV TRANSFORMERS_CACHE=/tmp/cache/

# Install Python dependencies before copying the sources so this (slow) layer
# is reused when only application code changes.
COPY --link --chown=1000 requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --link --chown=1000 . .

ENV PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    SYSTEM=spaces

CMD ["python", "app.py"]
18
+
README.md CHANGED
@@ -1,13 +1,17 @@
 
1
  ---
2
- title: Chatpdf
3
- emoji: 💻
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 4.24.0
8
- app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
1
+
2
  ---
3
+ tags: [gradio-custom-component,Documents,PDF,Document QA,gradio,gradio-template-Fallback]
4
+ title: gradio_pdf V0.0.3
5
+ colorFrom: green
6
+ colorTo: pink
7
+ sdk: docker
 
 
8
  pinned: false
9
  license: apache-2.0
10
  ---
11
 
12
+
13
+ # Name: gradio_pdf
14
+
15
+ Description: Easily display PDFs in Gradio
16
+
17
+ Install with: pip install gradio_pdf
__init__.py ADDED
File without changes
_app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
"""Document question-answering demo built around the ``gradio_pdf`` component."""

import gradio as gr
from gradio_pdf import PDF
from pdf2image import convert_from_path
from transformers import pipeline
from pathlib import Path

# Example PDFs are resolved relative to this file, not the working directory.
dir_ = Path(__file__).parent

# Built once at import time and reused for every request.
p = pipeline(
    "document-question-answering",
    model="impira/layoutlm-document-qa",
)


def qa(question: str, doc: str) -> str:
    """Answer ``question`` about the first page of the PDF at ``doc``.

    Args:
        question: Natural-language question about the document.
        doc: Path of the PDF handed over by the ``PDF`` input component.

    Returns:
        The ``answer`` of the highest-scoring candidate, or an empty string
        when the pipeline returns no candidates.

    Raises:
        gr.Error: If no document was provided.
    """
    if not doc:
        # Surface a readable message in the UI instead of a conversion crash.
        raise gr.Error("Please upload a PDF document first.")

    # Only the first page is queried, so only that page is rasterized.
    img = convert_from_path(doc, first_page=1, last_page=1)[0]
    output = p(img, question)

    # max() returns the first top-scoring candidate, matching a stable
    # descending sort followed by [0], without sorting the whole list.
    best = max(output, key=lambda x: x["score"], default=None)
    return best["answer"] if best is not None else ""


demo = gr.Interface(
    qa,
    [gr.Textbox(label="Question"), PDF(label="Document")],
    gr.Textbox(),
    examples=[
        ["What is the total gross worth?", str(dir_ / "invoice_2.pdf")],
        ["Who is being invoiced?", str(dir_ / "sample_invoice.pdf")],
    ],
)

if __name__ == "__main__":
    demo.launch()
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ from _app import demo as app
4
+ import os
5
+
6
+ _docs = {'PDF': {'description': 'A base class for defining methods that all input/output components should have.', 'members': {'__init__': {'value': {'type': 'Any', 'default': 'None', 'description': None}, 'height': {'type': 'int | None', 'default': 'None', 'description': None}, 'label': {'type': 'str | None', 'default': 'None', 'description': None}, 'info': {'type': 'str | None', 'default': 'None', 'description': None}, 'show_label': {'type': 'bool | None', 'default': 'None', 'description': None}, 'container': {'type': 'bool', 'default': 'True', 'description': None}, 'scale': {'type': 'int | None', 'default': 'None', 'description': None}, 'min_width': {'type': 'int | None', 'default': 'None', 'description': None}, 'interactive': {'type': 'bool | None', 'default': 'None', 'description': None}, 'visible': {'type': 'bool', 'default': 'True', 'description': None}, 'elem_id': {'type': 'str | None', 'default': 'None', 'description': None}, 'elem_classes': {'type': 'list[str] | str | None', 'default': 'None', 'description': None}, 'render': {'type': 'bool', 'default': 'True', 'description': None}, 'load_fn': {'type': 'Callable[..., Any] | None', 'default': 'None', 'description': None}, 'every': {'type': 'float | None', 'default': 'None', 'description': None}}, 'postprocess': {'value': {'type': 'str | None', 'description': None}}, 'preprocess': {'return': {'type': 'str', 'description': None}, 'value': None}}, 'events': {'change': {'type': None, 'default': None, 'description': ''}, 'upload': {'type': None, 'default': None, 'description': ''}}}, '__meta__': {'additional_interfaces': {}, 'user_fn_refs': {'PDF': []}}}
7
+
8
# Stylesheet shipped next to this file; resolved absolutely so the page looks
# the same whatever the working directory is.
abs_path = os.path.join(os.path.dirname(__file__), "css.css")

# Documentation page for the component: intro, the live demo imported from
# _app, installation/usage notes, and the API reference rendered from _docs.
with gr.Blocks(
    css=abs_path,
    theme=gr.themes.Default(
        font_mono=[
            gr.themes.GoogleFont("Inconsolata"),
            "monospace",
        ],
    ),
) as demo:
    gr.Markdown(
"""
# `gradio_pdf`

<div style="display: flex; gap: 7px;">
<a href="https://pypi.org/project/gradio_pdf/" target="_blank"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/gradio_pdf"></a> <a href="https://github.com/freddyaboulton/gradio-pdf/issues" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/Issues-white?logo=github&logoColor=black"></a> <a href="https://huggingface.co/spaces/freddyaboulton/gradio_pdf/discussions" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/%F0%9F%A4%97%20Discuss-%23097EFF?style=flat&logoColor=black"></a>
</div>

Easily display PDFs in Gradio
""", elem_classes=["md-custom"], header_links=True)

    # Embed the interactive demo defined in _app.py.
    app.render()

    gr.Markdown(
"""
## Installation

```bash
pip install gradio_pdf
```

## Usage

```python

import gradio as gr
from gradio_pdf import PDF
from pdf2image import convert_from_path
from transformers import pipeline
from pathlib import Path

dir_ = Path(__file__).parent

p = pipeline(
    "document-question-answering",
    model="impira/layoutlm-document-qa",
)

def qa(question: str, doc: str) -> str:
    img = convert_from_path(doc)[0]
    output = p(img, question)
    return sorted(output, key=lambda x: x["score"], reverse=True)[0]['answer']


demo = gr.Interface(
    qa,
    [gr.Textbox(label="Question"), PDF(label="Document")],
    gr.Textbox(),
    examples=[["What is the total gross worth?", str(dir_ / "invoice_2.pdf")],
              ["Who is being invoiced?", str(dir_ / "sample_invoice.pdf")]]
)

if __name__ == "__main__":
    demo.launch()

```
""", elem_classes=["md-custom"], header_links=True)

    gr.Markdown("""
## `PDF`

### Initialization
""", elem_classes=["md-custom"], header_links=True)

    # Constructor parameters, taken from the generated _docs metadata.
    gr.ParamViewer(value=_docs["PDF"]["members"]["__init__"], linkify=[])

    gr.Markdown("### Events")
    gr.ParamViewer(value=_docs["PDF"]["events"], linkify=['Event'])

    gr.Markdown("""

### User function

The impact on the user's predict function varies depending on whether the component is used as an input or output for an event (or both).

- When used as an Input, the component only impacts the input signature of the user function.
- When used as an output, the component only impacts the return signature of the user function.

The code snippet below is accurate in cases where the component is used as both an input and an output.



```python
def predict(
    value: str
) -> str | None:
    return value
```
""", elem_classes=["md-custom", "PDF-user-fn"], header_links=True)

    # On page load, names listed in `refs` / `user_fn_refs` are replaced by
    # anchor links inside the matching elements. Both collections are empty
    # for this component, so the loops currently change nothing.
    demo.load(None, js=r"""function() {
    const refs = {};
    const user_fn_refs = {
          PDF: [], };
    requestAnimationFrame(() => {

        Object.entries(user_fn_refs).forEach(([key, refs]) => {
            if (refs.length > 0) {
                const el = document.querySelector(`.${key}-user-fn`);
                if (!el) return;
                refs.forEach(ref => {
                    el.innerHTML = el.innerHTML.replace(
                        new RegExp("\\b"+ref+"\\b", "g"),
                        `<a href="#h-${ref.toLowerCase()}">${ref}</a>`
                    );
                })
            }
        })

        Object.entries(refs).forEach(([key, refs]) => {
            if (refs.length > 0) {
                const el = document.querySelector(`.${key}`);
                if (!el) return;
                refs.forEach(ref => {
                    el.innerHTML = el.innerHTML.replace(
                        new RegExp("\\b"+ref+"\\b", "g"),
                        `<a href="#h-${ref.toLowerCase()}">${ref}</a>`
                    );
                })
            }
        })
    })
}

""")

# Launch only when executed as a script (the Dockerfile runs `python app.py`),
# so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()
contract.pdf ADDED
Binary file (128 kB). View file
 
css.css ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Base typography for the documentation page. */
html {
  font-family: Inter;
  font-size: 16px;
  font-weight: 400;
  line-height: 1.5;
  -webkit-text-size-adjust: 100%;
  background: #fff;
  color: #323232;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  text-rendering: optimizeLegibility;
}

/* Vertical spacing scale: each --vspace-* is a multiple of --space, in rem. */
:root {
  --space: 1;
  --vspace: calc(var(--space) * 1rem);
  --vspace-0: calc(3 * var(--space) * 1rem);
  --vspace-1: calc(2 * var(--space) * 1rem);
  --vspace-2: calc(1.5 * var(--space) * 1rem);
  --vspace-3: calc(0.5 * var(--space) * 1rem);
}

.app {
  max-width: 748px !important;
}

/* The former `line-height: var(--vspace * 2)` was not valid CSS and was
   dropped by browsers, so paragraphs have always used the inherited
   line-height. It is removed here to keep rendering unchanged; use
   `calc(var(--vspace) * 2)` if double spacing is actually wanted. */
.prose p {
  margin: var(--vspace) 0;
  font-size: 1rem;
}

/* Fall back to a generic monospace face when Inconsolata is unavailable. */
code {
  font-family: "Inconsolata", monospace;
  font-size: 16px;
}

h1,
h1 code {
  font-weight: 400;
  line-height: calc(2.5 / var(--space) * var(--vspace));
}

/* A `padding-bottom: 5px` used to precede `padding: 0` here; the shorthand
   overrode it, so only the effective declaration is kept. */
h1 code {
  background: none;
  border: none;
  letter-spacing: 0.05em;
  position: relative;
  padding: 0;
}

h2 {
  margin: var(--vspace-1) 0 var(--vspace-2) 0;
  line-height: 1em;
}

h3,
h3 code {
  margin: var(--vspace-1) 0 var(--vspace-2) 0;
  line-height: 1em;
}

h4,
h5,
h6 {
  margin: var(--vspace-3) 0 var(--vspace-3) 0;
  line-height: var(--vspace);
}

.bigtitle,
h1,
h1 code {
  font-size: calc(8px * 4.5);
  word-break: break-word;
}

.title,
h2,
h2 code {
  font-size: calc(8px * 3.375);
  font-weight: lighter;
  word-break: break-word;
  border: none;
  background: none;
}

.subheading1,
h3,
h3 code {
  font-size: calc(8px * 1.8);
  font-weight: 600;
  border: none;
  background: none;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

h2 code {
  padding: 0;
  position: relative;
  letter-spacing: 0.05em;
}

blockquote {
  font-size: calc(8px * 1.1667);
  font-style: italic;
  line-height: calc(1.1667 * var(--vspace));
  margin: var(--vspace-2) var(--vspace-2);
}

.subheading2,
h4 {
  font-size: calc(8px * 1.4292);
  text-transform: uppercase;
  font-weight: 600;
}

.subheading3,
h5 {
  font-size: calc(8px * 1.2917);
  line-height: calc(1.2917 * var(--vspace));

  font-weight: lighter;
  text-transform: uppercase;
  letter-spacing: 0.15em;
}

/* An earlier `font-size: calc(8px * 1.1667)` was immediately overridden by
   the em-based value below, so only the effective declaration is kept. */
h6 {
  font-size: 1.1667em;
  font-weight: normal;
  font-style: italic;
  font-family: "le-monde-livre-classic-byol", serif !important;
  letter-spacing: 0px !important;
}

#start .md > *:first-child {
  margin-top: 0;
}

h2 + h3 {
  margin-top: 0;
}

.md hr {
  border: none;
  border-top: 1px solid var(--block-border-color);
  margin: var(--vspace-2) 0 var(--vspace-2) 0;
}

.prose ul {
  margin: var(--vspace-2) 0 var(--vspace-1) 0;
}

.gap {
  gap: 0;
}
gradio_pdf-0.0.2-py3-none-any.whl ADDED
Binary file (304 kB). View file
 
gradio_pdf-0.0.3-py3-none-any.whl ADDED
Binary file (306 kB). View file
 
invoice_2.pdf ADDED
Binary file (372 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ pdf2image
4
+ pytesseract
5
+ gradio_pdf==0.0.5
sample_invoice.pdf ADDED
Binary file (34.7 kB). View file