Spaces: Runtime error
Update Key_Information_Extraction
Key_Information_Extraction  +66 -36  CHANGED
@@ -1,6 +1,33 @@
+# -*- coding: utf-8 -*-
+"""DocAI_DeploymentGradio.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1USSEj7nHh2n2hUhTJTC0Iwhj6mSR7-mD
+"""
+
 import os
 os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
 
+os.system('pip install pyyaml==5.1')
+
+os.system('pip install -q git+https://github.com/huggingface/transformers.git')
+
+os.system('pip install -q datasets seqeval')
+
+os.system('pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html')
+os.system('pip install -q detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html')
+os.system('pip install -q pytesseract')
+
+!pip install gradio
+
+!pip install -q git+https://github.com/huggingface/transformers.git
+
+!pip install h5py
+
+!pip install -q datasets seqeval
+
 import gradio as gr
 
 import numpy as np
@@ -16,27 +43,33 @@ from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D
 from datasets import load_dataset # this dataset uses the new Image feature :)
 
 from transformers import LayoutLMv3ForTokenClassification
-from transformers.data.data_collator import default_data_collator
 from transformers import AutoModelForTokenClassification
 
-import cv2
+#import cv2
 from PIL import Image, ImageDraw, ImageFont
 
-
-# pip install -q git+https://github.com/huggingface/transformers.git
-# !pip install h5py
-# It's useful for evaluation metrics such as F1 on sequence labeling tasks
-# !pip install -q datasets seqeval
+dataset = load_dataset("nielsr/funsd-layoutlmv3")
 
-
-dataset = load_dataset("nielsr/funsd-layoutlmv3")
-#dataset = load_dataset("G:\\BITS - MTECH\\Sem -4\\Final Report\\code\dataset")
+example = dataset["test"][0]
 
-
-
-
+#image_path = "/root/.cache/huggingface/datasets/nielsr___funsd-layoutlmv3/funsd/1.0.0/0e3f4efdfd59aa1c3b4952c517894f7b1fc4d75c12ef01bcc8626a69e41c1bb9/funsd-layoutlmv3-test.arrow"
+
+image_path = '/root/.cache/huggingface/datasets/nielsr___funsd-layoutlmv3/funsd/1.0.0/0e3f4efdfd59aa1c3b4952c517894f7b1fc4d75c12ef01bcc8626a69e41c1bb9'
 
 example = dataset["test"][0]
+example["image"].save("example1.png")
+
+example1 = dataset["test"][1]
+example1["image"].save("example2.png")
+
+example2 = dataset["test"][2]
+example2["image"].save("example3.png")
+
+example2["image"]
+
+#Image.open(dataset[2][image_path]).convert("RGB").save("example1.png")
+#Image.open(dataset[1]["image_path"]).convert("RGB").save("example2.png")
+#Image.open(dataset[0]["image_path"]).convert("RGB").save("example3.png")
 
 words, boxes, ner_tags = example["tokens"], example["bboxes"], example["ner_tags"]
 
@@ -69,7 +102,6 @@ num_labels = len(label_list)
 
 label2color = {'question':'blue', 'answer':'green', 'header':'orange', 'other':'violet'}
 
-
 def prepare_examples(examples):
     images = examples[image_column_name]
     words = examples[text_column_name]
@@ -82,7 +114,6 @@ def prepare_examples(examples):
     return encoding
 
 processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
-#model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
 
 model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base",
                                                          id2label=id2label,
@@ -97,12 +128,6 @@ features = Features({
     'labels': Sequence(feature=Value(dtype='int64')),
 })
 
-# train_dataset = dataset["train"].map(
-# prepare_examples,
-# batched=True,
-# remove_columns=column_names,
-# features=features,
-# )
 eval_dataset = dataset["test"].map(
     prepare_examples,
     batched=True,
@@ -123,21 +148,26 @@ def process_image(image):
     print(type(image))
     width, height = image.size
 
-    # encode
-    #encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
-    #offset_mapping = encoding.pop('offset_mapping')
-
     image = example["image"]
     words = example["tokens"]
     boxes = example["bboxes"]
     word_labels = example["ner_tags"]
 
-    encoding = processor(image, words, truncation=True,boxes=boxes, word_labels=word_labels,return_offsets_mapping=True, return_tensors="pt")
-    offset_mapping = encoding.pop('offset_mapping')
-
     for k,v in encoding.items():
        print(k,v.shape)
 
+    # encode
+    #encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
+    #offset_mapping = encoding.pop('offset_mapping')
+
+    #encoding = processor(image, words, truncation=True,boxes=boxes, word_labels=word_labels,return_offsets_mapping=True, return_tensors="pt")
+    #offset_mapping = encoding.pop('offset_mapping')
+
+    encoding = processor(image, truncation=True,boxes=boxes, word_labels=word_labels,return_offsets_mapping=True, return_tensors="pt")
+    offset_mapping = encoding.pop('offset_mapping')
+
+
+
     # forward pass
     with torch.no_grad():
        outputs = model(**encoding)
@@ -155,9 +185,9 @@ def process_image(image):
     token_boxes = encoding.bbox.squeeze().tolist()
     width, height = image.size
 
-    true_predictions = [model.config.id2label[pred] for pred, label in zip(predictions, labels) if label != - 100]
-    true_labels = [model.config.id2label[label] for prediction, label in zip(predictions, labels) if label != -100]
-    true_boxes = [unnormalize_box(box, width, height) for box, label in zip(token_boxes, labels) if label != -100]
+    #true_predictions = [model.config.id2label[pred] for pred, label in zip(predictions, labels) if label != - 100]
+    #true_labels = [model.config.id2label[label] for prediction, label in zip(predictions, labels) if label != -100]
+    #true_boxes = [unnormalize_box(box, width, height) for box, label in zip(token_boxes, labels) if label != -100]
 
 
     # only keep non-subword predictions
@@ -175,7 +205,7 @@ def process_image(image):
 
     return image
 
-title = "DocumentAI - Extraction using LayoutLMv3 model"
+title = "DocumentAI - Extraction of Key Information using LayoutLMv3 model"
 description = "Extraction of Form or Invoice Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
 
 article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
@@ -186,13 +216,13 @@ css = """.output_image, .input_image {height: 600px !important}"""
 
 iface = gr.Interface(fn=process_image,
                      inputs=gr.inputs.Image(type="pil"),
-                     outputs=gr.outputs.Image(type="pil", label="annotated image"),
+                     outputs=gr.outputs.Image(type="pil", label="annotated predict image"),
                      title=title,
                      description=description,
                      article=article,
                      examples=examples,
                      css=css,
-                     analytics_enabled = True, enable_queue=True
-
-iface.launch(inline=False, share=False, debug=False)
+                     analytics_enabled = True, enable_queue=True
+                     )
 
+iface.launch(inline=False, share=False, debug=False)
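
Taken together, the changed file reduces process_image to a processor → model → argmax → draw pipeline over one FUNSD page. Below is a minimal, self-contained sketch of that inference path, assuming the nielsr/funsd-layoutlmv3 dataset and the microsoft/layoutlmv3-base checkpoint referenced in the file are reachable; it illustrates the technique rather than reproducing the committed code line for line, and since the base checkpoint is not fine-tuned the predicted tags are untrained.

# Minimal LayoutLMv3 token-classification inference sketch (illustrative only;
# mirrors the Space's approach, not its exact code).
import torch
from datasets import load_dataset
from PIL import ImageDraw
from transformers import AutoProcessor, LayoutLMv3ForTokenClassification

dataset = load_dataset("nielsr/funsd-layoutlmv3")
example = dataset["test"][0]
image = example["image"].convert("RGB")
words, boxes, word_labels = example["tokens"], example["bboxes"], example["ner_tags"]

# Label names come from the dataset's ClassLabel feature.
labels = dataset["test"].features["ner_tags"].feature.names
id2label = {i: label for i, label in enumerate(labels)}
label2id = {label: i for i, label in id2label.items()}

processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
model = LayoutLMv3ForTokenClassification.from_pretrained(
    "microsoft/layoutlmv3-base", id2label=id2label, label2id=label2id)

# apply_ocr=False means we pass the words and normalized boxes ourselves.
encoding = processor(image, words, boxes=boxes, word_labels=word_labels,
                     truncation=True, return_offsets_mapping=True, return_tensors="pt")
offset_mapping = encoding.pop("offset_mapping")

with torch.no_grad():
    outputs = model(**encoding)

predictions = outputs.logits.argmax(-1).squeeze().tolist()
token_labels = encoding["labels"].squeeze().tolist()
token_boxes = encoding["bbox"].squeeze().tolist()
width, height = image.size

def unnormalize_box(box, width, height):
    # Dataset boxes are normalized to a 0-1000 grid; scale them back to pixels.
    return [width * box[0] / 1000, height * box[1] / 1000,
            width * box[2] / 1000, height * box[3] / 1000]

draw = ImageDraw.Draw(image)
for pred, label, box in zip(predictions, token_labels, token_boxes):
    if label == -100:  # special tokens and subword continuations carry no label
        continue
    pixel_box = unnormalize_box(box, width, height)
    draw.rectangle(pixel_box, outline="blue")
    draw.text((pixel_box[0], pixel_box[1] - 10), id2label[pred], fill="blue")
image.save("annotated.png")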
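The interface block still uses Gradio's legacy gr.inputs / gr.outputs namespaces and the enable_queue flag, which have been removed from recent Gradio releases. A rough equivalent on a current Gradio version might look like the sketch below, with process_image stubbed out as a placeholder for the annotation function defined in the file.

import gradio as gr
from PIL import Image

def process_image(image: Image.Image) -> Image.Image:
    # Placeholder for the LayoutLMv3 annotation function defined in the Space.
    return image

# Components now live at the top level (gr.Image) instead of gr.inputs/gr.outputs.
iface = gr.Interface(fn=process_image,
                     inputs=gr.Image(type="pil"),
                     outputs=gr.Image(type="pil", label="annotated predict image"),
                     title="DocumentAI - Extraction of Key Information using LayoutLMv3 model",
                     analytics_enabled=True)

iface.queue()  # replaces the removed enable_queue=True argument
iface.launch(inline=False, share=False, debug=False)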