Spaces: Runtime error
Update Key_Information_Extraction
Key_Information_Extraction  +66 -36  CHANGED
@@ -1,6 +1,33 @@
+# -*- coding: utf-8 -*-
+"""DocAI_DeploymentGradio.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1USSEj7nHh2n2hUhTJTC0Iwhj6mSR7-mD
+"""
+
 import os
 os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
 
+os.system('pip install pyyaml==5.1')
+
+os.system('pip install -q git+https://github.com/huggingface/transformers.git')
+
+os.system('pip install -q datasets seqeval')
+
+os.system('pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html')
+os.system('pip install -q detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html')
+os.system('pip install -q pytesseract')
+
+!pip install gradio
+
+!pip install -q git+https://github.com/huggingface/transformers.git
+
+!pip install h5py
+
+!pip install -q datasets seqeval
+
 import gradio as gr
 
 import numpy as np
@@ -16,27 +43,33 @@ from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D
 from datasets import load_dataset # this dataset uses the new Image feature :)
 
 from transformers import LayoutLMv3ForTokenClassification
-from transformers.data.data_collator import default_data_collator
 from transformers import AutoModelForTokenClassification
 
-import cv2
+#import cv2
 from PIL import Image, ImageDraw, ImageFont
 
-
-# pip install -q git+https://github.com/huggingface/transformers.git
-# !pip install h5py
-# It's useful for evaluation metrics such as F1 on sequence labeling tasks
-# !pip install -q datasets seqeval
+dataset = load_dataset("nielsr/funsd-layoutlmv3")
 
-
-dataset = load_dataset("nielsr/funsd-layoutlmv3")
-#dataset = load_dataset("G:\\BITS - MTECH\\Sem -4\\Final Report\\code\dataset")
+example = dataset["test"][0]
 
-
-
-
+#image_path = "/root/.cache/huggingface/datasets/nielsr___funsd-layoutlmv3/funsd/1.0.0/0e3f4efdfd59aa1c3b4952c517894f7b1fc4d75c12ef01bcc8626a69e41c1bb9/funsd-layoutlmv3-test.arrow"
+
+image_path = '/root/.cache/huggingface/datasets/nielsr___funsd-layoutlmv3/funsd/1.0.0/0e3f4efdfd59aa1c3b4952c517894f7b1fc4d75c12ef01bcc8626a69e41c1bb9'
 
 example = dataset["test"][0]
+example["image"].save("example1.png")
+
+example1 = dataset["test"][1]
+example1["image"].save("example2.png")
+
+example2 = dataset["test"][2]
+example2["image"].save("example3.png")
+
+example2["image"]
+
+#Image.open(dataset[2][image_path]).convert("RGB").save("example1.png")
+#Image.open(dataset[1]["image_path"]).convert("RGB").save("example2.png")
+#Image.open(dataset[0]["image_path"]).convert("RGB").save("example3.png")
 
 words, boxes, ner_tags = example["tokens"], example["bboxes"], example["ner_tags"]
 
@@ -69,7 +102,6 @@ num_labels = len(label_list)
 
 label2color = {'question':'blue', 'answer':'green', 'header':'orange', 'other':'violet'}
 
-
 def prepare_examples(examples):
     images = examples[image_column_name]
     words = examples[text_column_name]
@@ -82,7 +114,6 @@ def prepare_examples(examples):
     return encoding
 
 processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
-#model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
 
 model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base",
                                                          id2label=id2label,
@@ -97,12 +128,6 @@ features = Features({
     'labels': Sequence(feature=Value(dtype='int64')),
 })
 
-# train_dataset = dataset["train"].map(
-# prepare_examples,
-# batched=True,
-# remove_columns=column_names,
-# features=features,
-# )
 eval_dataset = dataset["test"].map(
     prepare_examples,
     batched=True,
@@ -123,21 +148,26 @@ def process_image(image):
     print(type(image))
     width, height = image.size
 
-    # encode
-    #encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
-    #offset_mapping = encoding.pop('offset_mapping')
-
     image = example["image"]
     words = example["tokens"]
     boxes = example["bboxes"]
     word_labels = example["ner_tags"]
 
-    encoding = processor(image, words, truncation=True,boxes=boxes, word_labels=word_labels,return_offsets_mapping=True, return_tensors="pt")
-    offset_mapping = encoding.pop('offset_mapping')
-
     for k,v in encoding.items():
        print(k,v.shape)
 
+    # encode
+    #encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
+    #offset_mapping = encoding.pop('offset_mapping')
+
+    #encoding = processor(image, words, truncation=True,boxes=boxes, word_labels=word_labels,return_offsets_mapping=True, return_tensors="pt")
+    #offset_mapping = encoding.pop('offset_mapping')
+
+    encoding = processor(image, truncation=True,boxes=boxes, word_labels=word_labels,return_offsets_mapping=True, return_tensors="pt")
+    offset_mapping = encoding.pop('offset_mapping')
+
+
+
     # forward pass
     with torch.no_grad():
        outputs = model(**encoding)
@@ -155,9 +185,9 @@ def process_image(image):
     token_boxes = encoding.bbox.squeeze().tolist()
     width, height = image.size
 
-    true_predictions = [model.config.id2label[pred] for pred, label in zip(predictions, labels) if label != - 100]
-    true_labels = [model.config.id2label[label] for prediction, label in zip(predictions, labels) if label != -100]
-    true_boxes = [unnormalize_box(box, width, height) for box, label in zip(token_boxes, labels) if label != -100]
+    #true_predictions = [model.config.id2label[pred] for pred, label in zip(predictions, labels) if label != - 100]
+    #true_labels = [model.config.id2label[label] for prediction, label in zip(predictions, labels) if label != -100]
+    #true_boxes = [unnormalize_box(box, width, height) for box, label in zip(token_boxes, labels) if label != -100]
 
 
     # only keep non-subword predictions
@@ -175,7 +205,7 @@ def process_image(image):
 
     return image
 
-title = "DocumentAI - Extraction using LayoutLMv3 model"
+title = "DocumentAI - Extraction of Key Information using LayoutLMv3 model"
 description = "Extraction of Form or Invoice Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
 
 article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
@@ -186,13 +216,13 @@ css = """.output_image, .input_image {height: 600px !important}"""
 
 iface = gr.Interface(fn=process_image,
                      inputs=gr.inputs.Image(type="pil"),
-                     outputs=gr.outputs.Image(type="pil", label="annotated image"),
+                     outputs=gr.outputs.Image(type="pil", label="annotated predict image"),
                      title=title,
                      description=description,
                      article=article,
                      examples=examples,
                      css=css,
-                     analytics_enabled = True, enable_queue=True
-
-iface.launch(inline=False, share=False, debug=False)
+                     analytics_enabled = True, enable_queue=True
+                     )
 
+iface.launch(inline=False, share=False, debug=False)
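
Taken together, the changed file reduces process_image to a processor → model → argmax → draw pipeline over one FUNSD page. Below is a minimal, self-contained sketch of that inference path, assuming the nielsr/funsd-layoutlmv3 dataset and the microsoft/layoutlmv3-base checkpoint referenced in the file are reachable; it illustrates the technique rather than reproducing the committed code line for line, and since the base checkpoint is not fine-tuned the predicted tags are untrained.

# Minimal LayoutLMv3 token-classification inference sketch (illustrative only;
# mirrors the Space's approach, not its exact code).
import torch
from datasets import load_dataset
from PIL import ImageDraw
from transformers import AutoProcessor, LayoutLMv3ForTokenClassification

dataset = load_dataset("nielsr/funsd-layoutlmv3")
example = dataset["test"][0]
image = example["image"].convert("RGB")
words, boxes, word_labels = example["tokens"], example["bboxes"], example["ner_tags"]

# Label names come from the dataset's ClassLabel feature.
labels = dataset["test"].features["ner_tags"].feature.names
id2label = {i: label for i, label in enumerate(labels)}
label2id = {label: i for i, label in id2label.items()}

processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
model = LayoutLMv3ForTokenClassification.from_pretrained(
    "microsoft/layoutlmv3-base", id2label=id2label, label2id=label2id)

# apply_ocr=False means we pass the words and normalized boxes ourselves.
encoding = processor(image, words, boxes=boxes, word_labels=word_labels,
                     truncation=True, return_offsets_mapping=True, return_tensors="pt")
offset_mapping = encoding.pop("offset_mapping")

with torch.no_grad():
    outputs = model(**encoding)

predictions = outputs.logits.argmax(-1).squeeze().tolist()
token_labels = encoding["labels"].squeeze().tolist()
token_boxes = encoding["bbox"].squeeze().tolist()
width, height = image.size

def unnormalize_box(box, width, height):
    # Dataset boxes are normalized to a 0-1000 grid; scale them back to pixels.
    return [width * box[0] / 1000, height * box[1] / 1000,
            width * box[2] / 1000, height * box[3] / 1000]

draw = ImageDraw.Draw(image)
for pred, label, box in zip(predictions, token_labels, token_boxes):
    if label == -100:  # special tokens and subword continuations carry no label
        continue
    pixel_box = unnormalize_box(box, width, height)
    draw.rectangle(pixel_box, outline="blue")
    draw.text((pixel_box[0], pixel_box[1] - 10), id2label[pred], fill="blue")
image.save("annotated.png")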
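The interface block still uses Gradio's legacy gr.inputs / gr.outputs namespaces and the enable_queue flag, which have been removed from recent Gradio releases. A rough equivalent on a current Gradio version might look like the sketch below, with process_image stubbed out as a placeholder for the annotation function defined in the file.

import gradio as gr
from PIL import Image

def process_image(image: Image.Image) -> Image.Image:
    # Placeholder for the LayoutLMv3 annotation function defined in the Space.
    return image

# Components now live at the top level (gr.Image) instead of gr.inputs/gr.outputs.
iface = gr.Interface(fn=process_image,
                     inputs=gr.Image(type="pil"),
                     outputs=gr.Image(type="pil", label="annotated predict image"),
                     title="DocumentAI - Extraction of Key Information using LayoutLMv3 model",
                     analytics_enabled=True)

iface.queue()  # replaces the removed enable_queue=True argument
iface.launch(inline=False, share=False, debug=False)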