Goodsea mertcobanov committed on
Commit
7c2c0f7
·
0 Parent(s):

Duplicate from deprem-ml/deprem-ocr

Browse files

Co-authored-by: Mert Cobanov <[email protected]>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +152 -0
  4. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Deprem Ocr 2
3
+ emoji: 👀
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.17.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: deprem-ml/deprem-ocr
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from easyocr import Reader
3
+ from PIL import Image
4
+ import io
5
+ import json
6
+ import csv
7
+ import openai
8
+ import ast
9
+ import os
10
+ from deta import Deta
11
+
12
+
13
# The OpenAI key is taken from the API_KEY environment variable (None if unset).
openai.api_key = os.environ.get('API_KEY')

# Single module-wide EasyOCR reader, built once at import time for the "tr" language code.
reader = Reader(["tr"])
15
+
16
+
17
def get_parsed_address(input_img):
    """Run OCR on an image and return the address data extracted from its text.

    The text recognized by ``get_text`` is passed unchanged to
    ``openai_response``; that function's return value is returned as-is.
    """
    return openai_response(get_text(input_img))
21
+
22
+
23
def preprocess_img(inp_image):
    """Convert an image to grayscale and invert it.

    Args:
        inp_image: Image array accepted by ``cv2.cvtColor`` with the
            ``COLOR_BGR2GRAY`` conversion code.
            NOTE(review): presumably a 3-channel BGR array -- confirm the
            channel order at the call site before wiring this in.

    Returns:
        The bitwise-inverted grayscale image.
    """
    # cv2 is not imported at module level in this file, so the original body
    # raised NameError on the first call. Import it where it is used.
    import cv2

    gray = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
    return cv2.bitwise_not(gray)
27
+
28
+
29
def get_text(input_img):
    """Return the OCR output for *input_img* as one space-separated string.

    The items returned by ``reader.readtext(input_img, detail=0)`` are joined
    with single spaces.
    """
    fragments = reader.readtext(input_img, detail=0)
    return " ".join(fragments)
32
+
33
+
34
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to ``adress_book.csv`` and return that row.

    Args:
        mahalle: Neighbourhood.
        il: Province / city.
        sokak: Street.
        apartman: Apartment building.

    Returns:
        The list ``[mahalle, il, sokak, apartman]`` that was written.
    """
    adres_full = [mahalle, il, sokak, apartman]

    # newline="" hands line-ending control to the csv module. Without it the
    # writer's "\r\n" terminator is translated again on Windows (blank lines
    # between rows) and newlines embedded in quoted fields are corrupted.
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerow(adres_full)
    return adres_full
41
+
42
+
43
def get_json(mahalle, il, sokak, apartman):
    """Return the four address parts as a pretty-printed JSON object string.

    Keys are ``mahalle``, ``il``, ``sokak`` and ``apartman`` in that order;
    the output uses a 4-space indent and keeps non-ASCII characters unescaped.
    """
    field_names = ("mahalle", "il", "sokak", "apartman")
    field_values = (mahalle, il, sokak, apartman)
    return json.dumps(dict(zip(field_names, field_values)), indent=4, ensure_ascii=False)
47
+
48
def write_db(data_dict):
    """Insert *data_dict* as a new record into the ``deprem-ocr`` Deta Base.

    A Deta client is created on every call from the ``DETA_KEY`` environment
    variable.
    """
    client = Deta(os.getenv('DETA_KEY'))
    client.Base("deprem-ocr").insert(data_dict)
56
+
57
+
58
def text_dict(input):
    """Parse a textual dict, store it, and return its fields for display.

    Args:
        input: String holding a Python dict literal (for example the text of
            the "Tam Adres" box). ``None`` or a blank string means "no data".

    Returns:
        An 8-tuple of strings in the order city, distinct, neighbourhood,
        street, address, tel, name_surname, no. Keys missing from the parsed
        dict yield ``''``; blank input yields eight empty strings.

    Raises:
        ValueError: if the text is not a valid literal or is not a dict.
        SyntaxError: if the text is not syntactically valid Python.
    """
    field_order = (
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'address',
        'tel',
        'name_surname',
        'no',
    )

    # ast.literal_eval cannot parse an empty string, so treat blank input as
    # "nothing to show" and do not write an empty record to the database.
    if input is None or not str(input).strip():
        return ('',) * len(field_order)

    eval_result = ast.literal_eval(input)
    if not isinstance(eval_result, dict):
        # Validate before persisting so a non-dict literal is never stored.
        raise ValueError(
            f"expected a dict literal, got {type(eval_result).__name__}"
        )

    write_db(eval_result)

    # .get() mirrors openai_response, which also treats absent keys as ''.
    return tuple(str(eval_result.get(key, '')) for key in field_order)
72
+
73
def openai_response(ocr_input):
    """Extract structured address fields from free text via an OpenAI completion.

    Args:
        ocr_input: Plain text (OCR output or a typed message) to analyse.

    Returns:
        The dict parsed from the completion, with ``input`` set to
        *ocr_input* and each of city, distinct, neighbourhood, street, no,
        tel, name_surname and address defaulted to ``''`` when absent.

    Raises:
        ValueError, SyntaxError: if the completion text is not a valid
            Python literal.
    """
    # The continuation lines are kept flush-left so that no extra leading
    # whitespace ends up inside the prompt text.
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
plain text input and especially from emergency text that carries address information, your inputs can be text
of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
to only extract keys that are shared as an example in the examples section, if a key value is not found in the
text input, then it should be ignored. Have only city, distinct, neighbourhood,
street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
Input: {ocr_input}
Output:
"""

    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=300,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\n"],
    )
    resp = response["choices"][0]["text"]
    print(resp)

    # SECURITY: the completion is derived from untrusted OCR/user text, so it
    # must not go through eval() (arbitrary code execution). literal_eval
    # accepts only Python literals, matching what text_dict already does.
    # strip() is needed because literal_eval rejects leading whitespace on
    # Python < 3.10, whereas eval() tolerated it.
    cleaned = resp.replace("'{", "{").replace("}'", "}").strip()
    resp = ast.literal_eval(cleaned)
    resp["input"] = ocr_input

    dict_keys = [
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'no',
        'tel',
        'name_surname',
        'address',
        'input',
    ]
    # Make sure every expected key exists so downstream lookups cannot fail.
    for key in dict_keys:
        resp.setdefault(key, '')
    return resp
115
+
116
+
117
# Gradio UI: an image or a text message goes in, the parsed address comes out
# both as raw text ("Tam Adres") and as individual fields.
# NOTE(review): the Row/Column nesting below is reconstructed; the flattened
# source does not show indentation -- confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(
"""
# Enkaz Bildirme Uygulaması
""")
    gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")
    with gr.Row():
        img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
        ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
    open_api_text = gr.Textbox(label="Tam Adres")
    # A Button's caption comes from `value`; `label` left it at the default text.
    submit_button = gr.Button(value="Yükle")
    with gr.Column():
        with gr.Row():
            city = gr.Textbox(label="İl")
            distinct = gr.Textbox(label="İlçe")
        with gr.Row():
            neighbourhood = gr.Textbox(label="Mahalle")
            street = gr.Textbox(label="Sokak/Cadde/Bulvar")
        with gr.Row():
            tel = gr.Textbox(label="Telefon")
        with gr.Row():
            name_surname = gr.Textbox(label="İsim Soyisim")
            address = gr.Textbox(label="Adres")
        with gr.Row():
            no = gr.Textbox(label="Kapı No")

    # Image path: OCR the upload, then extract the address into the raw text box.
    submit_button.click(get_parsed_address, inputs=img_area, outputs=open_api_text, api_name="upload_image")

    # Text path: extract the address directly from the typed message.
    ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")

    # Whenever the raw result changes, store it and fan it out to the fields.
    open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ Pillow
3
+ easyocr
4
+ gradio
5
+ deta