prithivMLmods committed on
Commit
d9b779f
·
verified ·
1 Parent(s): e65ca6b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +295 -1
README.md CHANGED
@@ -40,4 +40,298 @@ tags:
40
 
41
  !pip install gradio spaces transformers accelerate numpy requests torch torchvision qwen-vl-utils av ipython reportlab fpdf python-docx pillow huggingface_hub
42
 
43
- *ChemQwen With Inference Documentation, **Before using, make sure that the `hf_token` is provided in the login field in the code below.***
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  !pip install gradio spaces transformers accelerate numpy requests torch torchvision qwen-vl-utils av ipython reportlab fpdf python-docx pillow huggingface_hub
42
 
43
+ *ChemQwen with inference documentation. **Before running, replace the placeholder `hf_token` value passed to `login()` in the code below with your own Hugging Face token.***
44
+
45
+ ```python
46
+
47
# Authenticate with Hugging Face
import os

from huggingface_hub import login

# Prefer a token supplied through the HF_TOKEN environment variable so the
# secret does not have to be pasted into (and accidentally shared with) the
# source. The inline placeholder is only a fallback and must be replaced by a
# real token if the environment variable is not set.
hf_token = os.environ.get("HF_TOKEN", '----xxxxx----')
login(hf_token)
53
+
54
+ # Demo
55
+ import gradio as gr
56
+ import spaces
57
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
58
+ from qwen_vl_utils import process_vision_info
59
+ import torch
60
+ from PIL import Image
61
+ import os
62
+ import uuid
63
+ import io
64
+ from threading import Thread
65
+ from reportlab.lib.pagesizes import A4
66
+ from reportlab.lib.styles import getSampleStyleSheet
67
+ from reportlab.lib import colors
68
+ from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
69
+ from reportlab.pdfbase import pdfmetrics
70
+ from reportlab.pdfbase.ttfonts import TTFont
71
+ import docx
72
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
73
+
74
# Selectable models: display name -> Hugging Face repository id.
MODEL_OPTIONS = {
    "ChemQwen": "prithivMLmods/ChemQwen-vL",
}


def _load_model_and_processor(model_id):
    """Return the fp16 CUDA model (in eval mode) and the processor for ``model_id``."""
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.float16
    ).to("cuda").eval()
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor


# Load every model and its processor once at start-up; both dicts are keyed by
# the display name used in MODEL_OPTIONS.
models = {}
processors = {}
for name, model_id in MODEL_OPTIONS.items():
    print(f"Loading {name}...")
    models[name], processors[name] = _load_model_and_processor(model_id)

# Mapping of file extensions known to Pillow; its keys are used to recognise
# image paths by suffix.
image_extensions = Image.registered_extensions()
92
+
93
def identify_and_save_blob(blob_path):
    """Check that the file at ``blob_path`` is an image and save a copy of it.

    The file's bytes are validated with Pillow and then written unchanged to a
    new, uniquely named ``temp_<uuid>_media.png`` file in the current working
    directory. The ``.png`` suffix is only a default name; the bytes are not
    re-encoded.

    Args:
        blob_path: Path of the file to inspect.

    Returns:
        A ``(filename, media_type)`` tuple where ``filename`` is the path of the
        saved copy and ``media_type`` is always ``"image"``.

    Raises:
        ValueError: If the file does not exist, is not a valid image, or any
            other error occurs while reading or writing it. The original
            exception is attached as ``__cause__``.
    """
    try:
        with open(blob_path, 'rb') as file:
            blob_content = file.read()

        try:
            Image.open(io.BytesIO(blob_content)).verify()  # Check if it's a valid image
        except (IOError, SyntaxError) as err:
            raise ValueError("Unsupported media type. Please upload a valid image.") from err

        extension = ".png"  # Default to PNG for saving
        media_type = "image"

        filename = f"temp_{uuid.uuid4()}_media{extension}"
        with open(filename, "wb") as f:
            f.write(blob_content)

        return filename, media_type

    except FileNotFoundError as err:
        raise ValueError(f"The file {blob_path} was not found.") from err
    except ValueError:
        # Already carries a user-facing message (e.g. unsupported media type);
        # let it through instead of re-wrapping it in the generic message below.
        raise
    except Exception as err:
        raise ValueError(f"An error occurred while processing the file: {err}") from err
115
+
116
@spaces.GPU
def qwen_inference(model_name, media_input, text_input=None):
    """Stream the selected model's answer about an uploaded image.

    Args:
        model_name: Key into the module-level ``models`` and ``processors`` dicts.
        media_input: Filesystem path of the uploaded file. Paths whose suffix is
            a Pillow-registered image extension are used directly; anything else
            is validated and copied by ``identify_and_save_blob``.
        text_input: Question about the image. ``None`` is treated as an empty
            prompt.

    Yields:
        The response text accumulated so far, once per streamed chunk.

    Raises:
        ValueError: If no file path was supplied or the file is not a valid image.
    """
    model = models[model_name]
    processor = processors[model_name]

    # Without this guard a missing upload (media_input is None) left
    # media_path/media_type unbound and failed later with UnboundLocalError.
    if not isinstance(media_input, str):
        raise ValueError("Unsupported media type. Please upload a valid image.")

    media_path = media_input
    # Compare case-insensitively so e.g. "photo.PNG" is recognised by suffix.
    if media_path.lower().endswith(tuple(image_extensions)):
        media_type = "image"
    else:
        try:
            media_path, media_type = identify_and_save_blob(media_input)
        except Exception as err:
            raise ValueError("Unsupported media type. Please upload a valid image.") from err

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": media_type,
                    media_type: media_path
                },
                {"type": "text", "text": text_input or ""},
            ],
        }
    ]

    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, _ = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        padding=True,
        return_tensors="pt",
    ).to("cuda")

    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        # Remove <|im_end|> or similar tokens from the output
        buffer = buffer.replace("<|im_end|>", "")
        yield buffer

    # The streamer is exhausted, so generation has finished; reap the worker.
    thread.join()
170
+
171
def format_plain_text(output_text):
    """Return ``output_text`` with the LaTeX delimiters removed.

    The inline (``\\(`` ``\\)``) and display (``\\[`` ``\\]``) math delimiters
    are deleted; the text between them is kept unchanged.

    Args:
        output_text: Text to clean up. ``None`` or an empty string (e.g. from an
            empty textbox) yields ``""``.

    Returns:
        The text without LaTeX delimiters.
    """
    if not output_text:
        return ""

    plain_text = output_text
    for delimiter in ("\\(", "\\)", "\\[", "\\]"):
        plain_text = plain_text.replace(delimiter, "")
    return plain_text
176
+
177
def generate_document(media_path, output_text, file_format, font_size, line_spacing, alignment, image_size):
    """Generate a document containing the input image and the plain-text output.

    Args:
        media_path: Path of the image to embed.
        output_text: Model output; LaTeX delimiters are stripped before use.
        file_format: ``"pdf"`` or ``"docx"``.
        font_size: Font size in points (anything ``int()`` accepts).
        line_spacing: Line-spacing multiplier.
        alignment: ``"Left"``, ``"Center"``, ``"Right"`` or ``"Justified"``.
        image_size: ``"Small"``, ``"Medium"`` or ``"Large"``.

    Returns:
        The filename of the generated document.

    Raises:
        ValueError: If ``file_format`` is neither ``"pdf"`` nor ``"docx"``.
    """
    # Fail loudly on an unknown format instead of silently returning None.
    if file_format not in ("pdf", "docx"):
        raise ValueError(
            f"Unsupported file format: {file_format!r}. Expected 'pdf' or 'docx'."
        )

    plain_text = format_plain_text(output_text)
    builder = generate_pdf if file_format == "pdf" else generate_docx
    return builder(media_path, plain_text, font_size, line_spacing, alignment, image_size)
184
+
185
def generate_pdf(media_path, plain_text, font_size, line_spacing, alignment, image_size):
    """Generate an A4 PDF containing the image followed by the text.

    Args:
        media_path: Path of the image to embed.
        plain_text: Text placed below the image. It is treated as literal text:
            markup characters are escaped and newlines become line breaks.
        font_size: Font size in points (anything ``int()`` accepts).
        line_spacing: Multiplier applied to the font size to get the leading.
        alignment: ``"Left"``, ``"Center"``, ``"Right"`` or ``"Justified"``.
        image_size: ``"Small"``, ``"Medium"`` or ``"Large"``; the image is
            scaled proportionally to fit the matching bounding box.

    Returns:
        The filename of the generated PDF (``output_<uuid>.pdf``).

    Raises:
        KeyError: If ``alignment`` or ``image_size`` is not one of the listed values.
    """
    # Imported here because the file's import block never brings these names
    # into scope (the original body referenced ``inch`` without importing it).
    from xml.sax.saxutils import escape

    from reportlab.lib.units import inch

    filename = f"output_{uuid.uuid4()}.pdf"
    doc = SimpleDocTemplate(
        filename,
        pagesize=A4,
        rightMargin=inch,
        leftMargin=inch,
        topMargin=inch,
        bottomMargin=inch
    )
    styles = getSampleStyleSheet()
    body_style = styles["Normal"]
    body_style.fontSize = int(font_size)
    body_style.leading = int(font_size) * line_spacing
    body_style.alignment = {
        "Left": 0,
        "Center": 1,
        "Right": 2,
        "Justified": 4
    }[alignment]

    story = []

    # Bounding box (in points) for each image size option.
    image_sizes = {
        "Small": (200, 200),
        "Medium": (400, 400),
        "Large": (600, 600)
    }
    box_width, box_height = image_sizes[image_size]
    # Keep the image inside the page's text area: a 600pt "Large" image is
    # wider than A4 minus the one-inch margins and would not fit the frame.
    # NOTE(review): the 12pt allowance assumes reportlab's default 6pt frame
    # padding on each side - confirm against the reportlab version in use.
    max_width = doc.width - 12
    max_height = doc.height - 12
    img = RLImage(
        media_path,
        width=min(box_width, max_width),
        height=min(box_height, max_height),
        kind="proportional",  # preserve the aspect ratio inside the box
    )
    story.append(img)
    story.append(Spacer(1, 12))

    # Paragraph parses its text as XML-like markup, so escape &, < and > in the
    # model output and turn newlines into explicit line breaks.
    markup = escape(plain_text).replace("\n", "<br/>")
    story.append(Paragraph(markup, body_style))

    doc.build(story)
    return filename
224
+
225
def generate_docx(media_path, plain_text, font_size, line_spacing, alignment, image_size):
    """Build a DOCX file holding the image followed by the text.

    Args:
        media_path: Path of the picture to embed.
        plain_text: Text written below the picture as a single run.
        font_size: Font size in points (anything ``int()`` accepts).
        line_spacing: Value assigned to the paragraph's line spacing.
        alignment: ``"Left"``, ``"Center"``, ``"Right"`` or ``"Justified"``.
        image_size: ``"Small"``, ``"Medium"`` or ``"Large"``.

    Returns:
        The filename of the saved document (``output_<uuid>.docx``).
    """
    out_path = f"output_{uuid.uuid4()}.docx"
    document = docx.Document()

    # Picture first, sized by width only, then an empty paragraph as a spacer.
    picture_widths = {
        "Small": docx.shared.Inches(2),
        "Medium": docx.shared.Inches(4),
        "Large": docx.shared.Inches(6)
    }
    document.add_picture(media_path, width=picture_widths[image_size])
    document.add_paragraph()

    # Text paragraph with the requested spacing, alignment and font size.
    body = document.add_paragraph()
    body_format = body.paragraph_format
    body_format.line_spacing = line_spacing
    alignments = {
        "Left": WD_ALIGN_PARAGRAPH.LEFT,
        "Center": WD_ALIGN_PARAGRAPH.CENTER,
        "Right": WD_ALIGN_PARAGRAPH.RIGHT,
        "Justified": WD_ALIGN_PARAGRAPH.JUSTIFY
    }
    body_format.alignment = alignments[alignment]
    text_run = body.add_run(plain_text)
    text_run.font.size = docx.shared.Pt(int(font_size))

    document.save(out_path)
    return out_path
253
+
254
# Stylesheet handed to gr.Blocks: sizes the output area and colours the
# submit / download buttons.
css = """
#output {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
}
.submit-btn {
    background-color: #cf3434 !important;
    color: white !important;
}
.submit-btn:hover {
    background-color: #ff2323 !important;
}
.download-btn {
    background-color: #35a6d6 !important;
    color: white !important;
}
.download-btn:hover {
    background-color: #22bcff !important;
}
"""

# Gradio UI: one tab with the inference controls on top and the document
# export settings below.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# ChemQwen Chemical Identifier")

    with gr.Tab(label="Image Input"):

        with gr.Row():
            # Left column: model choice, image upload and question.
            with gr.Column():
                model_choice = gr.Dropdown(
                    label="Model Selection",
                    choices=list(MODEL_OPTIONS.keys()),
                    value="ChemQwen"
                )
                input_media = gr.File(
                    label="Upload Image", type="filepath"
                )
                text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
                submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")

            # Right column: streamed model output and its cleaned-up version.
            with gr.Column():
                output_text = gr.Textbox(label="Output Text", lines=10)
                plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)

        # Run inference first, then derive the plain-text version of the result.
        inference_event = submit_btn.click(
            qwen_inference, [model_choice, input_media, text_input], [output_text]
        )
        inference_event.then(
            format_plain_text, [output_text], [plain_text_output]
        )

        # Document export settings.
        with gr.Row():
            with gr.Column():
                line_spacing = gr.Dropdown(
                    choices=[0.5, 1.0, 1.15, 1.5, 2.0, 2.5, 3.0],
                    value=1.5,
                    label="Line Spacing"
                )
                font_size = gr.Dropdown(
                    choices=["8", "10", "12", "14", "16", "18", "20", "22", "24"],
                    value="18",
                    label="Font Size"
                )
                alignment = gr.Dropdown(
                    choices=["Left", "Center", "Right", "Justified"],
                    value="Justified",
                    label="Text Alignment"
                )
                image_size = gr.Dropdown(
                    choices=["Small", "Medium", "Large"],
                    value="Small",
                    label="Image Size"
                )
                file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
                get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")

        # Build the document from the uploaded image and the model output; the
        # resulting file is offered through a download component created here.
        document_inputs = [input_media, output_text, file_format, font_size, line_spacing, alignment, image_size]
        get_document_btn.click(
            generate_document, document_inputs, gr.File(label="Download Document")
        )

demo.launch(debug=True)
337
+ ```