Spaces:
Running
Running
# Copyright (c) Alibaba, Inc. and its affiliates. | |
import os | |
import gradio as gr | |
import modelscope_studio.components.antd as antd | |
import modelscope_studio.components.base as ms | |
from PIL import Image | |
import secrets | |
import tempfile | |
from http import HTTPStatus | |
from urllib3.exceptions import HTTPError | |
from pathlib import Path | |
os.environ['DASHSCOPE_HTTP_BASE_URL'] = 'https://dashscope.aliyuncs.com/api/v1' | |
# os.environ['DASHSCOPE_WEBSOCKET_BASE_URL'] = 'https://poc-dashscope.aliyuncs.com/api-ws/v1/inference' | |
import dashscope | |
from dashscope import MultiModalConversation | |
# The DashScope credential is read from the API_KEY environment variable.
# Indexing os.environ (rather than .get) makes a missing key fail right here
# with KeyError instead of later, on the first model call.
API_KEY = os.environ['API_KEY']
dashscope.api_key = API_KEY
# True when the MODELSCOPE_ENVIRONMENT environment variable equals 'studio'.
is_modelscope_studio = os.environ.get('MODELSCOPE_ENVIRONMENT') == 'studio'


def get_text(text: str, cn_text: str):
    """Pick the UI string for the current environment.

    Returns ``cn_text`` when ``is_modelscope_studio`` is true, otherwise
    ``text``.
    """
    return cn_text if is_modelscope_studio else text
def resolve_image(filename):
    """Join ``filename`` onto the directory that contains this source file."""
    here = os.path.dirname(__file__)
    return os.path.join(here, filename)
# Example prompts rendered as clickable cards in the UI. Each entry pairs a
# question ("description") with an image path resolved via resolve_image().
DEMO_LIST = [
    {"description": prompt, "image": resolve_image(path)}
    for prompt, path in (
        ("Evaluate the integral of the functions graphed using the formula for circles: ",
         "./examples/1.webp"),
        ("回答图中问题", "./examples/2.png"),
        ("图片中的滤液E是什么化学物质?", "./examples/3.png"),
        ("I want to know the volume of this sofa", "./examples/4.png"),
    )
]
def process_image(image, shouldConvert=False):
    """Write ``image`` to a uniquely named ``.jpg`` file and return its path.

    The file is created in the directory named by the ``GRADIO_TEMP_DIR``
    environment variable, or in ``<system temp dir>/gradio`` when that
    variable is unset or empty. The directory is created if needed.

    Args:
        image: A ``PIL.Image.Image`` instance.
        shouldConvert: When true, always flatten the image onto a white RGB
            background before saving. Images whose mode the JPEG writer
            cannot encode are flattened regardless of this flag, since
            saving them unchanged would raise.

    Returns:
        The path of the written ``tmp<random hex>.jpg`` file.
    """
    # Directory used for uploaded files.
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio")
    os.makedirs(uploaded_file_dir, exist_ok=True)

    # Random name so concurrent requests never collide.
    filename = os.path.join(uploaded_file_dir,
                            f"tmp{secrets.token_hex(20)}.jpg")

    # Modes Pillow's JPEG writer accepts; anything else (RGBA, LA, P, ...)
    # has to be converted before save() or it raises OSError.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if shouldConvert or image.mode not in jpeg_modes:
        # Going through RGBA guarantees paste() receives a valid alpha mask
        # even when the source image has no alpha channel of its own.
        rgba = image.convert("RGBA")
        flattened = Image.new("RGB", size=rgba.size, color=(255, 255, 255))
        flattened.paste(rgba, (0, 0), mask=rgba)
        image = flattened

    image.save(filename)
    return filename
def generate(image, query):
    """Send one image + question to ``qvq-72b-preview`` and stream the reply.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.
        query: The question text sent alongside the image.

    Yields:
        The ``text`` of the first content item of each streamed response
        whose text is non-empty.

    Raises:
        gr.Error: If no image was provided.
        HTTPError: If a streamed response carries a non-OK status code.
    """
    if image is None:
        # Fail with a readable UI message instead of an AttributeError
        # raised from inside process_image().
        raise gr.Error(
            get_text("Please upload an image first.", "请先上传图片。"))

    image_file = process_image(image)
    try:
        messages = [
            {
                'role': 'user',
                'content': [
                    {'image': f'file://{image_file}'},
                    {'text': query},
                ],
            },
        ]
        print('messages:', messages)
        responses = MultiModalConversation.call(
            model='qvq-72b-preview', messages=messages, stream=True,
        )
        for response in responses:
            if response.status_code != HTTPStatus.OK:
                raise HTTPError(f'response.code: {response.code}\nresponse.message: {response.message}')
            content = response.output.choices[0].message.content
            text = content[0].get('text') if content else None
            if text:
                print(text)
                yield text
    finally:
        # The temporary JPEG only exists for this request; remove it so the
        # temp directory does not grow with every submission. Best-effort:
        # a file that is already gone or locked must not mask the real
        # outcome of the request.
        try:
            os.remove(image_file)
        except OSError:
            pass
if __name__ == "__main__":
    # Script entry point: builds the Gradio/antd UI, wires the events and
    # launches the app.
    #
    # NOTE(review): the container nesting below was reconstructed from a copy
    # of this file whose indentation had been stripped. Confirm the placement
    # of the warning/Tour row, the Tour component and the event bindings
    # against the deployed layout.

    def on_clear():
        """Return updates that blank the question box and the image upload."""
        return {
            query_input: gr.update(value=None),
            input_image: gr.update(value=None),
        }

    def bind_on_example(_item):
        """Build a click handler that loads one DEMO_LIST entry.

        Taking the entry as an argument binds it per card, so every handler
        keeps its own example rather than the loop's last value.
        """

        def on_example():
            return (gr.update(value=_item['description']),
                    gr.update(value=_item['image']))

        return on_example

    with gr.Blocks() as demo:
        with ms.Application() as app:
            with antd.ConfigProvider(
                    locale="zh_CN" if is_modelscope_studio else None,
                    theme=dict(token=dict(colorPrimary="#a855f7"))):
                # Header card: logo and title.
                with antd.Card(elem_style=dict(marginBottom=12),
                               styles=dict(body=dict(padding=4))):
                    with antd.Flex(elem_style=dict(width="100%"),
                                   justify="center",
                                   align="center",
                                   gap=14):
                        with ms.Div(elem_style=dict(flexShrink=0)):
                            antd.Image(
                                resolve_image("./cutelogo.jpg"),
                                preview=False,
                                height=60)
                        with ms.Div():
                            antd.Typography.Title(
                                "QVQ-72B-Preview",
                                elem_style=dict(margin=0, fontSize=24),
                                level=1)
                with ms.AutoLoading():
                    with antd.Row(gutter=[8, 8], align="stretch"):
                        # Left column: inputs, action buttons and examples.
                        with antd.Col(xs=24, md=8):
                            with antd.Space(direction="vertical",
                                            elem_style=dict(width="100%")):
                                with antd.Space(direction="vertical",
                                                elem_style=dict(width="100%"),
                                                elem_id="input-container"):
                                    with ms.Fragment():
                                        input_image = gr.Image(
                                            type="pil",
                                            label="Upload",
                                            sources=["upload"])
                                    query_input = antd.Input.Textarea(
                                        placeholder=get_text("Ask a question", "输入一个问题"),
                                        auto_size=dict(maxRows=6, minRows=2),
                                        allow_clear=True)
                                    with antd.Flex(align="center",
                                                   justify="space-between"):
                                        antd.Typography.Text(
                                            get_text("Warning: This model only supports single-turn dialogue.", "注:当前模型只支持单轮对话,如需中文回答,提示词加“用中文回答”"), type="warning")
                                        tour_btn = antd.Button(get_text("Tour", "使用指引"),
                                                               variant="filled",
                                                               color="default")
                                with antd.Row(gutter=8):
                                    with antd.Col(span=12):
                                        clear_btn = antd.Button(get_text("Clear", "清除"),
                                                                block=True)
                                    with antd.Col(span=12):
                                        submit_btn = antd.Button(
                                            get_text("Submit", "提交"),
                                            type="primary",
                                            block=True,
                                            elem_id="submit-btn")
                                antd.Divider(get_text("Example", "示例"))
                                with antd.Flex(gap="small", wrap=True):
                                    for item in DEMO_LIST:
                                        with antd.Card(
                                                hoverable=True,
                                                elem_style=dict(
                                                    width="100%")) as example:
                                            if "description" in item:
                                                antd.Typography.Text(
                                                    item["description"])
                                            if "image" in item:
                                                antd.Image(item["image"],
                                                           preview=False)
                                        # Clicking a card fills both inputs
                                        # with that example.
                                        example.click(
                                            fn=bind_on_example(item),
                                            outputs=[query_input, input_image])
                        # Right column: the model's answer.
                        with antd.Col(xs=24, md=16):
                            with antd.Card(title=get_text("Answer", "答案"),
                                           elem_style=dict(height="100%"),
                                           elem_id="output-container"):
                                output = gr.Markdown(
                                    show_copy_button=True,
                                    latex_delimiters=[{
                                        "left": '$$',
                                        "right": '$$',
                                        "display": True
                                    }, {
                                        "left": '$',
                                        "right": '$',
                                        "display": False,
                                    }, {
                                        "left": '\\(',
                                        "right": '\\)',
                                        "display": False,
                                    }, {
                                        "left": '\\[',
                                        "right": '\\]',
                                        "display": True
                                    }])
                # Guided tour; each step targets an element by its elem_id.
                with antd.Tour(props=dict(open=False)) as tour:
                    antd.Tour.Step(
                        title=get_text("Step 1", "步骤 1"),
                        description=get_text("Upload image and enter text", "传入图片和文本"),
                        get_target=
                        "() => document.querySelector('#input-container')")
                    antd.Tour.Step(
                        title=get_text("Step 2", "步骤 2"),
                        description=get_text("Click submit button", "点击提交按钮"),
                        get_target=
                        "() => document.querySelector('#submit-btn')")
                    antd.Tour.Step(
                        title=get_text("Step 3", "步骤 3"),
                        description=get_text("Wait for result", "等待结果返回"),
                        get_target=
                        "() => document.querySelector('#output-container')"
                    )

        # Event wiring.
        tour_btn.click(fn=lambda: gr.update(props=dict(open=True)),
                       outputs=[tour])
        gr.on([tour.finish, tour.close],
              fn=lambda: gr.update(props=dict(open=False)),
              outputs=[tour])
        submit_btn.click(
            fn=generate,
            inputs=[input_image, query_input],
            outputs=[output])
        clear_btn.click(
            fn=on_clear,
            outputs=[input_image, query_input])

    demo.queue(default_concurrency_limit=50).launch()