# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import gradio as gr
import modelscope_studio.components.antd as antd
import modelscope_studio.components.base as ms
from PIL import Image
import secrets
import tempfile
from http import HTTPStatus
from urllib3.exceptions import HTTPError
from pathlib import Path
os.environ['DASHSCOPE_HTTP_BASE_URL'] = 'https://dashscope.aliyuncs.com/api/v1'
# os.environ['DASHSCOPE_WEBSOCKET_BASE_URL'] = 'https://poc-dashscope.aliyuncs.com/api-ws/v1/inference'
import dashscope
from dashscope import MultiModalConversation
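# The DashScope API key is read from the environment and applied to the SDK globally.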
API_KEY = os.environ['API_KEY']
dashscope.api_key = API_KEY
is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'
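# Return the Chinese UI string when running inside ModelScope Studio, otherwise the English one.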
def get_text(text: str, cn_text: str):
if is_modelscope_studio:
return cn_text
return text
def resolve_image(filename):
return os.path.join(os.path.dirname(__file__), filename)
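# Example prompts, each paired with a bundled example image.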
DEMO_LIST = [
{
"description": "Evaluate the integral of the functions graphed using the formula for circles: ",
"image": resolve_image("./examples/1.webp")
},
{
"description": "回答图中问题",
"image": resolve_image("./examples/2.png")
},
{
"description": "图片中的滤液E是什么化学物质?",
"image": resolve_image("./examples/3.png")
},
{
"description": "I want to know the volume of this sofa",
"image": resolve_image("./examples/4.png")
},
]
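# Save a PIL image as a temporary JPEG under the Gradio temp dir and return its path.
# With shouldConvert=True, an image with an alpha channel is first flattened onto a white background.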
def process_image(image, shouldConvert=False):
    # Directory for uploaded / temporary files
uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
Path(tempfile.gettempdir()) / "gradio")
os.makedirs(uploaded_file_dir, exist_ok=True)
    # Create a unique temporary file path
name = f"tmp{secrets.token_hex(20)}.jpg"
filename = os.path.join(uploaded_file_dir, name)
    # Save the uploaded image
if shouldConvert:
new_img = Image.new('RGB',
size=(image.width, image.height),
color=(255, 255, 255))
new_img.paste(image, (0, 0), mask=image)
image = new_img
image.save(filename)
return filename
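# Stream an answer for the given image/question pair from the qvq-72b-preview model via
# DashScope's multimodal conversation API, yielding the answer text as chunks arrive.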
def generate(image, query):
imageFile = process_image(image)
content = [
{'image': f'file://{imageFile}'},
{'text': query}
]
messages = [
{'role': 'user', 'content': content},
]
print('messages:', messages)
responses = MultiModalConversation.call(
model='qvq-72b-preview', messages=messages, stream=True,
)
for response in responses:
if not response.status_code == HTTPStatus.OK:
raise HTTPError(f'response.code: {response.code}\nresponse.message: {response.message}')
response = response.output.choices[0].message.content
if len(response) > 0 and response[0]['text']:
print(response[0]['text'])
yield response[0]['text']
if __name__ == "__main__":
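    # Reset both the question textbox and the image input.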
def on_clear():
return {
input: gr.update(value=None),
**{
item: gr.update(value=None)
for item in input_image
},
}
with gr.Blocks() as demo:
with ms.Application() as app:
with antd.ConfigProvider(
locale="zh_CN" if is_modelscope_studio else None,
theme=dict(token=dict(colorPrimary="#a855f7"))):
with antd.Card(elem_style=dict(marginBottom=12),
styles=dict(body=dict(padding=4))):
with antd.Flex(elem_style=dict(width="100%"),
justify="center",
align="center",
gap=14):
with ms.Div(elem_style=dict(flexShrink=0)):
antd.Image(
resolve_image("./cutelogo.jpg"),
preview=False,
height=60)
with ms.Div():
antd.Typography.Title(
"QVQ-72B-Preview",
elem_style=dict(margin=0, fontSize=24),
level=1)
with ms.AutoLoading():
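                    # Two-column layout: inputs and examples on the left, the streamed answer on the right.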
with antd.Row(gutter=[8, 8], align="stretch"):
with antd.Col(xs=24, md=8):
with antd.Space(direction="vertical",
elem_style=dict(width="100%")):
with antd.Space(direction="vertical",
elem_style=dict(width="100%"),
elem_id="input-container"):
with ms.Fragment():
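                                        # Note: the trailing comma makes input_image a one-element
                                        # tuple; later code uses input_image[0] and *input_image.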
input_image = gr.Image(
type="pil",
label="Upload",
sources=["upload"]),
input = antd.Input.Textarea(
placeholder=get_text("Ask a question", "输入一个问题"),
auto_size=dict(maxRows=6, minRows=2),
allow_clear=True)
with antd.Flex(align="center",
justify="space-between"):
antd.Typography.Text(
get_text("Warning: This model only supports single-turn dialogue.", "注:当前模型只支持单轮对话,如需中文回答,提示词加“用中文回答”"), type="warning")
tour_btn = antd.Button(get_text("Tour", "使用指引"),
variant="filled",
color="default")
with antd.Row(gutter=8):
with antd.Col(span=12):
clear_btn = antd.Button(get_text("Clear", "清除"),
block=True)
with antd.Col(span=12):
submit_btn = antd.Button(
get_text("Submit", "提交"),
type="primary",
block=True,
elem_id="submit-btn")
antd.Divider(get_text("Example", "示例"))
with antd.Flex(gap="small", wrap=True):
for item in DEMO_LIST:
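                                        # Bind the current item via a closure so each example card
                                        # fills in its own prompt text and image when clicked.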
def bind_on_example(_item):
def on_example():
return gr.update(
value=_item[
'description']
), gr.update(
value=_item['image'])
return on_example
with antd.Card(
hoverable=True,
elem_style=dict(
width="100%")) as example:
if "description" in item:
antd.Typography.Text(
item["description"])
if "image" in item:
antd.Image(item["image"],
preview=False)
example.click(
fn=bind_on_example(item),
outputs=[input, input_image[0]])
with antd.Col(xs=24, md=16):
with antd.Card(title=get_text("Answer", "答案"),
elem_style=dict(height="100%"),
elem_id="output-container"):
output = gr.Markdown(
show_copy_button=True,
latex_delimiters=[{
"left": '$$',
"right": '$$',
"display": True
}, {
"left": '$',
"right": '$',
"display": False,
}, {
"left": '\\(',
"right": '\\)',
"display": False,
}, {
"left": '\\[',
"right": '\\]',
"display": True
}])
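                    # Guided tour (opened via the "Tour" button) highlighting the input area,
                    # the submit button, and the output panel in turn.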
with antd.Tour(props=dict(open=False)) as tour:
antd.Tour.Step(
title=get_text("Step 1", "步骤 1"),
description=get_text("Upload image and enter text", "传入图片和文本"),
get_target=
"() => document.querySelector('#input-container')")
antd.Tour.Step(
title=get_text("Step 2","步骤 2"),
description=get_text("Click submit button", "点击提交按钮"),
get_target=
"() => document.querySelector('#submit-btn')")
antd.Tour.Step(
title=get_text("Step 3","步骤 3"),
description=get_text("Wait for result", "等待结果返回"),
get_target=
"() => document.querySelector('#output-container')"
)
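                    # Event wiring: open/close the tour, stream the answer on submit, clear the inputs.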
tour_btn.click(fn=lambda: gr.update(props=dict(open=True)),
outputs=[tour])
gr.on([tour.finish, tour.close],
fn=lambda: gr.update(props=dict(open=False)),
outputs=[tour])
submit_btn.click(
fn=generate,
inputs=[*input_image, input],
outputs=[output])
clear_btn.click(
fn=on_clear,
outputs=[*input_image, input])
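    # Queue requests, allowing up to 50 concurrent generations.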
demo.queue(default_concurrency_limit=50).launch()