# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import gradio as gr
import modelscope_studio.components.antd as antd
import modelscope_studio.components.base as ms
from PIL import Image
import secrets
import tempfile
from http import HTTPStatus
from urllib3.exceptions import HTTPError
from pathlib import Path
os.environ['DASHSCOPE_HTTP_BASE_URL'] = 'https://dashscope.aliyuncs.com/api/v1'
# os.environ['DASHSCOPE_WEBSOCKET_BASE_URL'] = 'https://poc-dashscope.aliyuncs.com/api-ws/v1/inference'
import dashscope
from dashscope import MultiModalConversation
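# The DashScope API key is read from the environment and applied to the SDK globally.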
API_KEY = os.environ['API_KEY']
dashscope.api_key = API_KEY
is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'
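# Return the Chinese UI string when running inside ModelScope Studio, otherwise the English one.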
def get_text(text: str, cn_text: str):
if is_modelscope_studio:
return cn_text
return text
def resolve_image(filename):
return os.path.join(os.path.dirname(__file__), filename)
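# Example prompts, each paired with a bundled example image.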
DEMO_LIST = [
{
"description": "Evaluate the integral of the functions graphed using the formula for circles: ",
"image": resolve_image("./examples/1.webp")
},
{
"description": "回答图中问题",
"image": resolve_image("./examples/2.png")
},
{
"description": "图片中的滤液E是什么化学物质?",
"image": resolve_image("./examples/3.png")
},
{
"description": "I want to know the volume of this sofa",
"image": resolve_image("./examples/4.png")
},
]
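# Save a PIL image as a temporary JPEG under the Gradio temp dir and return its path.
# With shouldConvert=True, an image with an alpha channel is first flattened onto a white background.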
def process_image(image, shouldConvert=False):
    # Directory for uploaded / temporary files
uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
Path(tempfile.gettempdir()) / "gradio")
os.makedirs(uploaded_file_dir, exist_ok=True)
    # Create a unique temporary file path
name = f"tmp{secrets.token_hex(20)}.jpg"
filename = os.path.join(uploaded_file_dir, name)
    # Save the uploaded image
if shouldConvert:
new_img = Image.new('RGB',
size=(image.width, image.height),
color=(255, 255, 255))
new_img.paste(image, (0, 0), mask=image)
image = new_img
image.save(filename)
return filename
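# Stream an answer for the given image/question pair from the qvq-72b-preview model via
# DashScope's multimodal conversation API, yielding the answer text as chunks arrive.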
def generate(image, query):
imageFile = process_image(image)
content = [
{'image': f'file://{imageFile}'},
{'text': query}
]
messages = [
{'role': 'user', 'content': content},
]
print('messages:', messages)
responses = MultiModalConversation.call(
model='qvq-72b-preview', messages=messages, stream=True,
)
for response in responses:
if not response.status_code == HTTPStatus.OK:
raise HTTPError(f'response.code: {response.code}\nresponse.message: {response.message}')
response = response.output.choices[0].message.content
if len(response) > 0 and response[0]['text']:
print(response[0]['text'])
yield response[0]['text']
if __name__ == "__main__":
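    # Reset both the question textbox and the image input.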
def on_clear():
return {
input: gr.update(value=None),
**{
item: gr.update(value=None)
for item in input_image
},
}
with gr.Blocks() as demo:
with ms.Application() as app:
with antd.ConfigProvider(
locale="zh_CN" if is_modelscope_studio else None,
theme=dict(token=dict(colorPrimary="#a855f7"))):
with antd.Card(elem_style=dict(marginBottom=12),
styles=dict(body=dict(padding=4))):
with antd.Flex(elem_style=dict(width="100%"),
justify="center",
align="center",
gap=14):
with ms.Div(elem_style=dict(flexShrink=0)):
antd.Image(
resolve_image("./cutelogo.jpg"),
preview=False,
height=60)
with ms.Div():
antd.Typography.Title(
"QVQ-72B-Preview",
elem_style=dict(margin=0, fontSize=24),
level=1)
with ms.AutoLoading():
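                    # Two-column layout: inputs and examples on the left, the streamed answer on the right.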
with antd.Row(gutter=[8, 8], align="stretch"):
with antd.Col(xs=24, md=8):
with antd.Space(direction="vertical",
elem_style=dict(width="100%")):
with antd.Space(direction="vertical",
elem_style=dict(width="100%"),
elem_id="input-container"):
with ms.Fragment():
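                                        # Note: the trailing comma makes input_image a one-element
                                        # tuple; later code uses input_image[0] and *input_image.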
input_image = gr.Image(
type="pil",
label="Upload",
sources=["upload"]),
input = antd.Input.Textarea(
placeholder=get_text("Ask a question", "输入一个问题"),
auto_size=dict(maxRows=6, minRows=2),
allow_clear=True)
with antd.Flex(align="center",
justify="space-between"):
antd.Typography.Text(
get_text("Warning: This model only supports single-turn dialogue.", "注:当前模型只支持单轮对话,如需中文回答,提示词加“用中文回答”"), type="warning")
tour_btn = antd.Button(get_text("Tour", "使用指引"),
variant="filled",
color="default")
with antd.Row(gutter=8):
with antd.Col(span=12):
clear_btn = antd.Button(get_text("Clear", "清除"),
block=True)
with antd.Col(span=12):
submit_btn = antd.Button(
get_text("Submit", "提交"),
type="primary",
block=True,
elem_id="submit-btn")
antd.Divider(get_text("Example", "示例"))
with antd.Flex(gap="small", wrap=True):
for item in DEMO_LIST:
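                                        # Bind the current item via a closure so each example card
                                        # fills in its own prompt text and image when clicked.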
def bind_on_example(_item):
def on_example():
return gr.update(
value=_item[
'description']
), gr.update(
value=_item['image'])
return on_example
with antd.Card(
hoverable=True,
elem_style=dict(
width="100%")) as example:
if "description" in item:
antd.Typography.Text(
item["description"])
if "image" in item:
antd.Image(item["image"],
preview=False)
example.click(
fn=bind_on_example(item),
outputs=[input, input_image[0]])
with antd.Col(xs=24, md=16):
with antd.Card(title=get_text("Answer", "答案"),
elem_style=dict(height="100%"),
elem_id="output-container"):
output = gr.Markdown(
show_copy_button=True,
latex_delimiters=[{
"left": '$$',
"right": '$$',
"display": True
}, {
"left": '$',
"right": '$',
"display": False,
}, {
"left": '\\(',
"right": '\\)',
"display": False,
}, {
"left": '\\[',
"right": '\\]',
"display": True
}])
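                    # Guided tour (opened via the "Tour" button) highlighting the input area,
                    # the submit button, and the output panel in turn.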
with antd.Tour(props=dict(open=False)) as tour:
antd.Tour.Step(
title=get_text("Step 1", "步骤 1"),
description=get_text("Upload image and enter text", "传入图片和文本"),
get_target=
"() => document.querySelector('#input-container')")
antd.Tour.Step(
title=get_text("Step 2","步骤 2"),
description=get_text("Click submit button", "点击提交按钮"),
get_target=
"() => document.querySelector('#submit-btn')")
antd.Tour.Step(
title=get_text("Step 3","步骤 3"),
description=get_text("Wait for result", "等待结果返回"),
get_target=
"() => document.querySelector('#output-container')"
)
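                    # Event wiring: open/close the tour, stream the answer on submit, clear the inputs.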
tour_btn.click(fn=lambda: gr.update(props=dict(open=True)),
outputs=[tour])
gr.on([tour.finish, tour.close],
fn=lambda: gr.update(props=dict(open=False)),
outputs=[tour])
submit_btn.click(
fn=generate,
inputs=[*input_image, input],
outputs=[output])
clear_btn.click(
fn=on_clear,
outputs=[*input_image, input])
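    # Queue requests, allowing up to 50 concurrent generations.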
demo.queue(default_concurrency_limit=50).launch()