Spaces:

Qwen
/

QVQ-72B-preview

Running

File size: 12,426 Bytes

# Copyright (c) Alibaba, Inc. and its affiliates.
import os

import gradio as gr
import modelscope_studio.components.antd as antd
import modelscope_studio.components.base as ms
from PIL import Image
import secrets
import tempfile
from http import HTTPStatus
from urllib3.exceptions import HTTPError

from pathlib import Path


import dashscope
from dashscope import MultiModalConversation
# DashScope credentials and endpoint are taken from the environment.
# Indexing os.environ directly means a missing variable raises KeyError
# as soon as this module is executed.
API_KEY = os.environ["API_KEY"]
BASE_URL = os.environ["DASHSCOPE_HTTP_BASE_URL"]
dashscope.api_key, dashscope.base_http_api_url = API_KEY, BASE_URL

# True only when MODELSCOPE_ENVIRONMENT is exactly "studio".
is_modelscope_studio = os.environ.get("MODELSCOPE_ENVIRONMENT") == "studio"

def get_text(text: str, cn_text: str):
    """Choose between two variants of a UI string.

    Returns ``cn_text`` when the module-level ``is_modelscope_studio`` flag
    is truthy, otherwise ``text``.
    """
    return cn_text if is_modelscope_studio else text

def resolve_image(filename):
    """Join ``filename`` onto the directory that contains this source file."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, filename)

# Example prompts shown as clickable cards: each entry pairs a question
# ("description") with the resolved path of its bundled image ("image").
DEMO_LIST = [
    {"description": description, "image": resolve_image(image_path)}
    for description, image_path in (
        ("Evaluate the integral of the functions graphed using the formula for circles: ",
         "./examples/1.webp"),
        ("请解答这道题", "./examples/5.png"),
        ("图片中的滤液E是什么化学物质?", "./examples/3.png"),
        ("I want to know the volume of this sofa", "./examples/4.png"),
    )
]

def process_image(image, shouldConvert=False):
    """Save a PIL image as a uniquely named ``.jpg`` file and return its path.

    The file is written to ``$GRADIO_TEMP_DIR`` when that variable is set and
    non-empty, otherwise to ``<system temp dir>/gradio``. The directory is
    created on demand.

    Args:
        image: PIL image to persist.
        shouldConvert: When true, an image in any non-RGB mode is converted
            to RGB before saving. Regardless of this flag, images carrying
            transparency are flattened onto a white background and modes the
            JPEG writer cannot store are converted to RGB, because the target
            file is always a ``.jpg``.

    Returns:
        The path of the written file, as a string.
    """
    # Directory that receives uploaded files.
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio")
    os.makedirs(uploaded_file_dir, exist_ok=True)

    # Random file name for the temporary copy.
    name = f"tmp{secrets.token_hex(20)}.jpg"
    filename = os.path.join(uploaded_file_dir, name)

    mode = image.mode
    has_alpha = mode in ("RGBA", "LA", "PA") or (
        mode == "P" and "transparency" in image.info)
    if has_alpha:
        # JPEG has no alpha channel: composite onto white, using the image's
        # own alpha band as the paste mask.
        rgba = image.convert("RGBA")
        flattened = Image.new('RGB', size=rgba.size, color=(255, 255, 255))
        flattened.paste(rgba, (0, 0), mask=rgba)
        image = flattened
    elif mode != "RGB" and (shouldConvert
                            or mode not in ("1", "L", "CMYK", "YCbCr")):
        # No transparency to flatten. A plain conversion is enough, and it
        # avoids using an opaque image as its own paste mask.
        image = image.convert("RGB")

    # Save the (possibly converted) image.
    image.save(filename)

    return filename

if __name__ == "__main__":

    def on_clear():
        """Return updates that reset the question box and every image input.

        The result maps each component to ``gr.update(value=None)``: first
        the text area ``input``, then each element of ``input_image``.
        """
        updates = {input: gr.update(value=None)}
        for image_component in input_image:
            updates[image_component] = gr.update(value=None)
        return updates

    # Build the page. Components are created inside nested context managers,
    # so the order of the statements below is significant.
    with gr.Blocks() as demo:
        with ms.Application() as app:
            with antd.ConfigProvider(
                    locale="zh_CN" if is_modelscope_studio else None,
                    theme=dict(token=dict(colorPrimary="#a855f7"))):
                # Header card: logo next to the model title.
                with antd.Card(elem_style=dict(marginBottom=12),
                               styles=dict(body=dict(padding=4))):
                    with antd.Flex(elem_style=dict(width="100%"),
                                   justify="center",
                                   align="center",
                                   gap=14):
                        with ms.Div(elem_style=dict(flexShrink=0)):
                            antd.Image(
                                resolve_image("./cutelogo.jpg"),
                                preview=False,
                                height=60,
                                width=60)
                        with ms.Div():
                            antd.Typography.Title(
                                "QVQ-72B-Preview",
                                elem_style=dict(margin=0, fontSize=24),
                                level=1)
                with ms.AutoLoading():
                    with antd.Row(gutter=[8, 8], align="stretch"):
                        # Left column: inputs, action buttons and examples.
                        with antd.Col(xs=24, md=8):
                            with antd.Space(direction="vertical",
                                            elem_style=dict(width="100%")):
                                # "input-container" is the target of tour step 1.
                                with antd.Space(direction="vertical",
                                                elem_style=dict(width="100%"),
                                                elem_id="input-container"):
                                    with ms.Fragment():
                                        # NOTE: the trailing comma after the
                                        # call makes `input_image` a 1-tuple;
                                        # the rest of this file relies on that
                                        # (`*input_image`, `input_image[0]`,
                                        # iteration in on_clear).
                                        input_image = gr.Image(
                                                    type="pil",
                                                    label="Upload",
                                                    sources=["upload"]),
                                    # NOTE: this name shadows the builtin
                                    # input() from here on.
                                    input = antd.Input.Textarea(
                                        placeholder=get_text("Ask a question", "输入一个问题"),
                                        auto_size=dict(maxRows=6, minRows=2),
                                        allow_clear=True)

                                # Single-turn warning plus the button that opens the tour.
                                with antd.Flex(align="center",
                                               justify="space-between"):
                                    antd.Typography.Text(
                                        get_text("Warning: This model only supports single-turn dialogue.",  "注：当前模型只支持单轮对话，如需中文回答，提示词加“用中文回答”"), type="warning")
                                    tour_btn = antd.Button(get_text("Tour", "使用指引"),
                                                           variant="filled",
                                                           color="default")

                                # Clear / Submit buttons, each taking half the row.
                                with antd.Row(gutter=8):
                                    with antd.Col(span=12):
                                        clear_btn = antd.Button(get_text("Clear", "清除"),
                                                                block=True)
                                    with antd.Col(span=12):
                                        # "submit-btn" is the target of tour step 2.
                                        submit_btn = antd.Button(
                                            get_text("Submit", "提交"),
                                            type="primary",
                                            block=True,
                                            elem_id="submit-btn")

                                antd.Divider(get_text("Example", "示例"))

                                # One clickable card per DEMO_LIST entry.
                                with antd.Flex(gap="small", wrap=True):
                                    for item in DEMO_LIST:

                                        # Factory that captures the current
                                        # entry as `_item`, so every card's
                                        # handler returns its own example
                                        # rather than the loop variable's
                                        # final value.
                                        def bind_on_example(_item):
                                            # Returns two updates, matching
                                            # outputs=[input, input_image[0]]
                                            # below: question text, then image.
                                            def on_example():
                                                return gr.update(
                                                        value=_item[
                                                            'description']
                                                    ), gr.update(
                                                        value=_item['image'])

                                            return on_example

                                        with antd.Card(
                                                hoverable=True,
                                                elem_style=dict(
                                                    width="100%")) as example:
                                            if "description" in item:
                                                antd.Typography.Text(
                                                    item["description"])
                                            if "image" in item:
                                                antd.Image(item["image"],
                                                           preview=False)
                                        # Clicking the card copies the example
                                        # into the text area and image input.
                                        example.click(
                                            fn=bind_on_example(item),
                                            outputs=[input, input_image[0]])

                        # Right column: answer panel with a Stop button in
                        # the card's "extra" slot.
                        with antd.Col(xs=24, md=16):
                            # "output-container" is the target of tour step 3.
                            with antd.Card(title=get_text("Answer", "答案"),
                                           elem_style=dict(height="100%"),
                                           elem_id="output-container"):
                                with ms.Slot("extra"):
                                    # Starts disabled; `generate` enables it
                                    # while a response is streaming.
                                    cancel_btn = antd.Button(get_text("Stop", "停止"),
                                                                block=True, disabled=True)
                                # `$$…$$` and `\[…\]` are configured as display
                                # math; `$…$` and `\(…\)` as inline math.
                                output = gr.Markdown(
                                    show_copy_button=True,
                                    latex_delimiters=[{
                                        "left": '$$',
                                        "right": '$$',
                                        "display": True
                                    }, {
                                        "left": '$',
                                        "right": '$',
                                        "display": False,
                                    }, {
                                        "left": '\\(',
                                        "right": '\\)',
                                        "display": False,
                                    }, {
                                        "left": '\\[',
                                        "right": '\\]',
                                        "display": True
                                    }])
                    # Three-step guided tour, created closed. Each step
                    # targets one of the element ids assigned above.
                    with antd.Tour(props=dict(open=False)) as tour:
                        antd.Tour.Step(
                            title=get_text("Step 1", "步骤 1"),
                            description=get_text("Upload image and enter text", "传入图片和文本"),
                            get_target=
                            "() => document.querySelector('#input-container')")
                        antd.Tour.Step(
                            title=get_text("Step 2","步骤 2"),
                            description=get_text("Click submit button", "点击提交按钮"),
                            get_target=
                            "() => document.querySelector('#submit-btn')")
                        antd.Tour.Step(
                            title=get_text("Step 3","步骤 3"),
                            description=get_text("Wait for result", "等待结果返回"),
                            get_target=
                            "() => document.querySelector('#output-container')"
                        )

                    # The Tour button opens the tour; its finish and close
                    # events set it back to closed.
                    tour_btn.click(fn=lambda: gr.update(props=dict(open=True)),
                                   outputs=[tour])
                    gr.on([tour.finish, tour.close],
                          fn=lambda: gr.update(props=dict(open=False)),
                          outputs=[tour])
                    def generate(image, query):
                        """Stream the model's answer for one image + question.

                        Args:
                            image: PIL image from the upload component, or
                                ``None`` when nothing was uploaded.
                            query: Question text entered by the user.

                        Yields:
                            Dicts keyed by output component: first an update
                            enabling the stop button, then one update of the
                            answer panel per non-empty streamed text, and
                            finally an update disabling the stop button.

                        Raises:
                            gr.Error: If no image was uploaded.
                            HTTPError: If a streamed response reports a
                                status other than ``HTTPStatus.OK``.
                        """
                        if image is None:
                            # Fail with a readable message instead of an
                            # AttributeError from process_image(None).
                            raise gr.Error(
                                get_text("Please upload an image first.",
                                         "请先上传图片"))

                        image_file = process_image(image)
                        try:
                            messages = [{
                                'role': 'user',
                                'content': [
                                    {'image': f'file://{image_file}'},
                                    {'text': query},
                                ],
                            }]
                            print('messages:', messages)
                            responses = MultiModalConversation.call(
                                model='qvq-72b-preview', messages=messages, stream=True,
                            )
                            yield {
                                cancel_btn: gr.update(disabled=False)
                            }
                            for response in responses:
                                if response.status_code != HTTPStatus.OK:
                                    raise HTTPError(f'response.code: {response.code}\nresponse.message: {response.message}')
                                # Separate name so the loop variable is not
                                # rebound to the message content.
                                content = response.output.choices[0].message.content
                                if content and content[0]['text']:
                                    print(content[0]['text'])
                                    yield {
                                        output: content[0]['text']
                                    }
                            yield {
                                cancel_btn: gr.update(disabled=True)
                            }
                        finally:
                            # Remove the temporary JPEG whether the stream
                            # finished, failed, or the generator was closed.
                            # Otherwise every request leaves a file behind.
                            # No yield here, so closing the generator is safe.
                            try:
                                os.remove(image_file)
                            except OSError:
                                # Best-effort cleanup: the file may already
                                # be gone.
                                pass

                    # Submit runs `generate` with the uploaded image and the
                    # question. The returned event is kept so the Stop button
                    # can cancel it.
                    output_process = submit_btn.click(
                        fn=generate,
                        inputs=[*input_image, input],
                        outputs=[output, cancel_btn])
                    # Clear resets the image input and the question box.
                    clear_btn.click(
                        fn=on_clear,
                        outputs=[*input_image, input])
                    # Stop cancels the running submit event and disables itself.
                    cancel_btn.click(fn=lambda : gr.update(disabled=True), inputs=None, outputs=[cancel_btn], cancels=[output_process])

                # NOTE(review): this call is indented under the
                # antd.ConfigProvider block, so launch() runs while the
                # enclosing `with` contexts (including gr.Blocks) are still
                # open. Confirm this is intended.
                demo.queue(default_concurrency_limit=50).launch(ssr_mode=False)