File size: 3,620 Bytes
9d3e407
681b67d
9d3e407
0d59e36
 
 
 
 
c11be31
9d3e407
ea7867a
681b67d
 
fa25c04
681b67d
0eec9d7
 
 
fa25c04
0eec9d7
 
 
 
 
 
681b67d
 
 
 
 
 
6b811d9
681b67d
 
 
 
c11be31
681b67d
 
0eec9d7
2dd6655
 
2728bd0
2dd6655
2728bd0
 
2dd6655
9361469
 
 
2728bd0
 
 
0eec9d7
ea7867a
 
 
 
 
9361469
ea7867a
 
 
 
0d59e36
c11be31
681b67d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
import evaluate

# Load the ShaderEval suite from the Hugging Face Hub (downloads on first run).
suite = evaluate.EvaluationSuite.load("Vipitis/ShaderEval") #downloads it

#TODO: can you import it locally instead?
# from ShaderEval import Suite
# suite = Suite("Vipitis/ShaderEval")
# TODO: save results to a file?

# Markdown shown at the top of the Space: explanation, usage instructions, and roadmap.
# (User-facing text: typos fixed — "post proessing", "yourself:.", "there us up to 300",
# "a improved testset".)
text = """# Welcome to the ShaderEval Suite.
            
            This space hosts the ShaderEval Suite. more to follow soon.
            For an interactive Demo and more information see the demo space [ShaderCoder](https://huggingface.co/spaces/Vipitis/ShaderCoder)
            
            # Task1: Return Completion
            ## Explanation
            Modelled after the [CodeXGLUE code_completion_line](https://huggingface.co/datasets/code_x_glue_cc_code_completion_line) task. 
            Using the "return_completion" subset of the [Shadertoys-fine dataset](https://huggingface.co/datasets/Vipitis/Shadertoys-fine).
            All preprocessing and post processing is done by the custom evaluator for this suite. It should be as easy as just giving it a model checkpoint that can do the "text-generation" task.
            Evaluation is currently done with just [exact_match](https://huggingface.co/metrics/exact_match).

            ## Notice
            should you find any model that throws an error, please let me know in the issues tab. Several parts of this suite are still missing.

            ## Instructions
            ### Run the code yourself:
            ```python
            import evaluate
            suite = evaluate.EvaluationSuite.load("Vipitis/ShaderEval")
            model_cp = "gpt2"
            suite.run(model_cp, snippet=300)
            ```
            
            ### try the demo below
            - Select a **model checkpoint** from the "dropdown"
            - Select how many **samples** to run (there are up to 300 from the test set)
            - Click **Run** to run the suite
            - The results will be displayed in the **Output** box
            
            ## Results 
            ![](file/bar.png)
            Additionally, you can report results to your models and it should show up on this [leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=Vipitis%2FShadertoys-fine)

            ## Todo (feel free to contribute in a [Pull Request](https://huggingface.co/spaces/Vipitis/ShaderEval/discussions?status=open&type=pull_request))
            - [~] leaderboard (via autoevaluate and self reporting)
            - [?] supporting batches to speed up inference 
            - [ ] CER metric (via a custom metric perhaps?)
            - [x] removing the pad_token warning
            - [ ] adding OpenVINO pipelines for inference, pending on OpenVINO release
            - [ ] task1b for "better" featuring an improved test set as well as better metrics. Will allow more generation parameters
            - [ ] semantic match by comparing the rendered frames (depending on WGPU implementation?)
            - [ ] CLIP match to evaluate rendered images fitting to title/description
            """


def run_suite(model_cp, snippet):
    """Run the ShaderEval suite on a model checkpoint and return the first result.

    Args:
        model_cp: model checkpoint identifier passed straight to ``suite.run``.
        snippet: number of samples to evaluate (forwarded to ``suite.run``).
    """
    outcome = suite.run(model_cp, snippet)
    # Echo the full result list to stdout so it shows up in the Space logs.
    print(outcome)
    return outcome[0]

# Build the demo page: description, inputs (checkpoint + sample count), output box,
# and a button wired to run_suite.
with gr.Blocks() as site:
    text_md = gr.Markdown(text)
    model_cp = gr.Textbox(value="gpt2", label="Model Checkpoint", interactive=True)
    # Gradio 3.x Slider takes its initial setting as `value`; `default` is not a valid kwarg.
    first_n = gr.Slider(minimum=1, maximum=300, value=5, label="num_samples", step=1.0)
    output = gr.Textbox(label="Output")
    # Button caption is set via `value`; `label` is not rendered on buttons.
    run_button = gr.Button(value="Run")
    run_button.click(fn=run_suite, inputs=[model_cp, first_n], outputs=output)
site.launch()