davanstrien HF staff committed on
Commit
9e7d682
·
1 Parent(s): 3555196

stronger JSON prompt

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -1,10 +1,10 @@
1
- import subprocess # 🥲
2
 
3
- subprocess.run(
4
- "pip install flash-attn --no-build-isolation",
5
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
6
- shell=True,
7
- )
8
 
9
  import spaces
10
  import gradio as gr
@@ -74,7 +74,8 @@ If there are no relevant visual elements, replace the third query with another s
74
  Here is the document image to analyze:
75
  <image>
76
 
77
- Generate the queries based on this image and provide the response in the specified JSON format."""
 
78
 
79
  return prompt, GeneralRetrievalQuery
80
 
@@ -92,7 +93,7 @@ def generate_response(image):
92
  inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
93
  output = model.generate_from_batch(
94
  inputs,
95
- GenerationConfig(max_new_tokens=200, stop_token="<|endoftext|>"),
96
  tokenizer=processor.tokenizer
97
  )
98
  generated_tokens = output[0, inputs['input_ids'].size(1):]
 
1
+ # import subprocess # 🥲
2
 
3
+ # subprocess.run(
4
+ # "pip install flash-attn --no-build-isolation",
5
+ # env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
6
+ # shell=True,
7
+ # )
8
 
9
  import spaces
10
  import gradio as gr
 
74
  Here is the document image to analyze:
75
  <image>
76
 
77
+ Generate the queries based on this image and provide the response in the specified JSON format.
78
+ Only return JSON"""
79
 
80
  return prompt, GeneralRetrievalQuery
81
 
 
93
  inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
94
  output = model.generate_from_batch(
95
  inputs,
96
+ GenerationConfig(max_new_tokens=800, stop_token="<|endoftext|>"),
97
  tokenizer=processor.tokenizer
98
  )
99
  generated_tokens = output[0, inputs['input_ids'].size(1):]