yushihu commited on
Commit
b0acc2a
·
1 Parent(s): 564f713

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +73 -35
README.md CHANGED
@@ -38,56 +38,94 @@ pipeline = transformers.pipeline(
38
  device_map="auto",
39
  )
40
 
41
- # prompt formatting
42
 
 
 
43
 
 
 
 
 
 
 
 
44
 
45
- test_caption = "a blue rabbit and a red plane"
46
-
47
-
48
-
49
-
50
- model = PromptCap("vqascore/promptcap-coco-vqa") # also support OFA checkpoints. e.g. "OFA-Sys/ofa-large"
51
-
52
- if torch.cuda.is_available():
53
- model.cuda()
54
-
55
- prompt = "please describe this image according to the given question: what piece of clothing is this boy putting on?"
56
- image = "glove_boy.jpeg"
57
-
58
- print(model.caption(prompt, image))
59
- ```
60
-
61
- To try generic captioning, just use "what does the image describe?"
62
 
63
- ```python
64
- prompt = "what does the image describe?"
65
- image = "glove_boy.jpeg"
66
 
67
- print(model.caption(prompt, image))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  ```
69
 
 
70
 
71
-
72
- PromptCap also support taking OCR inputs:
73
 
74
  ```python
75
- prompt = "please describe this image according to the given question: what year was this taken?"
76
- image = "dvds.jpg"
77
- ocr = "yip AE Mht juor 02/14/2012"
78
-
79
- print(model.caption(prompt, image, ocr))
80
- ```
81
 
 
82
 
 
83
 
 
 
 
84
 
85
  ## Bibtex
86
  ```
87
- @article{hu2022promptcap,
88
- title={PromptCap: Prompt-Guided Task-Aware Image Captioning},
89
- author={Hu, Yushi and Hua, Hang and Yang, Zhengyuan and Shi, Weijia and Smith, Noah A and Luo, Jiebo},
90
- journal={arXiv preprint arXiv:2211.09699},
91
- year={2022}
92
  }
93
  ```
 
38
  device_map="auto",
39
  )
40
 
 
41
 
42
+ # formatting prompt following LLaMA 2 style
43
+ def create_qg_prompt(caption):
44
 
45
+ INTRO_BLURB = """Given an image description, generate one or two multiple-choice questions that verifies if the image description is correct.
46
+ Classify each concept into a type (object, human, animal, food, activity, attribute, counting, color, material, spatial, location, shape, other), and then generate a question for each type.
47
+ """
48
+
49
+ formatted_prompt = f"<s>[INST] <<SYS>>\n{INTRO_BLURB}\n<</SYS>>\n\n"
50
+ formatted_prompt += f"Description: {caption} [/INST] Entities:"
51
+ return formatted_prompt
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ test_caption = "a blue rabbit and a red plane"
 
 
55
 
56
+ # create prompt
57
+ prompt = create_qg_prompt(test_caption)
58
+
59
+ # text completion
60
+ sequences = pipeline(
61
+ prompt, do_sample=False, num_beams=5, num_return_sequences=1, max_length=512)
62
+ output = sequences[0]['generated_text'][len(prompt):]
63
+ output = output.split('\n\n')[0]
64
+
65
+ # output
66
+ print(output)
67
+
68
+ #### Expected output ###
69
+ # rabbit, plane
70
+ # Activites:
71
+ # Colors: blue, red
72
+ # Counting:
73
+ # Other attributes:
74
+ # About rabbit (animal):
75
+ # Q: is this a rabbit?
76
+ # Choices: yes, no
77
+ # A: yes
78
+ # About rabbit (animal):
79
+ # Q: what animal is in the picture?
80
+ # Choices: rabbit, dog, cat, fish
81
+ # A: rabbit
82
+ # About plane (object):
83
+ # Q: is this a plane?
84
+ # Choices: yes, no
85
+ # A: yes
86
+ # About plane (object):
87
+ # Q: what type of vehicle is this?
88
+ # Choices: plane, car, motorcycle, bus
89
+ # A: plane
90
+ # About blue (color):
91
+ # Q: is the rabbit blue?
92
+ # Choices: yes, no
93
+ # A: yes
94
+ # About blue (color):
95
+ # Q: what color is the rabbit?
96
+ # Choices: blue, red, yellow, green
97
+ # A: blue
98
+ # About red (color):
99
+ # Q: is the plane red?
100
+ # Choices: yes, no
101
+ # A: yes
102
+ # About red (color):
103
+ # Q: what color is the plane?
104
+ # Choices: red, blue, yellow, green
105
+ # A: red
106
  ```
107
 
108
+ # Use this LM with the tifascore package
109
 
110
+ tifascore provides extra functions to parse this output, among other utilities. Usage is shown below:
 
111
 
112
  ```python
113
+ from tifascore import get_llama2_pipeline, get_llama2_question_and_answers
 
 
 
 
 
114
 
115
+ pipeline = get_llama2_pipeline("tifa-benchmark/llama2_tifa_question_generation")
116
 
117
+ print(get_llama2_question_and_answers(pipeline, "a blue rabbit and a red plane"))
118
 
119
+ #### Expected output ###
120
+ [{'caption': 'a blue rabbit and a red plane', 'element': 'rabbit', 'question': 'what animal is in the picture?', 'choices': ['rabbit', 'dog', 'cat', 'fish'], 'answer': 'rabbit', 'element_type': 'animal/human'}, {'caption': 'a blue rabbit and a red plane', 'element': 'plane', 'question': 'is this a plane?', 'choices': ['yes', 'no'], 'answer': 'yes', 'element_type': 'object'}, {'caption': 'a blue rabbit and a red plane', 'element': 'plane', 'question': 'what type of vehicle is this?', 'choices': ['plane', 'car', 'motorcycle', 'bus'], 'answer': 'plane', 'element_type': 'object'}, {'caption': 'a blue rabbit and a red plane', 'element': 'blue', 'question': 'is the rabbit blue?', 'choices': ['yes', 'no'], 'answer': 'yes', 'element_type': 'color'}, {'caption': 'a blue rabbit and a red plane', 'element': 'blue', 'question': 'what color is the rabbit?', 'choices': ['blue', 'red', 'yellow', 'green'], 'answer': 'blue', 'element_type': 'color'}, {'caption': 'a blue rabbit and a red plane', 'element': 'red', 'question': 'is the plane red?', 'choices': ['yes', 'no'], 'answer': 'yes', 'element_type': 'color'}, {'caption': 'a blue rabbit and a red plane', 'element': 'red', 'question': 'what color is the plane?', 'choices': ['red', 'blue', 'yellow', 'green'], 'answer': 'red', 'element_type': 'color'}]
121
+ ```
122
 
123
  ## Bibtex
124
  ```
125
+ @article{hu2023tifa,
126
+ title={Tifa: Accurate and interpretable text-to-image faithfulness evaluation with question answering},
127
+ author={Hu, Yushi and Liu, Benlin and Kasai, Jungo and Wang, Yizhong and Ostendorf, Mari and Krishna, Ranjay and Smith, Noah A},
128
+ journal={arXiv preprint arXiv:2303.11897},
129
+ year={2023}
130
  }
131
  ```