ngthanhtinqn committed on
Commit
3daa867
·
1 Parent(s): 6b44c63
Files changed (2) hide show
  1. app.py +4 -11
  2. demo.py +1 -1
app.py CHANGED
@@ -4,22 +4,15 @@ import gradio as gr
4
  from demo import query_image
5
 
6
  description = """
7
- Gradio demo for <a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>,
8
- introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
9
- with Vision Transformers</a>.
10
- \n\nYou can use OWL-ViT to query images with text descriptions of any object.
11
- To use it, simply upload an image and enter comma separated text descriptions of objects you want to query the image for. You
12
- can also use the score threshold slider to set a threshold to filter out low probability predictions.
13
- \n\nOWL-ViT is trained on text templates,
14
- hence you can get better predictions by querying the image with text templates used in training the original model: *"photo of a star-spangled banner"*,
15
- *"image of a shoe"*. Refer to the <a href="https://arxiv.org/abs/2103.00020">CLIP</a> paper to see the full list of text templates used to augment the training data.
16
- \n\n<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab demo</a>
17
  """
18
  demo = gr.Interface(
19
  query_image,
20
  inputs=[gr.Image(), "text"],
21
  outputs=["image", "image"],
22
- title="Zero-Shot Object Detection with OWL-ViT",
23
  description=description,
24
  examples=[
25
  ["./demo_images/cats.png", "cats,ears"],
 
4
  from demo import query_image
5
 
6
  description = """
7
+ Gradio demo for combining <a href="https://github.com/facebookresearch/segment-anything">Segment-Anything (SAM)</a>
8
+ <a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>.
9
+ \n\nYou can use OWL-ViT to query boxes with text descriptions of any object, then SAM will segment anything in the boxes.
 
 
 
 
 
 
 
10
  """
11
  demo = gr.Interface(
12
  query_image,
13
  inputs=[gr.Image(), "text"],
14
  outputs=["image", "image"],
15
+ title="Segment Anything (SAM) with OWL-ViT",
16
  description=description,
17
  examples=[
18
  ["./demo_images/cats.png", "cats,ears"],
demo.py CHANGED
@@ -164,7 +164,7 @@ def query_image(img, text_prompt):
164
  buf = io.BytesIO()
165
  plt.savefig(buf)
166
  buf.seek(0)
167
- owlvit_segment_image = Image.open(buf).convert('RGB')
168
 
169
  # grounded results
170
  image_with_box = plot_boxes_to_image(pil_img, pred_dict)[0]
 
164
  buf = io.BytesIO()
165
  plt.savefig(buf)
166
  buf.seek(0)
167
+ owlvit_segment_image = Image.open(buf)
168
 
169
  # grounded results
170
  image_with_box = plot_boxes_to_image(pil_img, pred_dict)[0]