ncoop57 committed on
Commit
fd2744e
·
1 Parent(s): 021b099

Reorganize the interface and code to be more modular, and add the Debian packages required for installation

Browse files
Files changed (2) hide show
  1. app.py +54 -51
  2. packages.txt +1 -0
app.py CHANGED
@@ -1,23 +1,22 @@
1
- from torch._C import device
2
  import ffmpeg
 
3
  import youtube_dl
4
 
5
  import numpy as np
 
6
 
7
- from PIL import Image
8
- import requests
9
-
10
- import torch
11
  from sentence_transformers import SentenceTransformer, util, models
12
  from clip import CLIPModel
13
- # from sentence_transformers.models import CLIPModel
14
  from PIL import Image
15
 
16
- clip = CLIPModel()
17
- model = SentenceTransformer(modules=[clip]).to(dtype=torch.float32, device=torch.device('cpu'))
 
 
 
18
 
19
 
20
- def get_embedding(query, video):
21
  text_emb = model.encode(query, device='cpu')
22
 
23
  # Encode an image:
@@ -28,25 +27,9 @@ def get_embedding(query, video):
28
 
29
  return text_emb, img_embs
30
 
31
-
32
- # # Encode an image:
33
- # url = "http://images.cocodataset.org/val2017/000000039769.jpg"
34
- # img = Image.fromarray(np.array(Image.open(requests.get(url, stream=True).raw))).convert('RGB')
35
- # img_emb = model.encode([img, img], device='cpu')
36
-
37
- # # Encode text descriptions
38
- # text_emb = model.encode(['Two dogs in the snow', 'Two cats laying on a sofa',
39
- # 'A picture of London at night'], device='cpu')
40
-
41
- # # Compute cosine similarities
42
- # cos_scores = util.cos_sim(img_emb, text_emb)
43
- # print(cos_scores)
44
-
45
-
46
- def my_hook(d):
47
  if d['status'] == 'finished':
48
- print(d)
49
- print('Done downloading, now extracting frames ...')
50
  probe = ffmpeg.probe(d["filename"])
51
  video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
52
  width = int(video_stream['width'])
@@ -61,27 +44,47 @@ def my_hook(d):
61
  np
62
  .frombuffer(out, np.uint8)
63
  .reshape([-1, height, width, 3])
64
- )[::10]
65
-
66
- print(video.shape)
67
- txt_embd, img_embds = get_embedding("two white puppies", video)
68
- cos_scores = util.cos_sim(txt_embd, img_embds)
69
- print(cos_scores)
70
-
71
-
72
- ydl_opts = {"format": "mp4", "progress_hooks": [my_hook], }
73
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
74
- ydl.download(['https://youtu.be/I3AaW9ZevIU'])
75
-
76
-
77
- # # out, _ = (
78
- # # ffmpeg
79
- # # .input('in.mp4')
80
- # # .output('pipe:', format='rawvideo', pix_fmt='rgb24')
81
- # # .run(capture_stdout=True)
82
- # # )
83
- # # video = (
84
- # # np
85
- # # .frombuffer(out, np.uint8)
86
- # # .reshape([-1, height, width, 3])
87
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import ffmpeg
2
+ import torch
3
  import youtube_dl
4
 
5
  import numpy as np
6
+ import streamlit as st
7
 
 
 
 
 
8
  from sentence_transformers import SentenceTransformer, util, models
9
  from clip import CLIPModel
 
10
  from PIL import Image
11
 
12
@st.cache(allow_output_mutation=True, max_entries=1)
def get_model():
    """Build the CLIP-backed sentence-transformer used for text/image search.

    The model wraps a single ``CLIPModel`` module in a
    ``SentenceTransformer`` and is moved to the CPU as float32.  The
    function is wrapped in ``st.cache`` (one entry, output mutation
    allowed), presumably so the model is not rebuilt on every Streamlit
    rerun.

    Returns:
        The ``SentenceTransformer`` instance placed on the CPU.
    """
    encoder = SentenceTransformer(modules=[CLIPModel()])
    cpu = torch.device('cpu')
    return encoder.to(dtype=torch.float32, device=cpu)
17
 
18
 
19
+ def get_embedding(model, query, video):
20
  text_emb = model.encode(query, device='cpu')
21
 
22
  # Encode an image:
 
27
 
28
  return text_emb, img_embs
29
 
30
+ def my_hook(d, model, desc, top_k, text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  if d['status'] == 'finished':
32
+ text.text("Processing video...")
 
33
  probe = ffmpeg.probe(d["filename"])
34
  video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
35
  width = int(video_stream['width'])
 
44
  np
45
  .frombuffer(out, np.uint8)
46
  .reshape([-1, height, width, 3])
47
+ )[::10][:200]
48
+
49
+ txt_embd, img_embds = get_embedding(model, desc, video)
50
+ cos_scores = np.array(util.cos_sim(txt_embd, img_embds))
51
+ ids = np.argsort(cos_scores)[0][-top_k:]
52
+
53
+ imgs = [Image.fromarray(video[i]) for i in ids]
54
+ text.empty()
55
+ st.image(imgs)
56
+
57
def run():
    """Render the Streamlit page and run a search when the user asks for one.

    The sidebar collects three inputs: how many frames to show (``Top K``),
    a free-text description to search for, and the YouTube URL to search
    in.  When the "Search" button is pressed the video is downloaded with
    ``youtube_dl``; the download's progress hook hands the finished file to
    ``my_hook``, which does the frame extraction, ranking and display.

    Blank inputs are rejected with a warning instead of being passed to the
    downloader, and a failed download is reported in the page rather than
    surfacing as an unhandled exception.
    """
    st.set_page_config(page_title="Youtube CLIFS")
    # main body
    model = get_model()

    st.sidebar.markdown("### Controls:")
    top_k = st.sidebar.slider(
        "Top K",
        min_value=1,
        max_value=5,
        step=1,
    )
    desc = st.sidebar.text_input(
        "Search Description",
        value="Two white puppies",
        help="Text description of what you want to find in the video",
    )
    url = st.sidebar.text_input(
        "Youtube Video URL",
        value='https://youtu.be/I3AaW9ZevIU',
        help="Youtube video you'd like to search through",
    )

    submit_button = st.sidebar.button("Search")
    if not submit_button:
        return

    # Nothing sensible can be searched with an empty URL or description.
    if not url.strip() or not desc.strip():
        st.warning("Please provide both a search description and a video URL.")
        return

    text = st.text("Downloading video...")

    def on_progress(d):
        # youtube_dl passes only the status dict; bind the rest here.
        my_hook(d, model, desc, top_k, text)

    ydl_opts = {"format": "mp4[height=360]", "progress_hooks": [on_progress], }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except youtube_dl.utils.DownloadError as err:
        # Invalid URL, unavailable video, or no 360p mp4 format: drop the
        # stale status line and tell the user what happened.
        text.empty()
        st.error(f"Could not download the video: {err}")
87
+
88
+
89
# Only start the app when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg