bmay committed on
Commit
e35c029
·
verified ·
1 Parent(s): 749c724

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ from transformers import AutoModel
5
+ from theia.decoding import load_feature_stats, prepare_depth_decoder, prepare_mask_generator, decode_everything
6
+
7
# Select the torch device string once at import time: the first CUDA GPU when
# torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
8
+
9
# Models and feature statistics are built on first use and then reused, so
# that each request does not reload every checkpoint from scratch.
_THEIA_RESOURCES = {}


def _get_theia_resources():
    """Return the cached models and feature statistics, building them once.

    The returned dict holds the keys ``theia_model``, ``feature_means``,
    ``feature_vars``, ``mask_generator``, ``sam_model`` and
    ``depth_anything_decoder``.
    """
    if not _THEIA_RESOURCES:
        theia_model = AutoModel.from_pretrained(
            "theaiinstitute/theia-base-patch16-224-cdiv", trust_remote_code=True
        )
        theia_model = theia_model.to(device)

        target_model_names = [
            "google/vit-huge-patch14-224-in21k",
            "facebook/dinov2-large",
            "openai/clip-vit-large-patch14",
            "facebook/sam-vit-huge",
            "LiheYoung/depth-anything-large-hf",
        ]
        # NOTE(review): the statistics directory is resolved relative to the
        # current working directory -- confirm it exists where the app runs.
        feature_means, feature_vars = load_feature_stats(
            target_model_names, stat_file_root="../../../feature_stats"
        )

        mask_generator, sam_model = prepare_mask_generator(device)
        depth_anything_model_name = "LiheYoung/depth-anything-large-hf"
        depth_anything_decoder, _ = prepare_depth_decoder(depth_anything_model_name, device)

        # Publish everything in a single step so the cache is never observed
        # half-filled if a load above raises.
        _THEIA_RESOURCES.update(
            theia_model=theia_model,
            feature_means=feature_means,
            feature_vars=feature_vars,
            mask_generator=mask_generator,
            sam_model=sam_model,
            depth_anything_decoder=depth_anything_decoder,
        )
    return _THEIA_RESOURCES


def run_theia(image):
    """Decode one input image with Theia and return a single visualization.

    Args:
        image: The image delivered by the Gradio ``"image"`` input component.

    Returns:
        One array in which, for the input image, the Theia decoding result is
        stacked vertically above the reference (``gt``) decoding result.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Gradio passes None when the form is submitted without an image.
        raise gr.Error("Please provide an input image.")

    resources = _get_theia_resources()

    theia_decode_results, gt_decode_results = decode_everything(
        theia_model=resources["theia_model"],
        feature_means=resources["feature_means"],
        feature_vars=resources["feature_vars"],
        images=[image],
        mask_generator=resources["mask_generator"],
        sam_model=resources["sam_model"],
        depth_anything_decoder=resources["depth_anything_decoder"],
        pred_iou_thresh=0.5,
        stability_score_thresh=0.7,
        gt=True,
        device=device,
    )

    # Theia output on top, reference output below, one frame per input image.
    frames = [
        np.vstack([tr, gtr])
        for tr, gtr in zip(theia_decode_results, gt_decode_results, strict=False)
    ]

    # The "image" output component takes one image, not a batch, so the frames
    # are joined along the height axis instead of adding a leading batch axis.
    # NOTE(review): assumes each decoded result is an (H, W, C) image array --
    # confirm against theia.decoding.decode_everything.
    return np.concatenate(frames, axis=0)
46
+
47
# Wire run_theia into a minimal Gradio UI (one image in, one image out) and
# start serving it as soon as this module is executed.
demo = gr.Interface(
    fn=run_theia,
    inputs="image",
    outputs="image",
)
demo.launch()