Spaces: Bamboo ViT-B16 Demo (status: Runtime error)

Davidzhangyuanhan committed
Commit 6ab04f7 · 1 parent: 1414829

Add application file

Files changed:
- .gitignore                    +139 -0
- 142520422_6ad756ddf6_w_d.jpg  +0   -0
- README.md                     +2   -2
- app.py                        +102 -0
- timmvit.py                    +83  -0
- trainid2name.json             +0   -0
.gitignore
ADDED
@@ -0,0 +1,139 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+**/*.pyc
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+#lib/
+#lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Auto generate documentation
+docs/en/_build/
+docs/en/_model_zoo.rst
+docs/en/modelzoo_statistics.md
+docs/en/papers/
+docs/zh_CN/_build/
+docs/zh_CN/_model_zoo.rst
+docs/zh_CN/modelzoo_statistics.md
+docs/zh_CN/papers/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# custom
+.vscode
+.idea
+*.pkl
+*.pkl.json
+*.log.json
+/work_dirs
+/mmcls/.mim
+
+# Pytorch
+*.pth.*
+
+
+# work_dir
+work_dir
+saves
+
+#checkpoint
+weights
+
+#logs
+logs
+
+#DS_Store
+*DS_Store
+
142520422_6ad756ddf6_w_d.jpg
ADDED
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: Bamboo ViT-B16 Demo
-emoji:
+emoji: 🎋
 colorFrom: blue
 colorTo: blue
 sdk: gradio
@@ -10,4 +10,4 @@ pinned: false
 license: cc-by-4.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,102 @@
+import argparse
+import requests
+import gradio as gr
+import numpy as np
+import cv2
+import torch
+import torch.nn as nn
+from PIL import Image
+from torchvision import transforms
+from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
+from timm.data import create_transform
+
+from timmvit import timmvit
+import json
+from timm.models.hub import download_cached_file
+
+def pil_loader(filepath):
+    with Image.open(filepath) as img:
+        img = img.convert('RGB')
+    return img
+
+def build_transforms(input_size):
+    # Resize the short side to 8/7 of the crop size, center-crop,
+    # convert to tensor, and normalize with ImageNet statistics.
+    transform = transforms.Compose([
+        transforms.Resize(input_size * 8 // 7),
+        transforms.CenterCrop(input_size),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    ])
+    return transform
+
+# Load human-readable labels for Bamboo (added at the repo root in this commit).
+with open('./trainid2name.json') as f:
+    id2name = json.load(f)
+
+
+'''
+build model
+'''
+model = timmvit(pretrain_path='./Bamboo_v0-1_ViT-B16.pth.tar.convert')
+model.eval()
+
+'''
+build data transform
+'''
+eval_transforms = build_transforms(224)
+
+'''
+borrowed from: https://github.com/jacobgil/pytorch-grad-cam/blob/master/pytorch_grad_cam/utils/image.py
+'''
+def show_cam_on_image(img: np.ndarray,
+                      mask: np.ndarray,
+                      use_rgb: bool = False,
+                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
+    """ This function overlays the cam mask on the image as a heatmap.
+    By default the heatmap is in BGR format.
+    :param img: The base image in RGB or BGR format.
+    :param mask: The cam mask.
+    :param use_rgb: Whether to use an RGB or BGR heatmap; this should be set to True if 'img' is in RGB format.
+    :param colormap: The OpenCV colormap to be used.
+    :returns: The image with the cam overlay.
+    """
+    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
+    if use_rgb:
+        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
+    heatmap = np.float32(heatmap) / 255
+
+    if np.max(img) > 1:
+        raise Exception(
+            "The input image should be np.float32 in the range [0, 1]")
+
+    cam = 0.7 * heatmap + 0.3 * img
+    # cam = cam / np.max(cam)
+    return np.uint8(255 * cam)
+
+def recognize_image(image):
+    img_t = eval_transforms(image)
+
+    # compute output
+    output = model(img_t.unsqueeze(0))
+    prediction = output.softmax(-1).flatten()
+    _, top5_idx = torch.topk(prediction, 5)
+    return {id2name[str(i)][0]: float(prediction[i]) for i in top5_idx.tolist()}
+
+
+image = gr.inputs.Image(type='pil')  # PIL input to match the torchvision transforms
+label = gr.outputs.Label(num_top_classes=5)
+
+gr.Interface(
+    description="Bamboo for Zero-shot Image Recognition Demo (https://github.com/Davidzhangyuanhan/Bamboo)",
+    fn=recognize_image,
+    inputs=[image],
+    outputs=[
+        label,
+    ],
+    # examples=[
+    #     ["./elephants.png", "an elephant; an elephant walking in the river; four elephants walking in the river"],
+    #     ["./apple_with_ipod.jpg", "an ipod; an apple with a write note 'ipod'; an apple"],
+    #     ["./crowd2.jpg", "a street; a street with a woman walking in the middle; a street with a man walking in the middle"],
+    #     ["./zebras.png", "three zebras on the grass; two zebras on the grass; one zebra on the grass; no zebra on the grass; four zebras on the grass"],
+    # ],
+).launch()
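A quick sanity check on build_transforms: 224 * 8 // 7 = 256, so the pipeline is the standard ImageNet evaluation recipe (resize the short side to 256, center-crop to 224, normalize with ImageNet statistics). A minimal sketch verifying the output shape, assuming only torch, torchvision, and PIL are installed:

    import torch
    from PIL import Image
    from torchvision import transforms

    eval_t = transforms.Compose([
        transforms.Resize(224 * 8 // 7),   # 256: resize the short side
        transforms.CenterCrop(224),        # take the central 224x224 patch
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img = Image.new('RGB', (640, 480))     # dummy image; any RGB PIL image works
    assert eval_t(img).shape == torch.Size([3, 224, 224])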
timmvit.py
ADDED
@@ -0,0 +1,83 @@
+# ------------------------------------------------------------------------
+# SenseTime VTAB
+# Copyright (c) 2021 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+# Modified from DETR (https://github.com/facebookresearch/detr)
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+
+import timm
+import torch
+import copy
+import torch.nn as nn
+import torchvision
+import json
+from timm.models.hub import download_cached_file
+from PIL import Image
+
+
+class MyViT(nn.Module):
+    """Thin wrapper around timm's ViT-B/16 that loads a Bamboo checkpoint."""
+    def __init__(self, num_classes=115217, pretrain_path=None, enable_fc=False):
+        super().__init__()
+        print('initializing ViT model as backbone using ckpt:', pretrain_path)
+        self.model = timm.create_model('vit_base_patch16_224',
+                                       checkpoint_path=pretrain_path,
+                                       num_classes=num_classes)  # pretrained=True
+    # def forward_features(self, x):
+    #     x = self.model.patch_embed(x)
+    #     cls_token = self.model.cls_token.expand(x.shape[0], -1, -1)  # stole cls_tokens impl from Phil Wang, thanks
+    #     if self.model.dist_token is None:
+    #         x = torch.cat((cls_token, x), dim=1)
+    #     else:
+    #         x = torch.cat((cls_token, self.model.dist_token.expand(x.shape[0], -1, -1), x), dim=1)
+    #     x = self.model.pos_drop(x + self.model.pos_embed)
+    #     x = self.model.blocks(x)
+    #     x = self.model.norm(x)
+    #     return self.model.pre_logits(x[:, 0])
+
+    def forward(self, x):
+        return self.model.forward(x)
+
+
+def timmvit(**kwargs):
+    default_kwargs = {}
+    default_kwargs.update(**kwargs)
+    return MyViT(**default_kwargs)
+
+
+def build_transforms(input_size, center_crop=True):
+    transform = torchvision.transforms.Compose([
+        torchvision.transforms.Resize(input_size * 8 // 7),
+        torchvision.transforms.CenterCrop(input_size),
+        torchvision.transforms.ToTensor(),
+        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    ])
+    return transform
+
+def pil_loader(filepath):
+    with Image.open(filepath) as img:
+        img = img.convert('RGB')
+    return img
+
+def test_build():
+    with open('/mnt/lustre/yhzhang/bamboo/Bamboo_ViT-B16_demo/trainid2name.json') as f:
+        id2name = json.load(f)
+    img = pil_loader('/mnt/lustre/yhzhang/bamboo/Bamboo_ViT-B16_demo/142520422_6ad756ddf6_w_d.jpg')
+    eval_transforms = build_transforms(224)
+    img_t = eval_transforms(img)
+    img_t = img_t[None, :]  # add batch dimension
+    model = MyViT(pretrain_path='/mnt/lustre/yhzhang/bamboo/Bamboo_ViT-B16_demo/Bamboo_v0-1_ViT-B16.pth.tar.convert')
+    # image = torch.rand(1, 3, 224, 224)
+    output = model(img_t)
+    prediction = output.softmax(-1).flatten()
+    _, top5_idx = torch.topk(prediction, 5)
+    print({id2name[str(i)][0]: float(prediction[i]) for i in top5_idx.tolist()})
+
+if __name__ == '__main__':
+    test_build()
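The test_build() paths point at a private /mnt/lustre cluster. A sketch of the same smoke test rewritten against the files this commit adds at the repo root; note the Bamboo_v0-1_ViT-B16.pth.tar.convert checkpoint is not part of the commit and is assumed to be present locally:

    import json
    import torch
    from timmvit import MyViT, build_transforms, pil_loader

    with open('./trainid2name.json') as f:              # added in this commit
        id2name = json.load(f)
    img = pil_loader('./142520422_6ad756ddf6_w_d.jpg')  # sample image added in this commit
    img_t = build_transforms(224)(img)[None, :]
    model = MyViT(pretrain_path='./Bamboo_v0-1_ViT-B16.pth.tar.convert')  # assumed local
    model.eval()
    with torch.no_grad():
        prediction = model(img_t).softmax(-1).flatten()
    _, top5_idx = torch.topk(prediction, 5)
    print({id2name[str(i)][0]: float(prediction[i]) for i in top5_idx.tolist()})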
trainid2name.json
ADDED
The diff for this file is too large to render.
See raw diff
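Although the diff cannot be rendered, the id2name[str(i)][0] lookups in app.py and timmvit.py imply its shape: a JSON object mapping class-id strings (presumably one per each of the model's 115217 classes) to a list whose first element is the display name. A hypothetical two-entry excerpt, with made-up names:

    id2name = {
        "0": ["giant panda"],   # names here are invented for illustration
        "1": ["red panda"],
    }
    print(id2name["0"][0])      # -> giant panda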