import tensorflow as tf

# Run on CPU only: hide all GPUs from TensorFlow.
tf.config.set_visible_devices([], 'GPU')
# Alternative: keep the GPU visible but enable memory growth on it.
# gpu_devices = tf.config.experimental.list_physical_devices('GPU')
# if gpu_devices:
#     tf.config.experimental.set_memory_growth(gpu_devices[0], True)
# else:
#     print("No GPU found; TensorFlow will run on CPU.")

import os
import numpy as np
import keras
from PIL import Image
from keras_cv_attention_models import beit
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from typing import Tuple
#from huggingface_hub import snapshot_download
from labels import lookup_140


def get_triplet_model_beit(input_shape=(600, 600, 3),
                           embedding_units=256,
                           embedding_depth=2,
                           n_classes=19,
                           backbone_name='Beit'):
    """Build a two-headed BEiT model: an L2-normalized embedding head and a classification head."""
    backbone_class = beit.BeitBasePatch16(input_shape=input_shape, pretrained="imagenet21k-ft1k")

    # Drop the original classification layer and keep the penultimate features.
    backbone_class = tf.keras.Model(backbone_class.input, backbone_class.layers[-2].output)
    #features = GlobalAveragePooling2D()(backbone_class.output)
    embedding_head = backbone_class.output

    # Stack dense layers; all but the last use ReLU, the final projection is linear.
    for embed_i in range(embedding_depth):
        embedding_head = Dense(embedding_units,
                               activation="relu" if embed_i < embedding_depth - 1 else "linear")(embedding_head)
    embedding_head = tf.nn.l2_normalize(embedding_head, -1, epsilon=1e-5)

    # Classification head produces raw logits; softmax is applied at inference time.
    logits_head = Dense(n_classes)(backbone_class.output)

    model = tf.keras.Model(backbone_class.input, [embedding_head, logits_head])
    # Compilation is only relevant for training; inference below uses predict() directly.
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    #model.summary()

    return model
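

# Note on outputs (sketch, not executed here): model.predict(batch) returns a list of
# two arrays, [embeddings, logits]; the inference helpers further below index them as
# [0] and [1]. The shapes and input size in this example are assumptions for illustration.
#
# model = get_triplet_model_beit(input_shape=(576, 576, 3), n_classes=142)
# emb_batch, logit_batch = model.predict(np.zeros((1, 576, 576, 3), np.float32))
# # emb_batch: (1, 256) L2-normalized embeddings; logit_batch: (1, 142) raw class logits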




load_size = 600
crop_size = 600
def _clever_crop(img: tf.Tensor,
                 target_size: Tuple[int, int] = (128, 128),
                 grayscale: bool = False
                 ) -> Tuple[tf.Tensor, int]:
    """Tile elongated images along their short side before resizing.

    If the max/min side ratio exceeds 1.2, the image is repeated along its shorter
    dimension until it is roughly square, then resized to `target_size`.

    Args:
        img (tf.Tensor): Input image of shape (H, W, 3).
        target_size (Tuple[int, int], optional): Output size. Defaults to (128, 128).
        grayscale (bool, optional): If True, drop colour information while keeping
            three channels. Defaults to False.

    Returns:
        Tuple[tf.Tensor, int]: The resized image and the number of repetitions applied
        (0 if no tiling was needed).
    """
    maxside = tf.math.maximum(tf.shape(img)[0], tf.shape(img)[1])
    minside = tf.math.minimum(tf.shape(img)[0], tf.shape(img)[1])
    new_img = img

    if tf.math.divide(maxside, minside) > 1.2:
        repeating = tf.math.floor(tf.math.divide(maxside, minside))
        new_img = img
        # Width is the short side: tile horizontally.
        if tf.math.equal(tf.shape(img)[1], minside):
            for _ in range(int(repeating)):
                new_img = tf.concat((new_img, img), axis=1)

        # Height is the short side: tile vertically, then rotate 90 degrees.
        if tf.math.equal(tf.shape(img)[0], minside):
            for _ in range(int(repeating)):
                new_img = tf.concat((new_img, img), axis=0)
            new_img = tf.image.rot90(new_img)
    else:
        new_img = img
        repeating = 0
    img = tf.image.resize(new_img, target_size)
    if grayscale:
        # Remove colour but keep a 3-channel image for the backbone.
        img = tf.image.rgb_to_grayscale(img)
        img = tf.image.grayscale_to_rgb(img)

    return img, repeating
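
# Illustration (sketch, not executed): a 300x900 image (H x W) has a max/min ratio of 3,
# so it is tiled floor(3) = 3 extra times along its short axis and rotated before the
# final resize; near-square images pass through unchanged with repeating == 0.
#
# wide = tf.zeros((300, 900, 3))
# resized, reps = _clever_crop(wide, (576, 576))   # resized: (576, 576, 3)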

def preprocess(img, size=384):
    """Scale pixel values to [0, 1] and resize to (size, size)."""
    img = np.array(img, np.float32) / 255.0
    img = tf.image.resize(img, (size, size))
    return np.array(img, np.float32)

def select_top_n(preds, n=10):
    """Return the indices of the n highest-scoring classes, best first."""
    top_n = np.argsort(preds)[-n:][::-1]
    return top_n

def parse_results(top_n, logits):
    """Map the top-n class indices to label names and their probabilities."""
    results = {}
    for n in top_n:
        label = lookup_140[n]
        results[label] = float(logits[n])
    return results

def inference_resnet_embedding_beit(x, model, size=576, n_classes=142, n_top=10):
    """Return the L2-normalized embedding for a single image."""
    cropped = _clever_crop(x, (size, size))[0]
    prep = preprocess(cropped, size=size)
    embedding = model.predict(np.array([prep]))[0][0]

    return embedding

def inference_resnet_finer_beit(x, model, size=576, n_classes=142, n_top=10):
    """Return a {label: probability} dict for the top-n predicted classes."""
    cropped = _clever_crop(x, (size, size))[0]
    prep = preprocess(cropped, size=size)
    # The second model output holds the class logits; softmax converts them to probabilities.
    logits = tf.nn.softmax(model.predict(np.array([prep]))[1][0]).numpy()
    top_n = select_top_n(logits, n=n_top)

    return parse_results(top_n, logits)
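

# End-to-end usage sketch (assumes a fine-tuned checkpoint is available; the weights
# filename and image path below are hypothetical placeholders, not part of this file):
#
# if __name__ == "__main__":
#     model = get_triplet_model_beit(input_shape=(576, 576, 3), n_classes=142)
#     model.load_weights("beit_triplet_weights.h5")  # hypothetical path
#     img = np.array(Image.open("sample_leaf.jpg").convert("RGB"))
#     embedding = inference_resnet_embedding_beit(img, model, size=576)
#     top_predictions = inference_resnet_finer_beit(img, model, size=576, n_top=10)
#     print(embedding.shape, top_predictions)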