File size: 5,103 Bytes
a017c81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa4049f
 
a017c81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer
import json
from PIL import Image
import os
import urllib

st.set_page_config(
     page_title="BHL Flickr Image Search",
     page_icon="🖼️",
     layout="wide"
)

@st.cache_resource
def load_clip_model():
    return SentenceTransformer('clip-ViT-B-32')

@st.cache_resource
def load_annoy_index():
    annoy_index = AnnoyIndex(512, metric='angular')
    annoy_index.load('bhl_index.annoy')
    return annoy_index

@st.cache_data
def load_flickr_data():
    with open('bhl_flickr_list.json') as json_in:
        bhl_flickr_ids = json.load(json_in)
    return bhl_flickr_ids

def bhl_annoy_search(mode, query, k=5):
    if mode == 'id':
        for idx, row in enumerate(bhl_flickr_ids):
            if str(row['flickr_id']) == query:
                matching_row = idx
        neighbors = bhl_index.get_nns_by_item(matching_row, k,
                                            include_distances=True)
    elif mode == 'text':
        query_emb = model.encode([query], show_progress_bar=False)
        neighbors = bhl_index.get_nns_by_vector(query_emb[0], k,
                                            include_distances=True)
    elif mode == 'image':
        query_emb = model.encode([query], show_progress_bar=False)
        neighbors = bhl_index.get_nns_by_vector(query_emb[0], k,
                                            include_distances=True)
    return neighbors

#DEPLOY_MODE = 'streamlit_share'
DEPLOY_MODE = 'hf_spaces'
#DEPLOY_MODE = 'localhost'

if DEPLOY_MODE == 'localhost':
    BASE_URL = 'http://localhost:8501/'
elif DEPLOY_MODE == 'streamlit_share':
    BASE_URL = 'https://share.streamlit.io/miketrizna/bhl_flickr_search'
elif DEPLOY_MODE == 'hf_spaces':
    BASE_URL = 'https://huggingface.co/spaces/MikeTrizna/bhl_flickr_search'

if __name__ == "__main__":
    st.markdown("# BHL Flickr Image Search")
    with st.expander("How does this work?", expanded=False):
        st.write('placeholder')

    st.sidebar.markdown('### Search Mode')

    query_params = st.experimental_get_query_params()
    mode_index = 0
    if 'mode' in query_params:
        if query_params['mode'][0] == 'text_search':
            mode_index = 0
        elif query_params['mode'][0] == 'flickr_id':
            mode_index = 2

    app_mode = st.sidebar.radio("How would you like to search?",
                        ['Text search','Upload Image', 'BHL Flickr ID'],
                        index = mode_index)

    model = load_clip_model()
    bhl_index = load_annoy_index()
    bhl_flickr_ids = load_flickr_data()

    if app_mode == 'Text search':
        search_text = 'a watercolor illustration of an insect with flowers'
        if 'mode' in query_params:
            if query_params['mode'][0] == 'text_search':
                if 'query' in query_params:
                    search_text = query_params['query'][0]
            else:
                st.experimental_set_query_params()
        query = st.text_input('Text query',search_text) 
        search_mode = 'text'
        #closest_k_idx, closest_k_dist = bhl_text_search(text_query, 100)

    elif app_mode == 'BHL Flickr ID':
        search_id = '5974846748'
        if 'mode' in st.experimental_get_query_params():
            if st.experimental_get_query_params()['mode'][0] == 'flickr_id':
                if 'query' in st.experimental_get_query_params():
                    search_id = st.experimental_get_query_params()['query'][0]
            else:
                st.experimental_set_query_params()
        query = st.text_input('Query ID', search_id)
        search_mode = 'id'
        #closest_k_idx, closest_k_dist = bhl_id_search(id_query, 100)
    
    elif app_mode == 'Upload Image':
        st.experimental_set_query_params()
        query = None
        image_file = st.file_uploader("Upload Image", type=["png","jpg","jpeg"])
        search_mode = 'image'
        #closest_k_idx = []
        if image_file is not None:
            query = Image.open(image_file)
            st.image(query,width=100,caption='Query image')
            #closest_k_idx, closest_k_dist = bhl_image_search(img, 100)

    if query:
        closest_k_idx, closest_k_dist = bhl_annoy_search(search_mode, query, 100)

        col_list = st.columns(5)

        if len(closest_k_idx):
            for idx, annoy_idx in enumerate(closest_k_idx):
                bhl_ids = bhl_flickr_ids[annoy_idx]
                bhl_url = f"https://live.staticflickr.com/{bhl_ids['server']}/{bhl_ids['flickr_id']}_{bhl_ids['secret']}.jpg"
                col_list[idx%5].image(bhl_url, use_column_width=True)

                flickr_url = f"https://www.flickr.com/photos/biodivlibrary/{bhl_ids['flickr_id']}/"
                neighbors_url = f"{BASE_URL}?mode=flickr_id&query={bhl_ids['flickr_id']}"
                link_html = f'<a href="{flickr_url}" target="_blank">Flickr Link</a> | <a href="{neighbors_url}">Neighbors</a>'
                col_list[idx%5].markdown(link_html, unsafe_allow_html=True)
                col_list[idx%5].markdown("---")