# Import libraries
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import clip
from PIL import Image
from io import BytesIO
import requests
import gradio as gr
# Download and return the matched photos as PIL images
def show_output_image(matched_images, photos):
    images = []
    for photo_id in matched_images:
        # Look up the photo's Unsplash URL and fetch a 640px-wide version
        photo_found = photos[photos["photo_id"] == photo_id].iloc[0]
        response = requests.get(photo_found["photo_image_url"] + "?w=640")
        images.append(Image.open(BytesIO(response.content)))
    return images
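# Note: show_output_image fetches each image with no timeout or error
# handling; a more defensive version would pass a timeout to requests.get
# and skip photos whose download fails.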
# Encode and normalize the search query using CLIP
def encode_search_query(search_query, model, device):
    with torch.no_grad():
        # Encode the text, then L2-normalize the feature vector
        text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
        text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
    # Move the features off the GPU and convert them to a NumPy array
    return text_encoded.cpu().numpy()
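# Note: with the ViT-B/32 checkpoint the returned array has shape (1, 512),
# the same dimensionality as the per-photo vectors stored in features.npy.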
# Find the photos that best match a query feature vector
def find_matches(text_features, photo_features, photo_ids, results_count=4):
    # Both feature sets are L2-normalized, so the dot product is the cosine similarity
    similarities = (photo_features @ text_features.T).squeeze(1)
    # Sort the photos by similarity score, best first
    best_photo_idx = (-similarities).argsort()
    # Return the photo IDs of the best matches
    return [photo_ids[i] for i in best_photo_idx[:results_count]]
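# Shape sketch (assuming N photos and 512-dim CLIP features): photo_features
# is (N, 512) and text_features is (1, 512), so photo_features @ text_features.T
# is (N, 1) and squeeze(1) yields an (N,) vector of cosine similarities.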
def image_search(search_text, search_image, option):
    # Load the photo IDs
    photo_ids = pd.read_csv("./photo_ids.csv")
    photo_ids = list(photo_ids["photo_id"])
    # Load the photo metadata (including each photo's Unsplash URL)
    photos = pd.read_csv("./photos.tsv000", sep="\t", header=0)
    # Load the precomputed CLIP feature vectors for all photos
    photo_features = np.load("./features.npy")
    # Use CUDA if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Load OpenAI's CLIP model
    model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
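    # Note: reloading the CSVs, the features, and the CLIP weights on every
    # request is slow; a common optimization (not done here) would load them
    # once at module level and reuse them across calls.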
    if option == "Text-To-Image":
        # Encode the text query
        text_features = encode_search_query(search_text, model, device)
        # Find the best-matching images
        matched_images = find_matches(text_features, photo_features, photo_ids, 4)
        return show_output_image(matched_images, photos)
    elif option == "Image-To-Image":
        # Encode and normalize the query image with CLIP
        with torch.no_grad():
            image_feature = model.encode_image(preprocess(search_image).unsqueeze(0).to(device))
            image_feature = (image_feature / image_feature.norm(dim=-1, keepdim=True)).cpu().numpy()
        # Find the best-matching images
        matched_images = find_matches(image_feature, photo_features, photo_ids, 4)
        return show_output_image(matched_images, photos)
# Build and launch the Gradio UI: a text query, an optional query image, and
# a dropdown selecting the search mode. The deprecated gr.inputs/gr.outputs
# Carousel API is replaced with current components; a Gallery displays the
# list of PIL images returned by image_search.
gr.Interface(
    fn=image_search,
    inputs=[
        gr.Textbox(lines=7, label="Input Text"),
        gr.Image(type="pil", label="Input Image"),
        gr.Dropdown(["Text-To-Image", "Image-To-Image"], label="Search Mode"),
    ],
    outputs=gr.Gallery(label="Matched Images"),
).queue().launch(debug=True, share=True)
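# A minimal dependency sketch for running this Space locally (assumed, not
# pinned): torch, gradio, pandas, numpy, Pillow, requests, and OpenAI's CLIP
# (pip install git+https://github.com/openai/CLIP.git). photo_ids.csv,
# photos.tsv000, and features.npy are the precomputed Unsplash dataset files
# and must sit next to this script.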