File size: 1,142 Bytes
eb23072
fa423dd
eb23072
 
fa423dd
 
 
eb23072
fa423dd
 
 
 
 
 
 
 
 
 
 
 
 
 
eb23072
 
 
 
 
fa423dd
 
 
 
 
 
 
 
4485dfb
fa423dd
8eac885
fa423dd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import torch
import gradio as gr
from transformers import AlignProcessor, AlignModel


# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ALIGN processor and model from the "kakaobrain/align-base"
# checkpoint, move the model to the chosen device, and switch it to eval mode.
processor = AlignProcessor.from_pretrained("kakaobrain/align-base")
model = AlignModel.from_pretrained("kakaobrain/align-base")
model = model.to(device)
model.eval()


def predict(image, labels):
    """Score an image against comma-separated candidate labels with ALIGN.

    Args:
        image: Image to classify; it is handed to the processor unchanged
            (the interface in this file is configured to supply a PIL image).
        labels: Candidate labels as a single comma-separated string, e.g.
            ``"cat, dog"``. Whitespace around each label is ignored and
            empty entries are dropped.

    Returns:
        A dict mapping each candidate label to its softmax probability
        (a Python float) for the image.

    Raises:
        ValueError: If ``labels`` contains no non-empty label.
    """
    # Split on the bare comma and strip each piece so that "a,b", "a, b" and
    # "a ,  b" all give the same labels. Splitting on ', ' alone would treat
    # "a,b" as one label and keep stray whitespace inside label names.
    candidates = [part.strip() for part in (labels or "").split(",")]
    candidates = [candidate for candidate in candidates if candidate]
    if not candidates:
        raise ValueError("Please provide at least one candidate label.")

    inputs = processor(images=image, text=candidates, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over dim=1, then row 0 is paired with the labels in order.
    probs = outputs.logits_per_image.softmax(dim=1).cpu().numpy()
    return {label: float(prob) for label, prob in zip(candidates, probs[0])}


# Text shown under the title in the web UI (currently just a newline).
description = """
"""

# Build the web UI around predict(): an image plus a free-text label list in,
# a label/confidence widget out. The components come from the top-level
# `gr.Image` / `gr.Textbox` classes because the legacy `gr.inputs.*` namespace
# is deprecated in Gradio 3 and removed in Gradio 4.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(label="Image to classify", type="pil"),
        gr.Textbox(
            lines=1,
            label="Comma separated candidate labels",
            placeholder="Enter labels separated by ', '",
        ),
    ],
    outputs="label",
    examples=[
        ["rafale.jpg", "Dassault Rafale, Lockheed Martin f35"],
    ],
    title="Images vs labels créé avec ALIGN et Huggingface",
    description=description,
)

# Start the app at import time, as the original script did.
demo.launch()