import os
from queue import PriorityQueue
from typing import Dict, List, Optional

from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings

# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv(".env")


class Retriever:
    """Embeds tool descriptions and retrieves the tools most relevant to a query."""

    def __init__(
        self, openai_api_key: Optional[str] = None, model: str = "text-embedding-ada-002"
    ):
        # Fall back to the OPENAI_API_KEY environment variable when no key is passed in.
        if openai_api_key is None:
            openai_api_key = os.environ.get("OPENAI_API_KEY")
        self.embed = OpenAIEmbeddings(openai_api_key=openai_api_key, model=model)
        # Maps tool_name -> {"document": str, "embedding": List[float]}.
        self.documents: Dict[str, Dict] = {}

    def add_tool(self, tool_name: str, api_info: Dict) -> None:
        """Embed a tool's name and description so it can be retrieved later."""
        # Skip tools that have already been indexed.
        if tool_name in self.documents:
            return
        document = api_info["name_for_model"] + ". " + api_info["description_for_model"]
        document_embedding = self.embed.embed_documents([document])
        self.documents[tool_name] = {
            "document": document,
            "embedding": document_embedding[0],
        }

    def query(self, query: str, topk: int = 3) -> List[str]:
        """Return the names of the topk tools most similar to the query."""
        query_embedding = self.embed.embed_query(query)

        # Min-heap keyed on negated similarity, so the most similar tools pop first.
        queue = PriorityQueue()
        for tool_name, tool_info in self.documents.items():
            tool_embedding = tool_info["embedding"]
            tool_sim = self.similarity(query_embedding, tool_embedding)
            queue.put((-tool_sim, tool_name))

        result = []
        for _ in range(min(topk, queue.qsize())):
            _, tool_name = queue.get()
            result.append(tool_name)

        return result

    def similarity(self, query: List[float], document: List[float]) -> float:
        # Dot product; OpenAI embeddings are unit-normalized, so this equals cosine similarity.
        return sum(i * j for i, j in zip(query, document))
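

# Minimal usage sketch (not part of the original module). It assumes OPENAI_API_KEY is
# available via the environment or .env, and the tool names and descriptions below are
# made up purely for illustration.
if __name__ == "__main__":
    retriever = Retriever()
    retriever.add_tool(
        "weather",
        {
            "name_for_model": "weather",
            "description_for_model": "Look up current weather conditions and forecasts by city.",
        },
    )
    retriever.add_tool(
        "calculator",
        {
            "name_for_model": "calculator",
            "description_for_model": "Evaluate arithmetic expressions and unit conversions.",
        },
    )
    # Expected to rank "weather" above "calculator" for this query.
    print(retriever.query("What will the temperature be in Paris tomorrow?", topk=2))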