Spaces:
Runtime error
Runtime error
"""Wrapper around weaviate vector database.""" | |
from __future__ import annotations | |
from typing import Any, Dict, Iterable, List, Optional | |
from uuid import uuid4 | |
from langchain.docstore.document import Document | |
from langchain.embeddings.base import Embeddings | |
from langchain.vectorstores.base import VectorStore | |
class Weaviate(VectorStore):
    """Wrapper around Weaviate vector database.

    To use, you should have the ``weaviate-client`` python package installed.

    Example:
        .. code-block:: python

            import os

            import weaviate
            from langchain.vectorstores import Weaviate

            client = weaviate.Client(url=os.environ["WEAVIATE_URL"], ...)
            weaviate = Weaviate(client, index_name, text_key)
    """

    def __init__(
        self,
        client: Any,
        index_name: str,
        text_key: str,
        attributes: Optional[List[str]] = None,
    ):
        """Initialize with Weaviate client.

        Args:
            client: A ``weaviate.Client`` instance.
            index_name: Class name passed to the client when adding objects
                and when querying.
            text_key: Property under which each text is stored; it is also
                the property read back as ``Document.page_content``.
            attributes: Extra property names to request on every query, in
                addition to ``text_key``.

        Raises:
            ValueError: If the ``weaviate`` package cannot be imported, or if
                ``client`` is not a ``weaviate.Client``.
        """
        # Imported lazily so this module can be imported without the optional
        # ``weaviate-client`` dependency installed.
        try:
            import weaviate
        except ImportError as exc:
            # ValueError is kept for backward compatibility; the original
            # ImportError is chained so the root cause stays visible.
            raise ValueError(
                "Could not import weaviate python package. "
                "Please install it with `pip install weaviate-client`."
            ) from exc
        if not isinstance(client, weaviate.Client):
            raise ValueError(
                f"client should be an instance of weaviate.Client, got {type(client)}"
            )
        self._client = client
        self._index_name = index_name
        self._text_key = text_key
        # The text property is always requested first; caller-supplied
        # attributes are appended after it.
        self._query_attrs = [self._text_key]
        if attributes is not None:
            self._query_attrs.extend(attributes)

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Upload texts with metadata (properties) to Weaviate.

        Args:
            texts: Texts to store, one object per text.
            metadatas: Optional per-text dicts, indexed in parallel with
                ``texts``; each key/value pair is stored as a property on
                the corresponding object.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The generated UUID of each added object, in input order.
        """
        from weaviate.util import get_valid_uuid

        ids: List[str] = []
        with self._client.batch as batch:
            for i, doc in enumerate(texts):
                data_properties = {self._text_key: doc}
                if metadatas is not None:
                    # Metadata is merged after the text property, so a
                    # metadata key equal to ``text_key`` overrides the text.
                    data_properties.update(metadatas[i])
                _id = get_valid_uuid(uuid4())
                batch.add_data_object(data_properties, self._index_name, _id)
                ids.append(_id)
        return ids

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Look up similar documents in weaviate.

        Args:
            query: Text used as the single near-text concept.
            k: Maximum number of results requested.
            **kwargs: If ``search_distance`` is present and truthy, it is
                forwarded as the near-text ``certainty`` value.

        Returns:
            One ``Document`` per result: ``page_content`` is the value of the
            text property and ``metadata`` holds the remaining requested
            properties.

        Raises:
            ValueError: If the query response contains an ``errors`` entry.
        """
        content: Dict[str, Any] = {"concepts": [query]}
        search_distance = kwargs.get("search_distance")
        if search_distance:
            content["certainty"] = search_distance
        query_obj = self._client.query.get(self._index_name, self._query_attrs)
        result = query_obj.with_near_text(content).with_limit(k).do()
        # Surface server-side query failures explicitly rather than failing
        # with an opaque KeyError/TypeError while indexing the response.
        if "errors" in result:
            raise ValueError(f"Error during query: {result['errors']}")
        docs = []
        for res in result["data"]["Get"][self._index_name]:
            # Remove the text property so only the other properties remain
            # as metadata.
            text = res.pop(self._text_key)
            docs.append(Document(page_content=text, metadata=res))
        return docs

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> VectorStore:
        """Not implemented for Weaviate yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("weaviate does not currently support `from_texts`.")