Tonic committed on
Commit 92053d2 · 1 Parent(s): debaa04

wrap automodel for zerogpu

Files changed (1)
langchainapp.py +14 -9
langchainapp.py CHANGED
@@ -2,7 +2,7 @@
 import spaces
 from torch.nn import DataParallel
 from torch import Tensor
-# from transformers import AutoTokenizer, AutoModel
+from transformers import AutoTokenizer, AutoModel
 from huggingface_hub import InferenceClient
 from openai import OpenAI
 from langchain_community.embeddings import HuggingFaceInstructEmbeddings
@@ -43,18 +43,20 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 ### Utils
 hf_token, yi_token = load_env_variables()
 
+tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, trust_remote_code=True)
+# Lazy load model
+model = None
+
 @spaces.GPU
 def load_model():
-    # Import AutoModel within the function to avoid issues with pickling in multiprocessing
-    from transformers import AutoModel, AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, trust_remote_code=True)
-
-    return AutoModel.from_pretrained(model_name, token=hf_token, trust_remote_code=True).to(device)
+    global model
+    if model is None:
+        from transformers import AutoModel
+        model = AutoModel.from_pretrained(model_name, token=hf_token, trust_remote_code=True).to(device)
+    return model
 
 # Load model
 nvidiamodel = load_model()
-# Load model
-nvidiamodel = load_model()
 # nvidiamodel.set_pooling_include_prompt(include_prompt=False)
 
 def clear_cuda_cache():
@@ -66,7 +68,8 @@ chroma_client = chromadb.Client(Settings())
 
 # Create a collection
 chroma_collection = chroma_client.create_collection("all-my-documents")
-
+
+@spaces.GPU
 class MyEmbeddingFunction(EmbeddingFunction):
     def __init__(self, model_name: str, token: str, intention_client):
         self.model_name = model_name
@@ -156,6 +159,7 @@ def add_documents_to_chroma(documents: list, embedding_function: MyEmbeddingFunc
     )
 
 def query_chroma(query_text: str, embedding_function: MyEmbeddingFunction):
+    model = load_model()
    query_embeddings, query_metadata = embedding_function.compute_embeddings(query_text)
    result_docs = chroma_collection.query(
        query_texts=[query_text],
@@ -208,6 +212,7 @@ def upload_documents(files):
    return "Documents uploaded and processed successfully!"
 
 def query_documents(query):
+    model = load_model()
    results = query_chroma(query)
    return "\n\n".join([result.content for result in results])
 
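
The change follows the usual ZeroGPU pattern: cheap CPU-side setup (the tokenizer) stays at import time, while the heavy AutoModel load moves into a spaces.GPU-decorated function that caches the instance in a module-level global. A minimal sketch of that pattern, assuming a ZeroGPU Space; MODEL_NAME and the omitted token handling are placeholders for illustration, not values from this repo:

import torch
import spaces
from transformers import AutoModel, AutoTokenizer

MODEL_NAME = "my-org/my-model"  # placeholder; the app uses its own model_name
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The tokenizer is CPU-only and cheap, so it can load at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

model = None  # filled in lazily by the first GPU-decorated call

@spaces.GPU  # ZeroGPU attaches a GPU only while this function is running
def load_model():
    global model
    if model is None:
        # The heavy load happens once, inside the GPU-decorated scope;
        # later calls reuse the cached instance.
        model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True).to(device)
    return model

Request-time entry points (in this commit, query_chroma and query_documents) then call load_model(), so the first request pays the load cost and subsequent ones reuse the cached weights.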