monsoon-nlp committed on
Commit
3d872a7
·
1 Parent(s): 0d78964

hide sgpt for now

Browse files
Files changed (1) hide show
  1. app.py +35 -34
app.py CHANGED
@@ -13,9 +13,9 @@ pinecone.init(
13
  environment=os.environ.get('PINECONE_ENV', '')
14
  )
15
 
16
- model = AutoModel.from_pretrained('monsoon-nlp/gpt-nyc')
17
- tokenizer = AutoTokenizer.from_pretrained('monsoon-nlp/gpt-nyc')
18
- zos = np.zeros(4096-1024).tolist()
19
 
20
  def list_me(matches):
21
  result = ''
@@ -43,38 +43,39 @@ def query(question):
43
  )
44
 
45
  # SGPT search
46
- batch_tokens = tokenizer(
47
- [question],
48
- padding=True,
49
- truncation=True,
50
- return_tensors="pt"
51
- )
52
- with torch.no_grad():
53
- last_hidden_state = model(**batch_tokens, output_hidden_states=True, return_dict=True).last_hidden_state
54
- weights = (
55
- torch.arange(start=1, end=last_hidden_state.shape[1] + 1)
56
- .unsqueeze(0)
57
- .unsqueeze(-1)
58
- .expand(last_hidden_state.size())
59
- .float().to(last_hidden_state.device)
60
- )
61
- input_mask_expanded = (
62
- batch_tokens["attention_mask"]
63
- .unsqueeze(-1)
64
- .expand(last_hidden_state.size())
65
- .float()
66
- )
67
- sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded * weights, dim=1)
68
- sum_mask = torch.sum(input_mask_expanded * weights, dim=1)
69
- embeddings = sum_embeddings / sum_mask
70
- closest_sgpt = index.query(
71
- top_k=2,
72
- include_metadata=True,
73
- namespace="mini",
74
- vector=embeddings[0].tolist() + zos,
75
- )
76
 
77
- return '<h3>Cohere</h3><ul>' + list_me(closest['matches']) + '</ul><h3>SGPT</h3><ul>' + list_me(closest_sgpt['matches']) + '</ul>'
 
78
 
79
 
80
  iface = gr.Interface(
 
13
  environment=os.environ.get('PINECONE_ENV', '')
14
  )
15
 
16
+ # model = AutoModel.from_pretrained('monsoon-nlp/gpt-nyc')
17
+ # tokenizer = AutoTokenizer.from_pretrained('monsoon-nlp/gpt-nyc')
18
+ # zos = np.zeros(4096-1024).tolist()
19
 
20
  def list_me(matches):
21
  result = ''
 
43
  )
44
 
45
  # SGPT search
46
+ # batch_tokens = tokenizer(
47
+ # [question],
48
+ # padding=True,
49
+ # truncation=True,
50
+ # return_tensors="pt"
51
+ # )
52
+ # with torch.no_grad():
53
+ # last_hidden_state = model(**batch_tokens, output_hidden_states=True, return_dict=True).last_hidden_state
54
+ # weights = (
55
+ # torch.arange(start=1, end=last_hidden_state.shape[1] + 1)
56
+ # .unsqueeze(0)
57
+ # .unsqueeze(-1)
58
+ # .expand(last_hidden_state.size())
59
+ # .float().to(last_hidden_state.device)
60
+ # )
61
+ # input_mask_expanded = (
62
+ # batch_tokens["attention_mask"]
63
+ # .unsqueeze(-1)
64
+ # .expand(last_hidden_state.size())
65
+ # .float()
66
+ # )
67
+ # sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded * weights, dim=1)
68
+ # sum_mask = torch.sum(input_mask_expanded * weights, dim=1)
69
+ # embeddings = sum_embeddings / sum_mask
70
+ # closest_sgpt = index.query(
71
+ # top_k=2,
72
+ # include_metadata=True,
73
+ # namespace="mini",
74
+ # vector=embeddings[0].tolist() + zos,
75
+ # )
76
 
77
+ return '<h3>Cohere</h3><ul>' + list_me(closest['matches']) + '</ul>'
78
+ #'<h3>SGPT</h3><ul>' + list_me(closest_sgpt['matches']) + '</ul>'
79
 
80
 
81
  iface = gr.Interface(