aaditkapoorbionlp commited on
Commit
90c54f6
·
1 Parent(s): 7bb26a8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
3
+ import os
4
+ import pandas as pd
5
+ import numpy as np
6
+ from transformers import pipeline
7
+ from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances
8
+
9
+
10
+
11
# allow_output_mutation=True stops Streamlit's legacy cache from hashing the
# returned tokenizer/model on every rerun to detect mutation; that hashing is
# slow for large model objects and the objects are only used for inference.
# NOTE(review): newer Streamlit releases replace st.cache with
# st.cache_resource - switch once the deployed version is confirmed.
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the PubMedGPT tokenizer and base model once per server process.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``stanford-crfm/pubmedgpt`` checkpoint.
    """
    tokenizer = AutoTokenizer.from_pretrained("stanford-crfm/pubmedgpt")
    model = AutoModel.from_pretrained("stanford-crfm/pubmedgpt")
    return tokenizer, model
16
+
17
# Page configuration must be the first Streamlit command of the script.
# It is issued before load_model() because the cache decorator may render a
# spinner on a cache miss, which would count as an earlier command.
st.set_page_config(
    page_title="Clinical Trials Best Match [Eye Diseases]",
    page_icon="🧑‍💻",
    layout="wide",
)

tokenizer, model = load_model()
pipe = pipeline('feature-extraction', model=model, tokenizer=tokenizer)


def get_embedding(desc):
    """Return one embedding vector for *desc* by mean-pooling token features.

    The feature-extraction pipeline output is flattened to a
    ``(tokens, hidden_dim)`` matrix and averaged over the token axis.

    Args:
        desc: Text to embed.

    Returns:
        A 1-D numpy array of length ``hidden_dim``.
    """
    features = np.asarray(pipe(desc))
    # Reshape explicitly instead of np.squeeze: squeeze would also drop the
    # token axis for a one-token input, and the mean would collapse to a scalar.
    return features.reshape(-1, features.shape[-1]).mean(axis=0)


# Constants
embs = []

# Heading
st.title('Clinical Trials Search')

# Precomputed embeddings that get_sim() searches over.
# (Original note: "Gene File, 128 dim embeddings" - TODO confirm against how
# data.npy was actually produced.)
# NOTE(review): get_sim() indexes each record with 'data' and 'ids'; if the
# file stores pickled dicts rather than a structured array, np.load will need
# allow_pickle=True - confirm the file format.
data = np.load("data.npy")
38
+
39
+
40
@st.cache(allow_output_mutation=True)
def get_sim(emb_desc, data):
    """Rank stored records by cosine similarity to a query embedding.

    Args:
        emb_desc: Query embedding; it is reshaped to a single row.
        data: Iterable of records, each exposing ``record['data']`` (the
            stored embedding) and ``record['ids']`` (the value shown in the
            ``url`` column). Assumes each ``record['data']`` is a single
            vector of the same length as ``emb_desc`` - TODO confirm.

    Returns:
        A DataFrame with columns ``url`` and ``scores``, sorted so the most
        similar record comes first.
    """
    # cosine_similarity only accepts 2-D inputs, so a 1-D embedding must be
    # presented as a (1, dim) row.
    query = np.asarray(emb_desc, dtype=float).reshape(1, -1)

    ids = []
    scores = []
    for record in data:
        candidate = np.asarray(record['data'], dtype=float).reshape(1, -1)
        ids.append(record['ids'])
        # The result is a 1x1 matrix; store a plain float so the column is
        # numeric and sortable.
        scores.append(float(cosine_similarity(query, candidate)[0, 0]))

    df = pd.DataFrame(data={"url": ids, "scores": scores})
    # Highest similarity first: the best match belongs at the top.
    return df.sort_values(by='scores', ascending=False)
51
+
52
st.subheader("🖮 Enter your clinical trial study description")
text = st.text_area('Example')

st.subheader("💻 Hit Search")

# Run the model only when a search is requested. Streamlit re-executes the
# whole script on every interaction, so embedding at top level would run
# inference on each rerun, including the first render with an empty box.
if st.button("Compute"):
    if not text.strip():
        st.warning("Please enter a study description before searching.")
    else:
        with st.spinner('Searching...'):
            emb = get_embedding(text)
            df = get_sim(emb, data=data)
        st.dataframe(df)