srinivasbt committed on
Commit
125f3f2
·
verified ·
1 Parent(s): 059690e

Initial app.py changes

Browse files

Created an app that tokenizes medical text and generates embeddings using a specialized medical model (Bio_ClinicalBERT).

Files changed (1) hide show
  1. app.py +26 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from transformers import AutoModel, AutoTokenizer
import torch

# Streamlit app: embed a piece of medical text with ClinicalBERT and display
# the first few values of its CLS token embedding.

# Hugging Face Hub identifier of the checkpoint to load.
model_name = "emilyalsentzer/Bio_ClinicalBERT"


@st.cache_resource
def _load_tokenizer_and_model(name):
    """Load the tokenizer and model for *name*, cached across script reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; without caching, the checkpoint would be reloaded each time
    the user clicks the button or edits the text field.

    Args:
        name: Hugging Face Hub model identifier.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


# Keep the module-level names so the rest of the script reads as before.
tokenizer, model = _load_tokenizer_and_model(model_name)

# Streamlit app UI
st.title("Medical Text Analysis with ClinicalBERT")
st.write("Type in a medical text input to get the CLS token embedding.")

# User input
text = st.text_input("Enter Medical Text")

if st.button("Predict"):
    if text.strip():
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        # Inference only: disable autograd so no graph is built for the
        # forward pass and the result can be converted to NumPy directly.
        with torch.no_grad():
            outputs = model(**inputs)
        # Index 0 along the sequence axis is the CLS token position.
        cls_embedding = outputs.last_hidden_state[:, 0, :].numpy()
        st.write(f"CLS Embedding (first 5 values): {cls_embedding[0][:5]}")
    else:
        # Empty or whitespace-only input: prompt instead of running the model.
        st.write("Please enter some text.")