Spaces:
Runtime error
Runtime error
thankrandomness
committed on
Commit
·
9f09d69
1
Parent(s):
f2ca0de
split data
Browse files
- app.py +8 -1
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import torch
|
3 |
-
from datasets import load_dataset
|
4 |
from transformers import AutoTokenizer, AutoModel
|
5 |
import chromadb
|
6 |
import gradio as gr
|
@@ -15,6 +15,13 @@ def meanpooling(output, mask):
|
|
15 |
# Load the dataset
|
16 |
dataset = load_dataset("thankrandomness/mimic-iii-sample")
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
# Load the model and tokenizer
|
19 |
tokenizer = AutoTokenizer.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")
|
20 |
model = AutoModel.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")
|
|
|
1 |
import os
|
2 |
import torch
|
3 |
+
from datasets import load_dataset, DatasetDict
|
4 |
from transformers import AutoTokenizer, AutoModel
|
5 |
import chromadb
|
6 |
import gradio as gr
|
|
|
15 |
# Load the dataset
|
16 |
dataset = load_dataset("thankrandomness/mimic-iii-sample")
|
17 |
|
18 |
+
# Split the dataset into train and validation sets
|
19 |
+
split_dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)
|
20 |
+
dataset = DatasetDict({
|
21 |
+
'train': split_dataset['train'],
|
22 |
+
'validation': split_dataset['test']
|
23 |
+
})
|
24 |
+
|
25 |
# Load the model and tokenizer
|
26 |
tokenizer = AutoTokenizer.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")
|
27 |
model = AutoModel.from_pretrained("neuml/pubmedbert-base-embeddings-matryoshka")
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
torch
|
2 |
transformers
|
|
|
3 |
chromadb
|
4 |
gradio
|
5 |
numpy
|
|
|
1 |
torch
|
2 |
transformers
|
3 |
+
datasets
|
4 |
chromadb
|
5 |
gradio
|
6 |
numpy
|