Spaces:
Running
Running
File size: 449 Bytes
170d9a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from medrag_multi_modal.semantic_chunking import SemanticChunker
def test_semantic_chunking():
    """Check the row count and column schema produced by ``SemanticChunker``.

    Runs a chunker configured with ``chunk_size=256`` over the
    ``geekyrakshit/grays-anatomy-test`` document dataset, then asserts that
    the result has exactly 49 rows and the expected columns in order.
    """
    # Column order matters: the comparison below is a list equality check.
    expected_columns = [
        "document_idx",
        "text",
        "page_idx",
        "document_name",
        "file_path",
        "file_url",
        "loader_name",
    ]

    chunked = SemanticChunker(chunk_size=256).chunk(
        document_dataset="geekyrakshit/grays-anatomy-test"
    )

    assert chunked.num_rows == 49
    assert chunked.column_names == expected_columns
|