File size: 449 Bytes
170d9a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from medrag_multi_modal.semantic_chunking import SemanticChunker


def test_semantic_chunking():
    """Chunk the test dataset and verify the output's size and schema.

    Builds a ``SemanticChunker`` with ``chunk_size=256``, runs it on the
    ``geekyrakshit/grays-anatomy-test`` document dataset, and checks that
    the result has exactly 49 rows and the expected columns, in order.
    """
    # Column order matters: the comparison below is an exact list equality.
    expected_columns = [
        "document_idx",
        "text",
        "page_idx",
        "document_name",
        "file_path",
        "file_url",
        "loader_name",
    ]

    semantic_chunker = SemanticChunker(chunk_size=256)
    # NOTE(review): the dataset identifier is presumably resolved remotely,
    # so this test likely needs network access — confirm.
    chunked = semantic_chunker.chunk(
        document_dataset="geekyrakshit/grays-anatomy-test"
    )

    assert chunked.num_rows == 49
    assert chunked.column_names == expected_columns