Spaces:
Runtime error
Runtime error
"""Test functionality related to combining documents.""" | |
from typing import Any, List, Tuple | |
import pytest | |
from langchain.chains.combine_documents.map_reduce import ( | |
_collapse_docs, | |
_split_list_of_docs, | |
) | |
from langchain.docstore.document import Document | |
def _fake_docs_len_func(docs: List[Document]) -> int:
    """Return the length of the text produced by ``_fake_combine_docs_func``.

    The helper's first return element (the combined string) is measured
    with ``len``; the second element is ignored.
    """
    combined = _fake_combine_docs_func(docs)
    return len(combined[0])
def _fake_combine_docs_func(docs: List[Document], **kwargs: Any) -> Tuple[str, dict]:
    """Concatenate the ``page_content`` of every document, in order.

    Any keyword arguments are accepted but unused. The result is a pair
    of the joined text and an empty dict.
    """
    pieces = [doc.page_content for doc in docs]
    return "".join(pieces), {}
def test__split_list_long_single_doc() -> None:
    """Check that one document exceeding the limit raises ``ValueError``."""
    # 300 characters of content against a limit of 100.
    oversized = Document(page_content="foo" * 100)
    with pytest.raises(ValueError):
        _split_list_of_docs([oversized], _fake_docs_len_func, 100)
def test__split_list_long_pair_doc() -> None:
    """Check that two medium docs that cannot share a chunk raise ``ValueError``."""
    # Each doc is 90 characters: under the limit alone, over it as a pair.
    medium = Document(page_content="foo" * 30)
    pair = [medium, medium]
    with pytest.raises(ValueError):
        _split_list_of_docs(pair, _fake_docs_len_func, 100)
def test__split_list_single_doc() -> None:
    """Check that a single short doc comes back as one group."""
    docs = [Document(page_content="foo")]
    expected_groups = [docs]
    assert _split_list_of_docs(docs, _fake_docs_len_func, 100) == expected_groups
def test__split_list_double_doc() -> None:
    """Check that two short docs are kept together in one group."""
    docs = [Document(page_content=text) for text in ("foo", "bar")]
    expected_groups = [docs]
    assert _split_list_of_docs(docs, _fake_docs_len_func, 100) == expected_groups
def test__split_list_works_correctly() -> None:
    """Check the grouping of six docs under a length limit of 10."""
    contents = ["foo", "bar", "baz", "foo" * 2, "bar", "baz"]
    docs = [Document(page_content=text) for text in contents]
    doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 10)

    expected_contents = [
        # Test a group of three.
        ["foo", "bar", "baz"],
        # Test a group of two, where one is bigger.
        ["foo" * 2, "bar"],
        # Test no errors on last
        ["baz"],
    ]
    expected_result = [
        [Document(page_content=text) for text in group]
        for group in expected_contents
    ]
    assert doc_list == expected_result
def test__collapse_docs_no_metadata() -> None:
    """Check that docs without metadata collapse into one joined document."""
    docs = [Document(page_content=text) for text in ("foo", "bar", "baz")]
    collapsed = _collapse_docs(docs, _fake_combine_docs_func)
    assert collapsed == Document(page_content="foobarbaz")
def test__collapse_docs_one_doc() -> None:
    """Check that collapsing a single document yields an equal document."""
    # Test with no metadata.
    plain = Document(page_content="foo")
    assert _collapse_docs([plain], _fake_combine_docs_func) == plain

    # Test with metadata.
    with_source = Document(page_content="foo", metadata={"source": "a"})
    assert _collapse_docs([with_source], _fake_combine_docs_func) == with_source
def test__collapse_docs_metadata() -> None:
    """Check how metadata is merged when two docs are collapsed.

    Keys present in both docs are expected to become ``", "``-joined
    strings; keys present in only one doc are expected to carry over.
    """
    first = Document(
        page_content="foo",
        metadata={"source": "a", "foo": 2, "bar": "1", "extra1": "foo"},
    )
    second = Document(
        page_content="bar",
        metadata={"source": "b", "foo": "3", "bar": 2, "extra2": "bar"},
    )
    collapsed = _collapse_docs([first, second], _fake_combine_docs_func)

    expected_output = Document(
        page_content="foobar",
        metadata={
            "source": "a, b",
            "foo": "2, 3",
            "bar": "1, 2",
            "extra1": "foo",
            "extra2": "bar",
        },
    )
    assert collapsed == expected_output