Factor out doc formatting and add validation (#3026)

@cnhhoang850 This is a slightly more generic fix for #2944: it works for
whatever the expected metadata keys are, not just `source`.
This commit is contained in:
Davis Chase
2023-04-17 20:28:01 -07:00
committed by GitHub
parent 3453b7457c
commit 19c85aa990
4 changed files with 53 additions and 18 deletions

View File

@@ -4,6 +4,8 @@ from typing import Any, List
import pytest
from langchain import PromptTemplate
from langchain.chains.combine_documents.base import format_document
from langchain.chains.combine_documents.map_reduce import (
_collapse_docs,
_split_list_of_docs,
@@ -116,3 +118,24 @@ def test__collapse_docs_metadata() -> None:
}
expected_output = Document(page_content="foobar", metadata=expected_metadata)
assert output == expected_output
def test_format_doc_with_metadata() -> None:
    """Test that format_document fills a prompt from content and metadata.

    The document supplies ``page_content`` and a ``bar`` metadata entry, which
    are exactly the input variables the prompt template declares, so the
    formatted result should be the two values joined by ``", "``.
    """
    doc = Document(page_content="foo", metadata={"bar": "baz"})
    prompt = PromptTemplate(
        input_variables=["page_content", "bar"], template="{page_content}, {bar}"
    )
    expected_output = "foo, baz"
    output = format_document(doc, prompt)
    assert output == expected_output
def test_format_doc_missing_metadata() -> None:
    """Test that format_document raises when a prompt variable has no value.

    The prompt declares a ``bar`` input variable, but the document is created
    without a metadata argument, so the call is expected to raise
    ``ValueError``.
    """
    doc = Document(page_content="foo")
    prompt = PromptTemplate(
        input_variables=["page_content", "bar"], template="{page_content}, {bar}"
    )
    # Keep only the call under test inside the context manager so that a
    # ValueError raised during setup cannot satisfy the expectation.
    with pytest.raises(ValueError):
        format_document(doc, prompt)