# Patch summary (text before the first "diff --git" header is skipped by patch tools).
#
# CSVLoader: when building page content, a key that is None is no longer
# .strip()-ed, str values are stripped, list values have each element stripped
# and are joined with ',', and any other value is emitted unchanged.
# Tests: adds the fixture test_none_col.csv, whose data rows carry more fields
# than the three-column header (the expected documents show the extra fields
# under a "None" key), one new CSVLoader test, and the matching expected
# documents in two TestDirectoryLoader tests.
# NOTE(review): indentation inside the hunks was restored from a
# whitespace-collapsed copy -- confirm against the target files before applying.
diff --git a/libs/community/langchain_community/document_loaders/csv_loader.py b/libs/community/langchain_community/document_loaders/csv_loader.py
index fca2f1f0f9b..37e6f565531 100644
--- a/libs/community/langchain_community/document_loaders/csv_loader.py
+++ b/libs/community/langchain_community/document_loaders/csv_loader.py
@@ -97,7 +97,9 @@ class CSVLoader(BaseLoader):
                     f"Source column '{self.source_column}' not found in CSV file."
                 )
             content = "\n".join(
-                f"{k.strip()}: {v.strip() if v is not None else v}"
+                f"""{k.strip() if k is not None else k}: {v.strip()
+                if isinstance(v, str) else ','.join(map(str.strip, v))
+                if isinstance(v, list) else v}"""
                 for k, v in row.items()
                 if k not in self.metadata_columns
             )
diff --git a/libs/community/tests/unit_tests/document_loaders/test_csv_loader.py b/libs/community/tests/unit_tests/document_loaders/test_csv_loader.py
index a9d4212c15e..a7ab65e35a4 100644
--- a/libs/community/tests/unit_tests/document_loaders/test_csv_loader.py
+++ b/libs/community/tests/unit_tests/document_loaders/test_csv_loader.py
@@ -85,6 +85,29 @@ class TestCSVLoader:
         # Assert
         assert result == expected_docs
 
+    def test_csv_loader_load_none_column_file(self) -> None:
+        # Setup
+        file_path = self._get_csv_file_path("test_none_col.csv")
+        expected_docs = [
+            Document(
+                page_content="column1: value1\ncolumn2: value2\n"
+                "column3: value3\nNone: value4,value5",
+                metadata={"source": file_path, "row": 0},
+            ),
+            Document(
+                page_content="column1: value6\ncolumn2: value7\n"
+                "column3: value8\nNone: value9",
+                metadata={"source": file_path, "row": 1},
+            ),
+        ]
+
+        # Exercise
+        loader = CSVLoader(file_path=file_path)
+        result = loader.load()
+
+        # Assert
+        assert result == expected_docs
+
     # utility functions
     def _get_csv_file_path(self, file_name: str) -> str:
         return str(Path(__file__).resolve().parent / "test_docs" / "csv" / file_name)
diff --git a/libs/community/tests/unit_tests/document_loaders/test_directory_loader.py b/libs/community/tests/unit_tests/document_loaders/test_directory_loader.py
index d8795a6ce3b..e385e77b33e 100644
--- a/libs/community/tests/unit_tests/document_loaders/test_directory_loader.py
+++ b/libs/community/tests/unit_tests/document_loaders/test_directory_loader.py
@@ -58,6 +58,22 @@ class TestDirectoryLoader:
                     "row": 1,
                 },
             ),
+            Document(
+                page_content="column1: value1\ncolumn2: value2\n"
+                "column3: value3\nNone: value4,value5",
+                metadata={
+                    "source": self._get_csv_file_path("test_none_col.csv"),
+                    "row": 0,
+                },
+            ),
+            Document(
+                page_content="column1: value6\ncolumn2: value7\n"
+                "column3: value8\nNone: value9",
+                metadata={
+                    "source": self._get_csv_file_path("test_none_col.csv"),
+                    "row": 1,
+                },
+            ),
         ]
 
         loaded_docs = sorted(loader.load(), key=lambda doc: doc.metadata["source"])
@@ -141,6 +157,20 @@ class TestDirectoryLoader:
                 metadata={"source": file_path, "row": 0},
             )
         ]
+        file_name = "test_none_col.csv"
+        file_path = self._get_csv_file_path(file_name)
+        expected_docs += [
+            Document(
+                page_content="column1: value1\ncolumn2: value2\n"
+                "column3: value3\nNone: value4,value5",
+                metadata={"source": file_path, "row": 0},
+            ),
+            Document(
+                page_content="column1: value6\ncolumn2: value7\n"
+                "column3: value8\nNone: value9",
+                metadata={"source": file_path, "row": 1},
+            ),
+        ]
 
         # Assert
         loader = DirectoryLoader(dir_path, loader_cls=CSVLoader)
diff --git a/libs/community/tests/unit_tests/document_loaders/test_docs/csv/test_none_col.csv b/libs/community/tests/unit_tests/document_loaders/test_docs/csv/test_none_col.csv
new file mode 100644
index 00000000000..a6a3d77e050
--- /dev/null
+++ b/libs/community/tests/unit_tests/document_loaders/test_docs/csv/test_none_col.csv
@@ -0,0 +1,3 @@
+column1,column2,column3
+value1,value2,value3,value4,value5
+value6,value7,value8,value9