diff --git a/libs/community/langchain_community/document_loaders/csv_loader.py b/libs/community/langchain_community/document_loaders/csv_loader.py index 37e6f565531..efc8507aa6f 100644 --- a/libs/community/langchain_community/document_loaders/csv_loader.py +++ b/libs/community/langchain_community/document_loaders/csv_loader.py @@ -16,8 +16,9 @@ from langchain_community.document_loaders.unstructured import ( class CSVLoader(BaseLoader): """Load a `CSV` file into a list of Documents. - Each document represents one row of the CSV file. Every row is converted into a - key/value pair and outputted to a new line in the document's page_content. + Each document represents one row of the CSV file. Every row is converted + into a key/value pair and outputted to a new line in the document's + page_content. The source for each document loaded from csv is set to the value of the `file_path` argument for all documents by default. @@ -32,6 +33,67 @@ class CSVLoader(BaseLoader): column1: value1 column2: value2 column3: value3 + + Instantiate: + .. code-block:: python + + from langchain_community.document_loaders import CSVLoader + + loader = CSVLoader(file_path='./hw_200.csv', + csv_args={ + 'delimiter': ',', + 'quotechar': '"', + 'fieldnames': ['Index', 'Height', 'Weight'] + }) + + Load: + .. code-block:: python + + docs = loader.load() + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + Index: Index + Height: "Height(Inches)" + Weight: "Weight(Pounds)" + {'source': './hw_200.csv', 'row': 0} + + Async load: + .. code-block:: python + + docs = await loader.aload() + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + Index: Index + Height: "Height(Inches)" + Weight: "Weight(Pounds)" + {'source': './hw_200.csv', 'row': 0} + + Lazy load: + .. 
code-block:: python + + docs = [] + docs_lazy = loader.lazy_load() + + # async variant: + # docs_lazy = await loader.alazy_load() + + for doc in docs_lazy: + docs.append(doc) + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + Index: Index + Height: "Height(Inches)" + Weight: "Weight(Pounds)" + {'source': './hw_200.csv', 'row': 0} """ def __init__( diff --git a/libs/community/langchain_community/document_loaders/json_loader.py b/libs/community/langchain_community/document_loaders/json_loader.py index 30eb1962553..839fe482f62 100644 --- a/libs/community/langchain_community/document_loaders/json_loader.py +++ b/libs/community/langchain_community/document_loaders/json_loader.py @@ -8,12 +8,76 @@ from langchain_community.document_loaders.base import BaseLoader class JSONLoader(BaseLoader): - """Load a `JSON` file using a `jq` schema. + """ + Load a `JSON` file using a `jq` schema. - Example: - [{"text": ...}, {"text": ...}, {"text": ...}] -> schema = .[].text - {"key": [{"text": ...}, {"text": ...}, {"text": ...}]} -> schema = .key[].text - ["", "", ""] -> schema = .[] + Setup: + .. code-block:: bash + + pip install -U jq + + Instantiate: + .. code-block:: python + + from langchain_community.document_loaders import JSONLoader + import json + from pathlib import Path + + file_path='./sample_quiz.json' + data = json.loads(Path(file_path).read_text()) + loader = JSONLoader( + file_path=file_path, + jq_schema='.quiz', + text_content=False) + + Load: + .. code-block:: python + + docs = loader.load() + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + {"sport": {"q1": {"question": "Which one is correct team name in + NBA?", "options": ["New York Bulls" + {'source': '/sample_quiz + .json', 'seq_num': 1} + + Async load: + .. 
code-block:: python + + {"sport": {"q1": {"question": "Which one is correct team name in + NBA?", "options": ["New York Bulls" + {'source': '/sample_quiz + .json', 'seq_num': 1} + + Lazy load: + .. code-block:: python + + docs = [] + docs_lazy = loader.lazy_load() + + # async variant: + # docs_lazy = await loader.alazy_load() + + for doc in docs_lazy: + docs.append(doc) + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + {"sport": {"q1": {"question": "Which one is correct team name in + NBA?", "options": ["New York Bulls" + {'source': '/sample_quiz + .json', 'seq_num': 1} """ def __init__(