From e25a5966b5e0023f61577d47850b70d809526a2b Mon Sep 17 00:00:00 2001
From: Lucas Tucker <47258766+lucas-tucker@users.noreply.github.com>
Date: Mon, 17 Jun 2024 22:26:36 -0500
Subject: [PATCH] docs: Standardize DocumentLoader docstrings (#22932)

**Standardizing DocumentLoader docstrings (of which there are many)**

This PR addresses issue #22866 and adds docstrings according to the
issue's specified format (in the appendix) for files csv_loader.py and
json_loader.py in langchain_community.document_loaders. In particular,
the following sections have been added to both CSVLoader and JSONLoader:
Setup, Instantiate, Load, Async load, and Lazy load. It may be worth
adding a 'Metadata' section to the JSONLoader docstring to clarify how
we want to extract the JSON metadata (using the `metadata_func`
argument). The files I used to walk through the various sections were
`example_2.json` from
[HERE](https://support.oneskyapp.com/hc/en-us/articles/208047697-JSON-sample-files)
and `hw_200.csv` from
[HERE](https://people.sc.fsu.edu/~jburkardt/data/csv/csv.html).

---------

Co-authored-by: lucast2021 <lucast2021@headroyce.org>
Co-authored-by: isaac hershenson <ihershenson@hmc.edu>
---
 .../document_loaders/csv_loader.py            | 66 ++++++++++++++++-
 .../document_loaders/json_loader.py           | 74 +++++++++++++++++--
 2 files changed, 133 insertions(+), 7 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/csv_loader.py b/libs/community/langchain_community/document_loaders/csv_loader.py
index 37e6f565531..efc8507aa6f 100644
--- a/libs/community/langchain_community/document_loaders/csv_loader.py
+++ b/libs/community/langchain_community/document_loaders/csv_loader.py
@@ -16,8 +16,9 @@ from langchain_community.document_loaders.unstructured import (
 class CSVLoader(BaseLoader):
     """Load a `CSV` file into a list of Documents.
 
-    Each document represents one row of the CSV file. Every row is converted into a
-    key/value pair and outputted to a new line in the document's page_content.
+    Each document represents one row of the CSV file. Every row is converted
+    into a key/value pair and outputted to a new line in the document's
+    page_content.
 
     The source for each document loaded from csv is set to the value of the
     `file_path` argument for all documents by default.
@@ -32,6 +33,67 @@ class CSVLoader(BaseLoader):
             column1: value1
             column2: value2
             column3: value3
+
+    Instantiate:
+        .. code-block:: python
+
+            from langchain_community.document_loaders import CSVLoader
+
+            loader = CSVLoader(file_path='./hw_200.csv',
+                csv_args={
+                'delimiter': ',',
+                'quotechar': '"',
+                'fieldnames': ['Index', 'Height', 'Weight']
+            })
+
+    Load:
+        .. code-block:: python
+
+            docs = loader.load()
+            print(docs[0].page_content[:100])
+            print(docs[0].metadata)
+
+        .. code-block:: python
+
+            Index: Index
+            Height: "Height(Inches)"
+            Weight: "Weight(Pounds)"
+            {'source': './hw_200.csv', 'row': 0}
+
+    Async load:
+        .. code-block:: python
+
+            docs = await loader.aload()
+            print(docs[0].page_content[:100])
+            print(docs[0].metadata)
+
+        .. code-block:: python
+
+            Index: Index
+            Height: "Height(Inches)"
+            Weight: "Weight(Pounds)"
+            {'source': './hw_200.csv', 'row': 0}
+
+    Lazy load:
+        .. code-block:: python
+
+            docs = []
+            docs_lazy = loader.lazy_load()
+
+            # async variant (iterate with `async for`; do not `await` it):
+            # docs_lazy = loader.alazy_load()
+
+            for doc in docs_lazy:
+                docs.append(doc)
+            print(docs[0].page_content[:100])
+            print(docs[0].metadata)
+
+        .. code-block:: python
+
+            Index: Index
+            Height: "Height(Inches)"
+            Weight: "Weight(Pounds)"
+            {'source': './hw_200.csv', 'row': 0}
     """
 
     def __init__(
diff --git a/libs/community/langchain_community/document_loaders/json_loader.py b/libs/community/langchain_community/document_loaders/json_loader.py
index 30eb1962553..839fe482f62 100644
--- a/libs/community/langchain_community/document_loaders/json_loader.py
+++ b/libs/community/langchain_community/document_loaders/json_loader.py
@@ -8,12 +8,76 @@ from langchain_community.document_loaders.base import BaseLoader
 
 
 class JSONLoader(BaseLoader):
-    """Load a `JSON` file using a `jq` schema.
+    """
+    Load a `JSON` file using a `jq` schema.
 
-    Example:
-        [{"text": ...}, {"text": ...}, {"text": ...}] -> schema = .[].text
-        {"key": [{"text": ...}, {"text": ...}, {"text": ...}]} -> schema = .key[].text
-        ["", "", ""] -> schema = .[]
+    Setup:
+        .. code-block:: bash
+
+            pip install -U jq
+
+    Instantiate:
+        .. code-block:: python
+
+            from langchain_community.document_loaders import JSONLoader
+            import json
+            from pathlib import Path
+
+            file_path='./sample_quiz.json'
+            data = json.loads(Path(file_path).read_text())
+            loader = JSONLoader(
+                     file_path=file_path,
+                     jq_schema='.quiz',
+                     text_content=False)
+
+    Load:
+        .. code-block:: python
+
+            docs = loader.load()
+            print(docs[0].page_content[:100])
+            print(docs[0].metadata)
+
+        .. code-block:: python
+
+            {"sport": {"q1": {"question": "Which one is correct team name in
+            NBA?", "options": ["New York Bulls"
+            {'source': '/sample_quiz
+            .json', 'seq_num': 1}
+
+    Async load:
+        .. code-block:: python
+
+            docs = await loader.aload()
+            print(docs[0].page_content[:100])
+            print(docs[0].metadata)
+
+        .. code-block:: python
+
+            {"sport": {"q1": {"question": "Which one is correct team name in
+            NBA?", "options": ["New York Bulls"
+            {'source': '/sample_quiz
+            .json', 'seq_num': 1}
+
+    Lazy load:
+        .. code-block:: python
+
+            docs = []
+            docs_lazy = loader.lazy_load()
+
+            # async variant (iterate with `async for`; do not `await` it):
+            # docs_lazy = loader.alazy_load()
+
+            for doc in docs_lazy:
+                docs.append(doc)
+            print(docs[0].page_content[:100])
+            print(docs[0].metadata)
+
+        .. code-block:: python
+
+            {"sport": {"q1": {"question": "Which one is correct team name in
+            NBA?", "options": ["New York Bulls"
+            {'source': '/sample_quiz
+            .json', 'seq_num': 1}
     """
 
     def __init__(