mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
docs: Standardize DocumentLoader docstrings (#22932)
**Standardizing DocumentLoader docstrings (of which there are many)** This PR addresses issue #22866 and adds docstrings according to the issue's specified format (in the appendix) for files csv_loader.py and json_loader.py in langchain_community.document_loaders. In particular, the following sections have been added to both CSVLoader and JSONLoader: Setup, Instantiate, Load, Async load, and Lazy load. It may be worth adding a 'Metadata' section to the JSONLoader docstring to clarify how we want to extract the JSON metadata (using the `metadata_func` argument). The files I used to walk through the various sections were `example_2.json` from [HERE](https://support.oneskyapp.com/hc/en-us/articles/208047697-JSON-sample-files) and `hw_200.csv` from [HERE](https://people.sc.fsu.edu/~jburkardt/data/csv/csv.html). --------- Co-authored-by: lucast2021 <lucast2021@headroyce.org> Co-authored-by: isaac hershenson <ihershenson@hmc.edu>
This commit is contained in:
parent
a56ff199a7
commit
e25a5966b5
@ -16,8 +16,9 @@ from langchain_community.document_loaders.unstructured import (
|
||||
class CSVLoader(BaseLoader):
|
||||
"""Load a `CSV` file into a list of Documents.
|
||||
|
||||
Each document represents one row of the CSV file. Every row is converted into a
|
||||
key/value pair and outputted to a new line in the document's page_content.
|
||||
Each document represents one row of the CSV file. Every row is converted
|
||||
into a key/value pair and outputted to a new line in the document's
|
||||
page_content.
|
||||
|
||||
The source for each document loaded from csv is set to the value of the
|
||||
`file_path` argument for all documents by default.
|
||||
@ -32,6 +33,67 @@ class CSVLoader(BaseLoader):
|
||||
column1: value1
|
||||
column2: value2
|
||||
column3: value3
|
||||
|
||||
Instantiate:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.document_loaders import CSVLoader
|
||||
|
||||
loader = CSVLoader(file_path='./hw_200.csv',
|
||||
csv_args={
|
||||
'delimiter': ',',
|
||||
'quotechar': '"',
|
||||
'fieldnames': ['Index', 'Height', 'Weight']
|
||||
})
|
||||
|
||||
Load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = loader.load()
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
Index: Index
|
||||
Height: "Height(Inches)"
|
||||
Weight: "Weight(Pounds)"
|
||||
{'source': './hw_200.csv', 'row': 0}
|
||||
|
||||
Async load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = await loader.aload()
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
Index: Index
|
||||
Height: "Height(Inches)"
|
||||
Weight: "Weight(Pounds)"
|
||||
{'source': './hw_200.csv', 'row': 0}
|
||||
|
||||
Lazy load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = []
|
||||
docs_lazy = loader.lazy_load()
|
||||
|
||||
# async variant:
|
||||
# docs_lazy = loader.alazy_load()  # consume with `async for doc in docs_lazy`
|
||||
|
||||
for doc in docs_lazy:
|
||||
docs.append(doc)
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
Index: Index
|
||||
Height: "Height(Inches)"
|
||||
Weight: "Weight(Pounds)"
|
||||
{'source': './hw_200.csv', 'row': 0}
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -8,12 +8,76 @@ from langchain_community.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class JSONLoader(BaseLoader):
|
||||
"""Load a `JSON` file using a `jq` schema.
|
||||
"""
|
||||
Load a `JSON` file using a `jq` schema.
|
||||
|
||||
Example:
|
||||
[{"text": ...}, {"text": ...}, {"text": ...}] -> schema = .[].text
|
||||
{"key": [{"text": ...}, {"text": ...}, {"text": ...}]} -> schema = .key[].text
|
||||
["", "", ""] -> schema = .[]
|
||||
Setup:
|
||||
.. code-block:: bash
|
||||
|
||||
pip install -U jq
|
||||
|
||||
Instantiate:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.document_loaders import JSONLoader
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
file_path='./sample_quiz.json'
|
||||
data = json.loads(Path(file_path).read_text())
|
||||
loader = JSONLoader(
|
||||
file_path=file_path,
|
||||
jq_schema='.quiz',
|
||||
text_content=False)
|
||||
|
||||
Load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = loader.load()
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{"sport": {"q1": {"question": "Which one is correct team name in
|
||||
NBA?", "options": ["New York Bulls"
|
||||
{'source': '/sample_quiz.json', 'seq_num': 1}
|
||||
|
||||
Async load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = await loader.aload()
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{"sport": {"q1": {"question": "Which one is correct team name in
|
||||
NBA?", "options": ["New York Bulls"
|
||||
{'source': '/sample_quiz.json', 'seq_num': 1}
|
||||
|
||||
Lazy load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = []
|
||||
docs_lazy = loader.lazy_load()
|
||||
|
||||
# async variant:
|
||||
# docs_lazy = loader.alazy_load()  # consume with `async for doc in docs_lazy`
|
||||
|
||||
for doc in docs_lazy:
|
||||
docs.append(doc)
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{"sport": {"q1": {"question": "Which one is correct team name in
|
||||
NBA?", "options": ["New York Bulls"
|
||||
{'source': '/sample_quiz.json', 'seq_num': 1}
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
Loading…
Reference in New Issue
Block a user