Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-10 11:10:23 +00:00)

Compare commits: legacy-doc...cc/depreca (42 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 39d1759b66 | |
| | 7005c4fe5b | |
| | 1eace6523d | |
| | bacf4c58ef | |
| | b71b5bd3d7 | |
| | 31364de10c | |
| | 8a70754dfe | |
| | 9bd4459f9a | |
| | 50c1ecc5f1 | |
| | f51a9024ae | |
| | 8afbab4cf6 | |
| | 66e30efa61 | |
| | ba167dc158 | |
| | 44f69063b1 | |
| | f18b77fd59 | |
| | 966b408634 | |
| | bd261456f6 | |
| | ec8ffc8f40 | |
| | 2494cecabf | |
| | df632b8cde | |
| | 1050e890c6 | |
| | c4779f5b9c | |
| | 0a99935794 | |
| | 63aba3fe5b | |
| | dc80be5efe | |
| | ab29ee79a3 | |
| | 1d3f7231b8 | |
| | a58d4ba340 | |
| | d178fb9dc3 | |
| | 414154fa59 | |
| | 94c9cb7321 | |
| | 012929551c | |
| | 63c483ea01 | |
| | eec7bb4f51 | |
| | f0f125dac7 | |
| | f4196f1fb8 | |
| | d0ad713937 | |
| | ddd7919f6a | |
| | 493e474063 | |
| | 15254d1027 | |
| | d38c9c7026 | |
| | d249318f94 | |
.gitignore (vendored, 2 changes)

@@ -172,6 +172,8 @@ docs/api_reference/*/
 !docs/api_reference/_static/
 !docs/api_reference/templates/
 !docs/api_reference/themes/
+!docs/api_reference/_extensions/
+!docs/api_reference/scripts/
 docs/docs/build
 docs/docs/node_modules
 docs/docs/yarn.lock
Makefile (5 changes)

@@ -31,6 +31,7 @@ docs_linkcheck:
 api_docs_build:
 	poetry run python docs/api_reference/create_api_rst.py
 	cd docs/api_reference && poetry run make html
+	poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/

 API_PKG ?= text-splitters

@@ -38,12 +39,14 @@ api_docs_quick_preview:
 	poetry run pip install "pydantic<2"
 	poetry run python docs/api_reference/create_api_rst.py $(API_PKG)
 	cd docs/api_reference && poetry run make html
-	open docs/api_reference/_build/html/$(shell echo $(API_PKG) | sed 's/-/_/g')_api_reference.html
+	poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
+	open docs/api_reference/_build/html/reference.html

 ## api_docs_clean: Clean the API Reference documentation build artifacts.
 api_docs_clean:
-	find ./docs/api_reference -name '*_api_reference.rst' -delete
+	git clean -fdX ./docs/api_reference
+	rm docs/api_reference/index.md

 ## api_docs_linkcheck: Run linkchecker on the API Reference documentation.
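Presumably these targets are driven by the `API_PKG` make variable, e.g. `make api_docs_quick_preview API_PKG=openai`, falling back to the `text-splitters` default set above; after this change the preview opens the combined `reference.html` landing page instead of a single `*_api_reference.html` file.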
@@ -82,6 +82,10 @@ vercel-build: install-vercel-deps build generate-references
 	rm -rf docs
 	mv $(OUTPUT_NEW_DOCS_DIR) docs
 	rm -rf build
+	mkdir static/api_reference
+	git clone --depth=1 https://github.com/baskaryan/langchain-api-docs-build.git
+	mv langchain-api-docs-build/api_reference_build/html/* static/api_reference/
+	rm -rf langchain-api-docs-build
 	NODE_OPTIONS="--max-old-space-size=5000" yarn run docusaurus build
 	mv build v0.2
 	mkdir build
docs/api_reference/_extensions/gallery_directive.py (new file, 144 lines)

@@ -0,0 +1,144 @@
"""A directive to generate a gallery of images from structured data.

Generating a gallery of images that are all the same size is a common
pattern in documentation, and this can be cumbersome if the gallery is
generated programmatically. This directive wraps this particular use-case
in a helper-directive to generate it with a single YAML configuration file.

It currently exists for maintainers of the pydata-sphinx-theme,
but might be abstracted into a standalone package if it proves useful.
"""

from pathlib import Path
from typing import Any, ClassVar, Dict, List

from docutils import nodes
from docutils.parsers.rst import directives
from sphinx.application import Sphinx
from sphinx.util import logging
from sphinx.util.docutils import SphinxDirective
from yaml import safe_load

logger = logging.getLogger(__name__)


TEMPLATE_GRID = """
`````{{grid}} {columns}
{options}

{content}

`````
"""

GRID_CARD = """
````{{grid-item-card}} {title}
{options}

{content}
````
"""


class GalleryGridDirective(SphinxDirective):
    """A directive to show a gallery of images and links in a Bootstrap grid.

    The grid can be generated from a YAML file that contains a list of items, or
    from the content of the directive (also formatted in YAML). Use the parameter
    "class-card" to add an additional CSS class to all cards. When specifying the grid
    items, you can use all parameters from "grid-item-card" directive to customize
    individual cards + ["image", "header", "content", "title"].

    Danger:
        This directive can only be used in the context of a Myst documentation page as
        the templates use Markdown flavored formatting.
    """

    name = "gallery-grid"
    has_content = True
    required_arguments = 0
    optional_arguments = 1
    final_argument_whitespace = True
    option_spec: ClassVar[dict[str, Any]] = {
        # A class to be added to the resulting container
        "grid-columns": directives.unchanged,
        "class-container": directives.unchanged,
        "class-card": directives.unchanged,
    }

    def run(self) -> List[nodes.Node]:
        """Create the gallery grid."""
        if self.arguments:
            # If an argument is given, assume it's a path to a YAML file
            # Parse it and load it into the directive content
            path_data_rel = Path(self.arguments[0])
            path_doc, _ = self.get_source_info()
            path_doc = Path(path_doc).parent
            path_data = (path_doc / path_data_rel).resolve()
            if not path_data.exists():
                logger.info(f"Could not find grid data at {path_data}.")
                nodes.text(f"No grid data found at {path_data}.")
                return []
            yaml_string = path_data.read_text()
        else:
            yaml_string = "\n".join(self.content)

        # Use all the element with an img-bottom key as sites to show
        # and generate a card item for each of them
        grid_items = []
        for item in safe_load(yaml_string):
            # remove parameters that are not needed for the card options
            title = item.pop("title", "")

            # build the content of the card using some extra parameters
            header = f"{item.pop('header')} \n^^^ \n" if "header" in item else ""
            image = f"![image]({item.pop('image')}) \n" if "image" in item else ""
            content = f"{item.pop('content')} \n" if "content" in item else ""

            # optional parameter that influence all cards
            if "class-card" in self.options:
                item["class-card"] = self.options["class-card"]

            loc_options_str = "\n".join(f":{k}: {v}" for k, v in item.items()) + " \n"

            card = GRID_CARD.format(
                options=loc_options_str, content=header + image + content, title=title
            )
            grid_items.append(card)

        # Parse the template with Sphinx Design to create an output container
        # Prep the options for the template grid
        class_ = "gallery-directive" + f' {self.options.get("class-container", "")}'
        options = {"gutter": 2, "class-container": class_}
        options_str = "\n".join(f":{k}: {v}" for k, v in options.items())

        # Create the directive string for the grid
        grid_directive = TEMPLATE_GRID.format(
            columns=self.options.get("grid-columns", "1 2 3 4"),
            options=options_str,
            content="\n".join(grid_items),
        )

        # Parse content as a directive so Sphinx Design processes it
        container = nodes.container()
        self.state.nested_parse([grid_directive], 0, container)

        # Sphinx Design outputs a container too, so just use that
        return [container.children[0]]


def setup(app: Sphinx) -> Dict[str, Any]:
    """Add custom configuration to sphinx app.

    Args:
        app: the Sphinx application

    Returns:
        the 2 parallel parameters set to ``True``.
    """
    app.add_directive("gallery-grid", GalleryGridDirective)

    return {
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
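To make the card-rendering step above concrete, here is a small standalone sketch (not part of the diff) that pushes one hypothetical YAML item through the same `GRID_CARD` template and option-formatting logic, outside of Sphinx:

`````python
from yaml import safe_load

GRID_CARD = """
````{{grid-item-card}} {title}
{options}

{content}
````
"""

# Hypothetical gallery item: "title", "header", and "content" are popped by
# the directive; any remaining keys (here "link") pass through as card options.
yaml_string = """
- title: langchain-core
  header: "**Core**"
  content: "Base abstractions and runtime."
  link: core/index.html
"""

for item in safe_load(yaml_string):
    title = item.pop("title", "")
    header = f"{item.pop('header')} \n^^^ \n" if "header" in item else ""
    content = f"{item.pop('content')} \n" if "content" in item else ""
    # Remaining keys become ":key: value" option lines on the card.
    options = "\n".join(f":{k}: {v}" for k, v in item.items()) + " \n"
    print(GRID_CARD.format(options=options, content=header + content, title=title))
`````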
docs/api_reference/_static/css/custom.css

@@ -1,51 +1,411 @@
-pre {
-  white-space: break-spaces;
-}
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap');

/*******************************************************************************
 * master color map. Only the colors that actually differ between light and dark
 * themes are specified separately.
 *
 * To see the full list of colors see https://www.figma.com/file/rUrrHGhUBBIAAjQ82x6pz9/PyData-Design-system---proposal-for-implementation-(2)?node-id=1234%3A765&t=ifcFT1JtnrSshGfi-1
 */
/**
 * Function to get items from nested maps
 */
/* Assign base colors for the PyData theme */
:root {
  --pst-teal-50: #f4fbfc;
  --pst-teal-100: #e9f6f8;
  --pst-teal-200: #d0ecf1;
  --pst-teal-300: #abdde6;
  --pst-teal-400: #3fb1c5;
  --pst-teal-500: #0a7d91;
  --pst-teal-600: #085d6c;
  --pst-teal-700: #064752;
  --pst-teal-800: #042c33;
  --pst-teal-900: #021b1f;
  --pst-violet-50: #f4eefb;
  --pst-violet-100: #e0c7ff;
  --pst-violet-200: #d5b4fd;
  --pst-violet-300: #b780ff;
  --pst-violet-400: #9c5ffd;
  --pst-violet-500: #8045e5;
  --pst-violet-600: #6432bd;
  --pst-violet-700: #4b258f;
  --pst-violet-800: #341a61;
  --pst-violet-900: #1e0e39;
  --pst-gray-50: #f9f9fa;
  --pst-gray-100: #f3f4f5;
  --pst-gray-200: #e5e7ea;
  --pst-gray-300: #d1d5da;
  --pst-gray-400: #9ca4af;
  --pst-gray-500: #677384;
  --pst-gray-600: #48566b;
  --pst-gray-700: #29313d;
  --pst-gray-800: #222832;
  --pst-gray-900: #14181e;
  --pst-pink-50: #fcf8fd;
  --pst-pink-100: #fcf0fa;
  --pst-pink-200: #f8dff5;
  --pst-pink-300: #f3c7ee;
  --pst-pink-400: #e47fd7;
  --pst-pink-500: #c132af;
  --pst-pink-600: #912583;
  --pst-pink-700: #6e1c64;
  --pst-pink-800: #46123f;
  --pst-pink-900: #2b0b27;
  --pst-foundation-white: #ffffff;
  --pst-foundation-black: #14181e;
  --pst-green-10: #f1fdfd;
  --pst-green-50: #E0F7F6;
  --pst-green-100: #B3E8E6;
  --pst-green-200: #80D6D3;
  --pst-green-300: #4DC4C0;
  --pst-green-400: #4FB2AD;
  --pst-green-500: #287977;
  --pst-green-600: #246161;
  --pst-green-700: #204F4F;
  --pst-green-800: #1C3C3C;
  --pst-green-900: #0D2427;
  --pst-lilac-50: #f4eefb;
  --pst-lilac-100: #DAD6FE;
  --pst-lilac-200: #BCB2FD;
  --pst-lilac-300: #9F8BFA;
  --pst-lilac-400: #7F5CF6;
  --pst-lilac-500: #6F3AED;
  --pst-lilac-600: #6028D9;
  --pst-lilac-700: #5021B6;
  --pst-lilac-800: #431D95;
  --pst-lilac-900: #1e0e39;
  --pst-header-height: 2.5rem;
}

@media (min-width: 1200px) {
  .container,
  .container-lg,
  .container-md,
  .container-sm,
  .container-xl {
    max-width: 2560px !important;
  }
}
html {
  --pst-font-family-base: 'Inter';
  --pst-font-family-heading: 'Inter Tight', sans-serif;
}

-#my-component-root *,
-#headlessui-portal-root * {
-  z-index: 10000;
-}
/*******************************************************************************
 * write the color rules for each theme (light/dark)
 */
/* NOTE:
 * Mixins enable us to reuse the same definitions for the different modes
 * https://sass-lang.com/documentation/at-rules/mixin
 * #{something} inserts a variable into a CSS selector or property name
 * https://sass-lang.com/documentation/interpolation
 */
/* Defaults to light mode if data-theme is not set */
html:not([data-theme]) {
  --pst-color-primary: #287977;
  --pst-color-primary-bg: #80D6D3;
  --pst-color-secondary: #6F3AED;
  --pst-color-secondary-bg: #DAD6FE;
  --pst-color-accent: #c132af;
  --pst-color-accent-bg: #f8dff5;
  --pst-color-info: #276be9;
  --pst-color-info-bg: #dce7fc;
  --pst-color-warning: #f66a0a;
  --pst-color-warning-bg: #f8e3d0;
  --pst-color-success: #00843f;
  --pst-color-success-bg: #d6ece1;
  --pst-color-attention: var(--pst-color-warning);
  --pst-color-attention-bg: var(--pst-color-warning-bg);
  --pst-color-danger: #d72d47;
  --pst-color-danger-bg: #f9e1e4;
  --pst-color-text-base: #222832;
  --pst-color-text-muted: #48566b;
  --pst-color-heading-color: #ffffff;
  --pst-color-shadow: rgba(0, 0, 0, 0.1);
  --pst-color-border: #d1d5da;
  --pst-color-border-muted: rgba(23, 23, 26, 0.2);
  --pst-color-inline-code: #912583;
  --pst-color-inline-code-links: #246161;
  --pst-color-target: #f3cf95;
  --pst-color-background: #ffffff;
  --pst-color-on-background: #F4F9F8;
  --pst-color-surface: #F4F9F8;
  --pst-color-on-surface: #222832;
}
html:not([data-theme]) {
  --pst-color-link: var(--pst-color-primary);
  --pst-color-link-hover: var(--pst-color-secondary);
}
html:not([data-theme]) .only-dark,
html:not([data-theme]) .only-dark ~ figcaption {
  display: none !important;
}

-table.longtable code {
-  white-space: normal;
-}
/* NOTE: @each {...} is like a for-loop
 * https://sass-lang.com/documentation/at-rules/control/each
 */
html[data-theme=light] {
  --pst-color-primary: #287977;
  --pst-color-primary-bg: #80D6D3;
  --pst-color-secondary: #6F3AED;
  --pst-color-secondary-bg: #DAD6FE;
  --pst-color-accent: #c132af;
  --pst-color-accent-bg: #f8dff5;
  --pst-color-info: #276be9;
  --pst-color-info-bg: #dce7fc;
  --pst-color-warning: #f66a0a;
  --pst-color-warning-bg: #f8e3d0;
  --pst-color-success: #00843f;
  --pst-color-success-bg: #d6ece1;
  --pst-color-attention: var(--pst-color-warning);
  --pst-color-attention-bg: var(--pst-color-warning-bg);
  --pst-color-danger: #d72d47;
  --pst-color-danger-bg: #f9e1e4;
  --pst-color-text-base: #222832;
  --pst-color-text-muted: #48566b;
  --pst-color-heading-color: #ffffff;
  --pst-color-shadow: rgba(0, 0, 0, 0.1);
  --pst-color-border: #d1d5da;
  --pst-color-border-muted: rgba(23, 23, 26, 0.2);
  --pst-color-inline-code: #912583;
  --pst-color-inline-code-links: #246161;
  --pst-color-target: #f3cf95;
  --pst-color-background: #ffffff;
  --pst-color-on-background: #F4F9F8;
  --pst-color-surface: #F4F9F8;
  --pst-color-on-surface: #222832;
  color-scheme: light;
}
html[data-theme=light] {
  --pst-color-link: var(--pst-color-primary);
  --pst-color-link-hover: var(--pst-color-secondary);
}
html[data-theme=light] .only-dark,
html[data-theme=light] .only-dark ~ figcaption {
  display: none !important;
}

-table.longtable td {
-  max-width: 600px;
-}
html[data-theme=dark] {
  --pst-color-primary: #4FB2AD;
  --pst-color-primary-bg: #1C3C3C;
  --pst-color-secondary: #7F5CF6;
  --pst-color-secondary-bg: #431D95;
  --pst-color-accent: #e47fd7;
  --pst-color-accent-bg: #46123f;
  --pst-color-info: #79a3f2;
  --pst-color-info-bg: #06245d;
  --pst-color-warning: #ff9245;
  --pst-color-warning-bg: #652a02;
  --pst-color-success: #5fb488;
  --pst-color-success-bg: #002f17;
  --pst-color-attention: var(--pst-color-warning);
  --pst-color-attention-bg: var(--pst-color-warning-bg);
  --pst-color-danger: #e78894;
  --pst-color-danger-bg: #4e111b;
  --pst-color-text-base: #ced6dd;
  --pst-color-text-muted: #9ca4af;
  --pst-color-heading-color: #14181e;
  --pst-color-shadow: rgba(0, 0, 0, 0.2);
  --pst-color-border: #48566b;
  --pst-color-border-muted: #29313d;
  --pst-color-inline-code: #f3c7ee;
  --pst-color-inline-code-links: #4FB2AD;
  --pst-color-target: #675c04;
  --pst-color-background: #14181e;
  --pst-color-on-background: #222832;
  --pst-color-surface: #29313d;
  --pst-color-on-surface: #f3f4f5;
  /* Adjust images in dark mode (unless they have class .only-dark or
   * .dark-light, in which case assume they're already optimized for dark
   * mode).
   */
  /* Give images a light background in dark mode in case they have
   * transparency and black text (unless they have class .only-dark or .dark-light, in
   * which case assume they're already optimized for dark mode).
   */
  color-scheme: dark;
}
html[data-theme=dark] {
  --pst-color-link: var(--pst-color-primary);
  --pst-color-link-hover: var(--pst-color-secondary);
}
html[data-theme=dark] .only-light,
html[data-theme=dark] .only-light ~ figcaption {
  display: none !important;
}
html[data-theme=dark] img:not(.only-dark):not(.dark-light) {
  filter: brightness(0.8) contrast(1.2);
}
html[data-theme=dark] .bd-content img:not(.only-dark):not(.dark-light) {
  background: rgb(255, 255, 255);
  border-radius: 0.25rem;
}
html[data-theme=dark] .MathJax_SVG * {
  fill: var(--pst-color-text-base);
}

-/* Banner Styles */
-.banner {
-  position: fixed;
-  top: 0;
-  left: 0;
-  width: 100%;
-  background-color: #ffcc00;
-  color: #000;
-  text-align: center;
-  padding: 5px 0;
-  z-index: 1003; /* Highest z-index to sit above everything */
-  height: 30px; /* Fixed height */
-  line-height: 20px; /* Vertically center the text */
-}
.pst-color-primary {
  color: var(--pst-color-primary);
}

-/* Navbar Styles */
-#navbar {
-  position: fixed;
-  top: 30px; /* Positioned right below the banner */
-  left: 0;
-  right: 0;
-  z-index: 1002; /* Below the banner */
-  height: 50px; /* Fixed height */
-}
.pst-color-secondary {
  color: var(--pst-color-secondary);
}

.pst-color-accent {
  color: var(--pst-color-accent);
}

.pst-color-info {
  color: var(--pst-color-info);
}

.pst-color-warning {
  color: var(--pst-color-warning);
}

.pst-color-success {
  color: var(--pst-color-success);
}

.pst-color-attention {
  color: var(--pst-color-attention);
}

.pst-color-danger {
  color: var(--pst-color-danger);
}

.pst-color-text-base {
  color: var(--pst-color-text-base);
}

.pst-color-text-muted {
  color: var(--pst-color-text-muted);
}

.pst-color-heading-color {
  color: var(--pst-color-heading-color);
}

.pst-color-shadow {
  color: var(--pst-color-shadow);
}

.pst-color-border {
  color: var(--pst-color-border);
}

.pst-color-border-muted {
  color: var(--pst-color-border-muted);
}

.pst-color-inline-code {
  color: var(--pst-color-inline-code);
}

.pst-color-inline-code-links {
  color: var(--pst-color-inline-code-links);
}

.pst-color-target {
  color: var(--pst-color-target);
}

.pst-color-background {
  color: var(--pst-color-background);
}

.pst-color-on-background {
  color: var(--pst-color-on-background);
}

.pst-color-surface {
  color: var(--pst-color-surface);
}

.pst-color-on-surface {
  color: var(--pst-color-on-surface);
}

/* Adjust the height of the navbar */
.bd-header .bd-header__inner {
  height: 52px; /* Adjust this value as needed */
}

.navbar-nav > li > a {
  line-height: 52px; /* Vertically center the navbar links */
}

/* Make sure the navbar items align properly */
.navbar-nav {
  display: flex;
}

.bd-header .navbar-header-items__start {
  margin-left: 0rem;
}

.bd-header button.primary-toggle {
  margin-right: 0rem;
}

.bd-header ul.navbar-nav .dropdown .dropdown-menu {
  overflow-y: auto; /* Enable vertical scrolling */
  max-height: 80vh;
}

.bd-sidebar-primary {
  width: 22%; /* Adjust this value to your preference */
  line-height: 1.4;
}

.bd-sidebar-secondary {
  line-height: 1.4;
}

.toc-entry a.nav-link, .toc-entry a>code {
  background-color: transparent;
  border-color: transparent;
}

.bd-sidebar-primary code {
  background-color: transparent;
  border-color: transparent;
}

.toctree-wrapper li[class^=toctree-l1]>a {
  font-size: 1.3em;
}

.toctree-wrapper li[class^=toctree-l1] {
  margin-bottom: 2em;
}

.toctree-wrapper li[class^=toctree-l]>ul {
  margin-top: 0.5em;
  font-size: 0.9em;
}

*, :after, :before {
  font-style: normal;
}

div.deprecated {
  margin-top: 0.5em;
  margin-bottom: 2em;
}

.admonition-beta.admonition, div.admonition-beta.admonition {
  border-color: var(--pst-color-warning);
  margin-top: 0.5em;
  margin-bottom: 2em;
}

.admonition-beta>.admonition-title, div.admonition-beta>.admonition-title {
  background-color: var(--pst-color-warning-bg);
}

dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd {
  margin-left: 1rem;
}

p {
  font-size: 0.9rem;
  margin-bottom: 0.5rem;
}
docs/api_reference/_static/img/brand/favicon.png (new binary file, 777 B; not shown)
docs/api_reference/_static/wordmark-api-dark.svg (new file, 11 lines)

@@ -0,0 +1,11 @@
<svg width="72" height="19" viewBox="0 0 72 19" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_4019_2020)">
<path d="M29.4038 5.84477C30.1256 6.56657 30.1256 7.74117 29.4038 8.46296L27.7869 10.0538L27.7704 9.96259C27.6524 9.30879 27.3415 8.71552 26.8723 8.24627C26.5189 7.8936 26.1012 7.63282 25.6305 7.47143C25.3383 7.76508 25.1777 8.14989 25.1777 8.55487C25.1777 8.63706 25.1851 8.72224 25.2001 8.80742C25.4593 8.90082 25.6887 9.04503 25.8815 9.23781C26.6033 9.9596 26.6033 11.1342 25.8815 11.856L24.4738 13.2637C24.1129 13.6246 23.6392 13.8047 23.1647 13.8047C22.6902 13.8047 22.2165 13.6246 21.8556 13.2637C21.1338 12.5419 21.1338 11.3673 21.8556 10.6455L23.4725 9.05549L23.489 9.14665C23.6063 9.79896 23.9171 10.3922 24.3879 10.8622C24.742 11.2164 25.1343 11.4518 25.6043 11.6124L25.691 11.5257C25.954 11.2627 26.0982 10.913 26.0982 10.5402C26.0982 10.4572 26.0907 10.3743 26.0765 10.2929C25.8053 10.2032 25.5819 10.0754 25.3786 9.87218C25.0857 9.57928 24.9034 9.20493 24.8526 8.79024C24.8489 8.76035 24.8466 8.73121 24.8437 8.70132C24.8033 8.16109 24.9983 7.63357 25.3786 7.25399L26.7864 5.84627C27.1353 5.49733 27.6001 5.30455 28.0955 5.30455C28.5909 5.30455 29.0556 5.49658 29.4046 5.84627L29.4038 5.84477ZM36.7548 9.56583C36.7548 14.7163 32.5645 18.9058 27.4148 18.9058H9.34C4.1903 18.9058 0 14.7163 0 9.56583C0 4.41538 4.1903 0.22583 9.34 0.22583H27.4148C32.5652 0.22583 36.7548 4.41613 36.7548 9.56583ZM18 14.25C18.1472 14.0714 17.4673 13.5686 17.3283 13.384C17.0459 13.0777 17.0444 12.6368 16.8538 12.2789C16.3876 11.1985 15.8518 10.1262 15.1024 9.21166C14.3104 8.21116 13.333 7.38326 12.4745 6.44403C11.8371 5.78873 11.6668 4.85548 11.1041 4.15087C10.3285 3.00541 7.87624 2.69308 7.51683 4.31077C7.51833 4.36158 7.50264 4.39371 7.45855 4.42584C7.2598 4.57005 7.08271 4.73518 6.93402 4.93468C6.57013 5.44129 6.51409 6.30057 6.96839 6.75561C6.98333 6.51576 6.99155 6.28936 7.18134 6.1175C7.53252 6.41862 8.06304 6.52547 8.47026 6.30057C9.36989 7.585 9.14573 9.36184 9.86005 10.7457C10.0573 11.0729 10.2561 11.4069 10.5094 11.6939C10.7148 12.0137 11.4247 12.391 11.4665 12.6869C11.474 13.195 11.4142 13.7502 11.7475 14.1753C11.9044 14.4936 11.5188 14.8134 11.208 14.7738C10.8045 14.8291 10.3121 14.5026 9.95868 14.7036C9.8339 14.8388 9.58957 14.6894 9.48197 14.8769C9.44461 14.9741 9.24286 15.1108 9.36316 15.2042C9.49691 15.1026 9.62095 14.9965 9.80102 15.057C9.77412 15.2035 9.88994 15.2244 9.98184 15.267C9.97886 15.3663 9.92057 15.468 9.99679 15.5524C10.0857 15.4627 10.1388 15.3357 10.28 15.2983C10.7492 15.9238 11.2267 14.6655 12.2421 15.2318C12.0359 15.2214 11.8528 15.2475 11.7139 15.4172C11.6795 15.4553 11.6503 15.5001 11.7109 15.5494C12.2586 15.196 12.2556 15.6705 12.6112 15.5248C12.8847 15.382 13.1567 15.2035 13.4817 15.2543C13.1657 15.3454 13.153 15.5995 12.9677 15.8139C12.9363 15.8468 12.9213 15.8842 12.9579 15.9387C13.614 15.8834 13.6678 15.6652 14.1975 15.3977C14.5928 15.1564 14.9866 15.7414 15.3288 15.4082C15.4043 15.3357 15.5074 15.3604 15.6008 15.3507C15.4812 14.7133 14.1669 15.4672 14.1878 14.6124C14.6107 14.3247 14.5136 13.7741 14.542 13.3295C15.0284 13.5992 15.5694 13.7561 16.0461 14.0139C16.2867 14.4025 16.6641 14.9158 17.1669 14.8822C17.1804 14.8433 17.1923 14.8089 17.2065 14.7693C17.359 14.7955 17.5547 14.8964 17.6384 14.7036C17.8663 14.9419 18.201 14.93 18.4992 14.8687C18.7196 14.6894 18.0845 14.4338 17.9993 14.2493L18 14.25ZM31.3458 7.15387C31.3458 6.28413 31.0081 5.46744 30.3946 4.85399C29.7812 4.24054 28.9645 3.9028 28.094 3.9028C27.2235 3.9028 26.4068 4.24054 25.7933 4.85399L24.3856 6.26171C24.0569 6.59048 23.8073 6.97678 23.6436 7.40941L23.6339 7.43407L23.6085 7.44154C23.0974 7.5992 22.6469 7.86969 
22.2696 8.24702L20.8618 9.65475C19.5938 10.9235 19.5938 12.9873 20.8618 14.2553C21.4753 14.8687 22.292 15.2064 23.1617 15.2064C24.0314 15.2064 24.8489 14.8687 25.4623 14.2553L26.8701 12.8475C27.1973 12.5203 27.4454 12.1355 27.609 11.7036L27.6188 11.6789L27.6442 11.6707C28.1463 11.5168 28.6095 11.2373 28.9854 10.8622L30.3931 9.4545C31.0066 8.84105 31.3443 8.02436 31.3443 7.15387H31.3458ZM12.8802 13.1972C12.7592 13.6695 12.7196 14.4742 12.1054 14.4974C12.0546 14.7701 12.2944 14.8724 12.5119 14.785C12.7278 14.6856 12.8302 14.8635 12.9026 15.0406C13.2359 15.0891 13.7291 14.9292 13.7477 14.5347C13.2501 14.2478 13.0962 13.7023 12.8795 13.1972H12.8802Z" fill="#F4F3FF"/>
<path d="M43.5142 15.2258L47.1462 3.70583H49.9702L53.6022 15.2258H51.6182L48.3222 4.88983H48.7542L45.4982 15.2258H43.5142ZM45.5382 12.7298V10.9298H51.5862V12.7298H45.5382ZM55.0486 15.2258V3.70583H59.8086C59.9206 3.70583 60.0646 3.71116 60.2406 3.72183C60.4166 3.72716 60.5792 3.74316 60.7286 3.76983C61.3952 3.87116 61.9446 4.0925 62.3766 4.43383C62.8139 4.77516 63.1366 5.20716 63.3446 5.72983C63.5579 6.24716 63.6646 6.82316 63.6646 7.45783C63.6646 8.08716 63.5579 8.66316 63.3446 9.18583C63.1312 9.70316 62.8059 10.1325 62.3686 10.4738C61.9366 10.8152 61.3899 11.0365 60.7286 11.1378C60.5792 11.1592 60.4139 11.1752 60.2326 11.1858C60.0566 11.1965 59.9152 11.2018 59.8086 11.2018H56.9766V15.2258H55.0486ZM56.9766 9.40183H59.7286C59.8352 9.40183 59.9552 9.3965 60.0886 9.38583C60.2219 9.37516 60.3446 9.35383 60.4566 9.32183C60.7766 9.24183 61.0272 9.1005 61.2086 8.89783C61.3952 8.69516 61.5259 8.46583 61.6006 8.20983C61.6806 7.95383 61.7206 7.70316 61.7206 7.45783C61.7206 7.2125 61.6806 6.96183 61.6006 6.70583C61.5259 6.4445 61.3952 6.2125 61.2086 6.00983C61.0272 5.80716 60.7766 5.66583 60.4566 5.58583C60.3446 5.55383 60.2219 5.53516 60.0886 5.52983C59.9552 5.51916 59.8352 5.51383 59.7286 5.51383H56.9766V9.40183ZM65.4273 15.2258V3.70583H67.3553V15.2258H65.4273Z" fill="#F4F3FF"/>
</g>
<defs>
<clipPath id="clip0_4019_2020">
<rect width="71.0711" height="18.68" fill="white" transform="translate(0 0.22583)"/>
</clipPath>
</defs>
</svg>
docs/api_reference/_static/wordmark-api.svg (new file, 11 lines)

@@ -0,0 +1,11 @@
<svg width="72" height="20" viewBox="0 0 72 20" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_4019_689)">
<path d="M29.4038 5.97905C30.1256 6.70085 30.1256 7.87545 29.4038 8.59724L27.7869 10.188L27.7704 10.0969C27.6524 9.44307 27.3415 8.84979 26.8723 8.38055C26.5189 8.02787 26.1012 7.7671 25.6305 7.60571C25.3383 7.89936 25.1777 8.28416 25.1777 8.68915C25.1777 8.77134 25.1851 8.85652 25.2001 8.9417C25.4593 9.0351 25.6887 9.17931 25.8815 9.37209C26.6033 10.0939 26.6033 11.2685 25.8815 11.9903L24.4738 13.398C24.1129 13.7589 23.6392 13.939 23.1647 13.939C22.6902 13.939 22.2165 13.7589 21.8556 13.398C21.1338 12.6762 21.1338 11.5016 21.8556 10.7798L23.4725 9.18977L23.489 9.28093C23.6063 9.93323 23.9171 10.5265 24.3879 10.9965C24.742 11.3507 25.1343 11.586 25.6043 11.7467L25.691 11.66C25.954 11.397 26.0982 11.0473 26.0982 10.6745C26.0982 10.5915 26.0907 10.5086 26.0765 10.4271C25.8053 10.3375 25.5819 10.2097 25.3786 10.0065C25.0857 9.71356 24.9034 9.33921 24.8526 8.92451C24.8489 8.89463 24.8466 8.86549 24.8437 8.8356C24.8033 8.29537 24.9983 7.76785 25.3786 7.38827L26.7864 5.98055C27.1353 5.6316 27.6001 5.43883 28.0955 5.43883C28.5909 5.43883 29.0556 5.63086 29.4046 5.98055L29.4038 5.97905ZM36.7548 9.70011C36.7548 14.8506 32.5645 19.0401 27.4148 19.0401H9.34C4.1903 19.0401 0 14.8506 0 9.70011C0 4.54966 4.1903 0.360107 9.34 0.360107H27.4148C32.5652 0.360107 36.7548 4.55041 36.7548 9.70011ZM18 14.3843C18.1472 14.2057 17.4673 13.7029 17.3283 13.5183C17.0459 13.2119 17.0444 12.7711 16.8538 12.4132C16.3876 11.3327 15.8518 10.2605 15.1024 9.34594C14.3104 8.34543 13.333 7.51754 12.4745 6.57831C11.8371 5.92301 11.6668 4.98976 11.1041 4.28515C10.3285 3.13969 7.87624 2.82736 7.51683 4.44505C7.51833 4.49586 7.50264 4.52799 7.45855 4.56012C7.2598 4.70433 7.08271 4.86946 6.93402 5.06896C6.57013 5.57556 6.51409 6.43484 6.96839 6.88989C6.98333 6.65004 6.99155 6.42364 7.18134 6.25178C7.53252 6.5529 8.06304 6.65975 8.47026 6.43484C9.36989 7.71928 9.14573 9.49612 9.86005 10.8799C10.0573 11.2072 10.2561 11.5412 10.5094 11.8281C10.7148 12.1479 11.4247 12.5253 11.4665 12.8212C11.474 13.3293 11.4142 13.8844 11.7475 14.3096C11.9044 14.6279 11.5188 14.9477 11.208 14.9081C10.8045 14.9634 10.3121 14.6369 9.95868 14.8379C9.8339 14.9731 9.58957 14.8237 9.48197 15.0112C9.44461 15.1083 9.24286 15.2451 9.36316 15.3385C9.49691 15.2369 9.62095 15.1308 9.80102 15.1913C9.77412 15.3377 9.88994 15.3587 9.98184 15.4012C9.97886 15.5006 9.92057 15.6022 9.99679 15.6867C10.0857 15.597 10.1388 15.47 10.28 15.4326C10.7492 16.058 11.2267 14.7997 12.2421 15.3661C12.0359 15.3557 11.8528 15.3818 11.7139 15.5514C11.6795 15.5895 11.6503 15.6344 11.7109 15.6837C12.2586 15.3303 12.2556 15.8047 12.6112 15.659C12.8847 15.5163 13.1567 15.3377 13.4817 15.3885C13.1657 15.4797 13.153 15.7337 12.9677 15.9482C12.9363 15.9811 12.9213 16.0184 12.9579 16.073C13.614 16.0177 13.6678 15.7995 14.1975 15.532C14.5928 15.2907 14.9866 15.8757 15.3288 15.5425C15.4043 15.47 15.5074 15.4946 15.6008 15.4849C15.4812 14.8476 14.1669 15.6015 14.1878 14.7467C14.6107 14.459 14.5136 13.9083 14.542 13.4638C15.0284 13.7335 15.5694 13.8904 16.0461 14.1482C16.2867 14.5367 16.6641 15.0501 17.1669 15.0164C17.1804 14.9776 17.1923 14.9432 17.2065 14.9036C17.359 14.9298 17.5547 15.0306 17.6384 14.8379C17.8663 15.0762 18.201 15.0643 18.4992 15.003C18.7196 14.8237 18.0845 14.5681 17.9993 14.3836L18 14.3843ZM31.3458 7.28815C31.3458 6.41841 31.0081 5.60172 30.3946 4.98826C29.7812 4.37481 28.9645 4.03708 28.094 4.03708C27.2235 4.03708 26.4068 4.37481 25.7933 4.98826L24.3856 6.39599C24.0569 6.72476 23.8073 7.11106 23.6436 7.54369L23.6339 7.56835L23.6085 7.57582C23.0974 7.73348 22.6469 8.00396 
22.2696 8.3813L20.8618 9.78902C19.5938 11.0578 19.5938 13.1215 20.8618 14.3895C21.4753 15.003 22.292 15.3407 23.1617 15.3407C24.0314 15.3407 24.8489 15.003 25.4623 14.3895L26.8701 12.9818C27.1973 12.6545 27.4454 12.2697 27.609 11.8378L27.6188 11.8132L27.6442 11.805C28.1463 11.651 28.6095 11.3716 28.9854 10.9965L30.3931 9.58878C31.0066 8.97532 31.3443 8.15863 31.3443 7.28815H31.3458ZM12.8802 13.3315C12.7592 13.8037 12.7196 14.6085 12.1054 14.6316C12.0546 14.9044 12.2944 15.0067 12.5119 14.9193C12.7278 14.8199 12.8302 14.9978 12.9026 15.1748C13.2359 15.2234 13.7291 15.0635 13.7477 14.669C13.2501 14.3821 13.0962 13.8366 12.8795 13.3315H12.8802Z" fill="#246161"/>
<path d="M43.5142 15.3601L47.1462 3.84011H49.9702L53.6022 15.3601H51.6182L48.3222 5.02411H48.7542L45.4982 15.3601H43.5142ZM45.5382 12.8641V11.0641H51.5862V12.8641H45.5382ZM55.0486 15.3601V3.84011H59.8086C59.9206 3.84011 60.0646 3.84544 60.2406 3.85611C60.4166 3.86144 60.5792 3.87744 60.7286 3.90411C61.3952 4.00544 61.9446 4.22677 62.3766 4.56811C62.8139 4.90944 63.1366 5.34144 63.3446 5.86411C63.5579 6.38144 63.6646 6.95744 63.6646 7.59211C63.6646 8.22144 63.5579 8.79744 63.3446 9.32011C63.1312 9.83744 62.8059 10.2668 62.3686 10.6081C61.9366 10.9494 61.3899 11.1708 60.7286 11.2721C60.5792 11.2934 60.4139 11.3094 60.2326 11.3201C60.0566 11.3308 59.9152 11.3361 59.8086 11.3361H56.9766V15.3601H55.0486ZM56.9766 9.53611H59.7286C59.8352 9.53611 59.9552 9.53077 60.0886 9.52011C60.2219 9.50944 60.3446 9.48811 60.4566 9.45611C60.7766 9.37611 61.0272 9.23477 61.2086 9.03211C61.3952 8.82944 61.5259 8.60011 61.6006 8.34411C61.6806 8.08811 61.7206 7.83744 61.7206 7.59211C61.7206 7.34677 61.6806 7.09611 61.6006 6.84011C61.5259 6.57877 61.3952 6.34677 61.2086 6.14411C61.0272 5.94144 60.7766 5.80011 60.4566 5.72011C60.3446 5.68811 60.2219 5.66944 60.0886 5.66411C59.9552 5.65344 59.8352 5.64811 59.7286 5.64811H56.9766V9.53611ZM65.4273 15.3601V3.84011H67.3553V15.3601H65.4273Z" fill="#246161"/>
</g>
<defs>
<clipPath id="clip0_4019_689">
<rect width="71.0711" height="18.68" fill="white" transform="translate(0 0.360107)"/>
</clipPath>
</defs>
</svg>
docs/api_reference/conf.py

@@ -62,7 +62,7 @@ class ExampleLinksDirective(SphinxDirective):
             item_node.append(para_node)
             list_node.append(item_node)
         if list_node.children:
-            title_node = nodes.title()
+            title_node = nodes.rubric()
             title_node.append(nodes.Text(f"Examples using {class_or_func_name}"))
             return [title_node, list_node]
         return [list_node]
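The switch from `nodes.title()` to `nodes.rubric()` presumably keeps the generated "Examples using ..." heading informal: docutils renders a rubric like a heading but does not fold it into the section hierarchy, so it stays out of the page's table of contents.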
@@ -75,7 +75,10 @@ class Beta(BaseAdmonition):
     def run(self):
         self.content = self.content or StringList(
             [
-                "This feature is in beta. It is actively being worked on, so the API may change."
+                (
+                    "This feature is in beta. It is actively being worked on, so the "
+                    "API may change."
+                )
             ]
         )
         self.arguments = self.arguments or ["Beta"]
@@ -90,13 +93,10 @@ def setup(app):

 # -- Project information -----------------------------------------------------

 project = "🦜🔗 LangChain"
-copyright = "2023, LangChain, Inc."
-author = "LangChain, Inc."
+copyright = "2023, LangChain Inc"
+author = "LangChain, Inc"

 version = data["tool"]["poetry"]["version"]
 release = version

 html_title = project + " " + version
+html_favicon = "_static/img/brand/favicon.png"
 html_last_updated_fmt = "%b %d, %Y"
@@ -112,11 +112,13 @@ extensions = [
     "sphinx.ext.napoleon",
     "sphinx.ext.viewcode",
     "sphinxcontrib.autodoc_pydantic",
-    "sphinx_copybutton",
-    "sphinx_panels",
     "IPython.sphinxext.ipython_console_highlighting",
     "myst_parser",
+    "_extensions.gallery_directive",
+    "sphinx_design",
+    "sphinx_copybutton",
 ]
-source_suffix = [".rst"]
+source_suffix = [".rst", ".md"]

 # some autodoc pydantic options are repeated in the actual template.
 # potentially user error, but there may be bugs in the sphinx extension
@@ -148,23 +150,84 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 #
-html_theme = "scikit-learn-modern"
-html_theme_path = ["themes"]
-
-# redirects dictionary maps from old links to new links
-html_additional_pages = {}
-redirects = {
-    "index": "langchain_api_reference",
-}
+# The theme to use for HTML and HTML Help pages.
+html_theme = "pydata_sphinx_theme"

 # Theme options are theme-specific and customize the look and feel of a theme
 # further. For a list of options available for each theme, see the
 # documentation.
+html_theme_options = {
+    # # -- General configuration ------------------------------------------------
+    "sidebar_includehidden": True,
+    "use_edit_page_button": False,
+    # # "analytics": {
+    # #     "plausible_analytics_domain": "scikit-learn.org",
+    # #     "plausible_analytics_url": "https://views.scientific-python.org/js/script.js",
+    # # },
+    # # If "prev-next" is included in article_footer_items, then setting show_prev_next
+    # # to True would repeat prev and next links. See
+    # # https://github.com/pydata/pydata-sphinx-theme/blob/b731dc230bc26a3d1d1bb039c56c977a9b3d25d8/src/pydata_sphinx_theme/theme/pydata_sphinx_theme/layout.html#L118-L129
+    "show_prev_next": False,
+    "search_bar_text": "Search",
+    "navigation_with_keys": True,
+    "collapse_navigation": True,
+    "navigation_depth": 3,
+    "show_nav_level": 1,
+    "show_toc_level": 3,
+    "navbar_align": "left",
+    "header_links_before_dropdown": 5,
+    "header_dropdown_text": "Integrations",
+    "logo": {
+        "image_light": "_static/wordmark-api.svg",
+        "image_dark": "_static/wordmark-api-dark.svg",
+    },
+    "surface_warnings": True,
+    # # -- Template placement in theme layouts ----------------------------------
+    "navbar_start": ["navbar-logo"],
+    # # Note that the alignment of navbar_center is controlled by navbar_align
+    "navbar_center": ["navbar-nav"],
+    "navbar_end": ["langchain_docs", "theme-switcher", "navbar-icon-links"],
+    # # navbar_persistent is persistent right (even when on mobiles)
+    "navbar_persistent": ["search-field"],
+    "article_header_start": ["breadcrumbs"],
+    "article_header_end": [],
+    "article_footer_items": [],
+    "content_footer_items": [],
+    # # Use html_sidebars that map page patterns to list of sidebar templates
+    # "primary_sidebar_end": [],
+    "footer_start": ["copyright"],
+    "footer_center": [],
+    "footer_end": [],
+    # # When specified as a dictionary, the keys should follow glob-style patterns, as in
+    # # https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-exclude_patterns
+    # # In particular, "**" specifies the default for all pages
+    # # Use :html_theme.sidebar_secondary.remove: for file-wide removal
+    # "secondary_sidebar_items": {"**": ["page-toc", "sourcelink"]},
+    # "show_version_warning_banner": True,
+    # "announcement": None,
+    "icon_links": [
+        {
+            # Label for this link
+            "name": "GitHub",
+            # URL where the link will redirect
+            "url": "https://github.com/langchain-ai/langchain",  # required
+            # Icon class (if "type": "fontawesome"), or path to local image (if "type": "local")
+            "icon": "fa-brands fa-square-github",
+            # The type of image to be used (see below for details)
+            "type": "fontawesome",
+        },
+        {
+            "name": "X / Twitter",
+            "url": "https://twitter.com/langchainai",
+            "icon": "fab fa-twitter-square",
+        },
+    ],
+    "icon_links_label": "Quick Links",
+    "external_links": [
+        {"name": "Legacy reference", "url": "https://api.python.langchain.com/"},
+    ],
+}
-for old_link in redirects:
-    html_additional_pages[old_link] = "redirects.html"
-
-partners_dir = Path(__file__).parent.parent.parent / "libs/partners"
-partners = [
-    (p.name, p.name.replace("-", "_") + "_api_reference")
-    for p in partners_dir.iterdir()
-]
-partners = sorted(partners)

 html_context = {
     "display_github": True,  # Integrate GitHub
@@ -172,8 +235,6 @@ html_context = {
     "github_repo": "langchain",  # Repo name
     "github_version": "master",  # Version
     "conf_py_path": "/docs/api_reference",  # Path in the checkout to the docs root
-    "redirects": redirects,
-    "partners": partners,
 }
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
@@ -183,9 +244,7 @@ html_static_path = ["_static"]
|
||||
|
||||
# These paths are either relative to html_static_path
|
||||
# or fully qualified paths (e.g. https://...)
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
html_css_files = ["css/custom.css"]
|
||||
html_use_index = False
|
||||
|
||||
myst_enable_extensions = ["colon_fence"]
|
||||
@@ -202,3 +261,5 @@ html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
 # Tell Jinja2 templates the build is running on Read the Docs
 if os.environ.get("READTHEDOCS", "") == "True":
     html_context["READTHEDOCS"] = True
+
+master_doc = "index"
docs/api_reference/create_api_rst.py

@@ -239,7 +239,7 @@ def _construct_doc(
     package_namespace: str,
     members_by_namespace: Dict[str, ModuleMembers],
     package_version: str,
-) -> str:
+) -> List[typing.Tuple[str, str]]:
     """Construct the contents of the reference.rst file for the given package.

     Args:
@@ -251,15 +251,38 @@ def _construct_doc(
     Returns:
         The contents of the reference.rst file.
     """
-    full_doc = f"""\
-=======================
-``{package_namespace}`` {package_version}
-=======================
+    docs = []
+    index_doc = f"""\
+:html_theme.sidebar_secondary.remove:
+
+.. currentmodule:: {package_namespace}
+
+.. _{package_namespace}:
+
+======================================
+{package_namespace.replace('_', '-')}: {package_version}
+======================================
+
+.. automodule:: {package_namespace}
+    :no-members:
+    :no-inherited-members:
+
+.. toctree::
+    :hidden:
+    :maxdepth: 2

 """
+    index_autosummary = """
+"""
     namespaces = sorted(members_by_namespace)

     for module in namespaces:
+        index_doc += f"    {module}\n"
+        module_doc = f"""\
+.. currentmodule:: {package_namespace}
+
+.. _{module}:
+"""
         _members = members_by_namespace[module]
         classes = [
             el
@@ -281,9 +304,9 @@ def _construct_doc(
         ]
         if not (classes or functions):
             continue
-        section = f":mod:`{package_namespace}.{module}`"
+        section = f":mod:`{module}`"
         underline = "=" * (len(section) + 1)
-        full_doc += f"""\
+        module_doc += f"""
 {section}
 {underline}

@@ -291,16 +314,26 @@
     :no-members:
     :no-inherited-members:

 """
+        index_autosummary += f"""
+:ref:`{module}`
+{'^' * (len(module) + 5)}
+"""

         if classes:
-            full_doc += f"""\
-Classes
---------------
+            module_doc += f"""\
+**Classes**

 .. currentmodule:: {package_namespace}

 .. autosummary::
     :toctree: {module}
 """
+            index_autosummary += """
+**Classes**
+
+.. autosummary::
+"""

         for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
@@ -317,19 +350,22 @@ Classes
             else:
                 template = "class.rst"

-            full_doc += f"""\
+            module_doc += f"""\
     :template: {template}

     {class_["qualified_name"]}

 """
+            index_autosummary += f"""
+    {class_['qualified_name']}
+"""

         if functions:
             _functions = [f["qualified_name"] for f in functions]
             fstring = "\n    ".join(sorted(_functions))
-            full_doc += f"""\
-Functions
---------------
+            module_doc += f"""\
+**Functions**

 .. currentmodule:: {package_namespace}

 .. autosummary::
@@ -338,11 +374,18 @@ Functions

 {fstring}

 """
+            index_autosummary += f"""
+**Functions**
+
+.. autosummary::
+
+{fstring}
+"""
         if deprecated_classes:
-            full_doc += f"""\
-Deprecated classes
---------------
+            module_doc += f"""\
+**Deprecated classes**

 .. currentmodule:: {package_namespace}

@@ -350,6 +393,12 @@ Deprecated classes
     :toctree: {module}
 """

+            index_autosummary += """
+**Deprecated classes**
+
+.. autosummary::
+"""
+
         for class_ in sorted(deprecated_classes, key=lambda c: c["qualified_name"]):
             if class_["kind"] == "TypedDict":
                 template = "typeddict.rst"
@@ -364,19 +413,21 @@ Deprecated classes
             else:
                 template = "class.rst"

-            full_doc += f"""\
+            module_doc += f"""\
     :template: {template}

     {class_["qualified_name"]}

 """
+            index_autosummary += f"""
+    {class_['qualified_name']}
+"""

         if deprecated_functions:
             _functions = [f["qualified_name"] for f in deprecated_functions]
             fstring = "\n    ".join(sorted(_functions))
-            full_doc += f"""\
-Deprecated functions
---------------
+            module_doc += f"""\
+**Deprecated functions**

 .. currentmodule:: {package_namespace}

@@ -387,7 +438,18 @@ Deprecated functions

 {fstring}

 """
-    return full_doc
+            index_autosummary += f"""
+**Deprecated functions**
+
+.. autosummary::
+
+{fstring}
+
+"""
+        docs.append((f"{module}.rst", module_doc))
+    docs.append(("index.rst", index_doc + index_autosummary))
+
+    return docs


 def _build_rst_file(package_name: str = "langchain") -> None:
@@ -399,13 +461,14 @@ def _build_rst_file(package_name: str = "langchain") -> None:
     package_dir = _package_dir(package_name)
     package_members = _load_package_modules(package_dir)
     package_version = _get_package_version(package_dir)
-    with open(_out_file_path(package_name), "w") as f:
-        f.write(
-            _doc_first_line(package_name)
-            + _construct_doc(
-                _package_namespace(package_name), package_members, package_version
-            )
-        )
+    output_dir = _out_file_path(package_name)
+    os.mkdir(output_dir)
+    rsts = _construct_doc(
+        _package_namespace(package_name), package_members, package_version
+    )
+    for name, rst in rsts:
+        with open(output_dir / name, "w") as f:
+            f.write(rst)


 def _package_namespace(package_name: str) -> str:
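To make the new contract concrete, here is a toy sketch (the package and module names are hypothetical, not from the diff) of how `_build_rst_file` now fans the `(filename, content)` pairs returned by `_construct_doc` out into a per-package directory:

```python
from pathlib import Path
from typing import List, Tuple

def construct_doc_stub() -> List[Tuple[str, str]]:
    # Stand-in for _construct_doc: one page per module plus an index page.
    return [
        ("chat_models.rst", ".. currentmodule:: langchain_core\n"),
        ("runnables.rst", ".. currentmodule:: langchain_core\n"),
        ("index.rst", ":html_theme.sidebar_secondary.remove:\n"),
    ]

output_dir = Path("core")  # stand-in for what _out_file_path() returns
output_dir.mkdir(exist_ok=True)
for name, rst in construct_doc_stub():
    (output_dir / name).write_text(rst)
# Resulting layout: core/index.rst, core/chat_models.rst, core/runnables.rst
```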
@@ -455,12 +518,117 @@ def _get_package_version(package_dir: Path) -> str:

 def _out_file_path(package_name: str) -> Path:
     """Return the path to the file containing the documentation."""
-    return HERE / f"{package_name.replace('-', '_')}_api_reference.rst"
+    return HERE / f"{package_name.replace('-', '_')}"


-def _doc_first_line(package_name: str) -> str:
-    """Return the path to the file containing the documentation."""
-    return f".. {package_name.replace('-', '_')}_api_reference:\n\n"
+def _build_index(dirs: List[str]) -> None:
+    custom_names = {
+        "airbyte": "Airbyte",
+        "aws": "AWS",
+        "ai21": "AI21",
+    }
+    ordered = ["core", "langchain", "text-splitters", "community", "experimental"]
+    main_ = [dir_ for dir_ in ordered if dir_ in dirs]
+    integrations = sorted(dir_ for dir_ in dirs if dir_ not in main_)
+    main_headers = [
+        " ".join(custom_names.get(x, x.title()) for x in dir_.split("-"))
+        for dir_ in main_
+    ]
+    integration_headers = [
+        " ".join(
+            custom_names.get(x, x.title().replace("ai", "AI").replace("db", "DB"))
+            for x in dir_.split("-")
+        )
+        for dir_ in integrations
+    ]
+    main_tree = "\n".join(
+        f"{header_name}<{dir_.replace('-', '_')}/index>"
+        for header_name, dir_ in zip(main_headers, main_)
+    )
+    main_grid = "\n".join(
+        f'- header: "**{header_name}**"\n  content: "{_package_namespace(dir_).replace("_", "-")}: {_get_package_version(_package_dir(dir_))}"\n  link: {dir_.replace("-", "_")}/index.html'
+        for header_name, dir_ in zip(main_headers, main_)
+    )
+    integration_tree = "\n".join(
+        f"{header_name}<{dir_.replace('-', '_')}/index>"
+        for header_name, dir_ in zip(integration_headers, integrations)
+    )
+
+    integration_grid = ""
+    integrations_to_show = [
+        "openai",
+        "anthropic",
+        "google-vertexai",
+        "aws",
+        "huggingface",
+        "mistralai",
+    ]
+    for header_name, dir_ in sorted(
+        zip(integration_headers, integrations),
+        key=lambda h_d: integrations_to_show.index(h_d[1])
+        if h_d[1] in integrations_to_show
+        else len(integrations_to_show),
+    )[: len(integrations_to_show)]:
+        integration_grid += f'\n- header: "**{header_name}**"\n  content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n  link: {dir_.replace("-", "_")}/index.html'
+    doc = f"""# LangChain Python API Reference
+
+Welcome to the LangChain Python API reference. This is a reference for all
+`langchain-x` packages.
+
+For user guides see [https://python.langchain.com](https://python.langchain.com).
+
+For the legacy API reference hosted on ReadTheDocs see [https://api.python.langchain.com/](https://api.python.langchain.com/).
+
+## Base packages
+
+```{{gallery-grid}}
+:grid-columns: "1 2 2 3"
+
+{main_grid}
+```
+
+```{{toctree}}
+:maxdepth: 2
+:hidden:
+:caption: Base packages
+
+{main_tree}
+```
+
+## Integrations
+
+```{{gallery-grid}}
+:grid-columns: "1 2 2 3"
+
+{integration_grid}
+```
+
+See the full list of integrations in the Section Navigation.
+
+```{{toctree}}
+:maxdepth: 2
+:hidden:
+:caption: Integrations
+
+{integration_tree}
+```
+
+"""
+    with open(HERE / "reference.md", "w") as f:
+        f.write(doc)
+
+    dummy_index = """\
+# API reference
+
+```{toctree}
+:maxdepth: 3
+:hidden:
+
+Reference<reference>
+```
+"""
+    with open(HERE / "index.md", "w") as f:
+        f.write(dummy_index)


 def main(dirs: Optional[list] = None) -> None:
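The `sorted(...)[: len(integrations_to_show)]` expression above is doing double duty: packages named in `integrations_to_show` sort to the front in exactly that order (their list index is the key), everything else ties at `len(integrations_to_show)` and keeps its alphabetical order, and the final slice caps the grid at six cards. A toy run with names only ("chroma" and "elasticsearch" are made-up extras; the real loop also carries display headers via `zip`):

```python
integrations_to_show = [
    "openai", "anthropic", "google-vertexai", "aws", "huggingface", "mistralai",
]
# Hypothetical directory listing, already alphabetically sorted.
integrations = ["ai21", "anthropic", "aws", "chroma", "elasticsearch", "mistralai", "openai"]

shown = sorted(
    integrations,
    key=lambda d: integrations_to_show.index(d)
    if d in integrations_to_show
    else len(integrations_to_show),
)[: len(integrations_to_show)]
print(shown)  # ['openai', 'anthropic', 'aws', 'mistralai', 'ai21', 'chroma']
```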
@@ -488,6 +656,8 @@ def main(dirs: Optional[list] = None) -> None:
     else:
         print("Building package:", dir_)
         _build_rst_file(package_name=dir_)

+    _build_index(dirs)
     print("API reference files built.")
(deleted file)

@@ -1,8 +0,0 @@
-=============
-LangChain API
-=============
-
-.. toctree::
-    :maxdepth: 2
-
-    api_reference.rst
@@ -1,17 +1,11 @@
--e libs/experimental
--e libs/langchain
--e libs/core
--e libs/community
-pydantic<2
-autodoc_pydantic==1.8.0
-myst_parser
-nbsphinx==0.8.9
-sphinx>=5
-sphinx-autobuild==2021.3.14
-sphinx_rtd_theme==1.0.0
-sphinx-typlog-theme==0.8.0
-sphinx-panels
-toml
-myst_nb
-sphinx_copybutton
-pydata-sphinx-theme==0.13.1
+autodoc_pydantic>=1,<2
+sphinx<=7
+myst-parser>=3
+sphinx-autobuild>=2024
+pydata-sphinx-theme>=0.15
+toml>=0.10.2
+myst-nb>=1.1.1
+pyyaml
+sphinx-design
+sphinx-copybutton
+beautifulsoup4
docs/api_reference/scripts/custom_formatter.py (new file, 41 lines)

@@ -0,0 +1,41 @@
import sys
from glob import glob
from pathlib import Path

from bs4 import BeautifulSoup

CUR_DIR = Path(__file__).parents[1]


def process_toc_h3_elements(html_content: str) -> str:
    """Update Class.method() TOC headers to just method()."""
    # Create a BeautifulSoup object
    soup = BeautifulSoup(html_content, "html.parser")

    # Find all <li> elements with class "toc-h3"
    toc_h3_elements = soup.find_all("li", class_="toc-h3")

    # Process each element
    for element in toc_h3_elements:
        element = element.a.code.span
        # Get the text content of the element
        content = element.get_text()

        # Keep only the method name after the last dot
        modified_content = content.split(".")[-1]

        # Update the element's content
        element.string = modified_content

    # Return the modified HTML
    return str(soup)


if __name__ == "__main__":
    dir = sys.argv[1]
    for fn in glob(str(f"{dir.rstrip('/')}/**/*.html"), recursive=True):
        with open(fn, "r") as f:
            html = f.read()
        processed_html = process_toc_h3_elements(html)
        with open(fn, "w") as f:
            f.write(processed_html)
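A quick before/after of what `process_toc_h3_elements` does to a single entry, using a hand-made stand-in for the theme's right-sidebar markup (the class and method names here are illustrative):

```python
from bs4 import BeautifulSoup

html = (
    '<li class="toc-h3"><a href="#BaseChatModel.invoke">'
    "<code><span>BaseChatModel.invoke</span></code></a></li>"
)
soup = BeautifulSoup(html, "html.parser")
span = soup.find("li", class_="toc-h3").a.code.span
span.string = span.get_text().split(".")[-1]  # "BaseChatModel.invoke" -> "invoke"
print(soup)  # the TOC entry now reads just "invoke"
```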
@@ -1,4 +1,4 @@
-:mod:`{{module}}`.{{objname}}
+{{ objname }}
 {{ underline }}==============

 .. currentmodule:: {{ module }}
@@ -11,7 +11,7 @@

 .. autosummary::
 {% for item in attributes %}
-   ~{{ name }}.{{ item }}
+   ~{{ item }}
 {%- endfor %}
 {% endif %}
 {% endblock %}
@@ -22,11 +22,11 @@

 .. autosummary::
 {% for item in methods %}
-   ~{{ name }}.{{ item }}
+   ~{{ item }}
 {%- endfor %}

 {% for item in methods %}
-.. automethod:: {{ name }}.{{ item }}
+.. automethod:: {{ item }}
 {%- endfor %}

 {% endif %}
@@ -1,4 +1,4 @@
-:mod:`{{module}}`.{{objname}}
+{{ objname }}
 {{ underline }}==============

 .. currentmodule:: {{ module }}
@@ -1,4 +1,4 @@
-:mod:`{{module}}`.{{objname}}
+{{ objname }}
 {{ underline }}==============

 .. currentmodule:: {{ module }}
docs/api_reference/templates/langchain_docs.html (new file, 12 lines)

@@ -0,0 +1,12 @@
<!-- This will display a link to LangChain docs -->
<head>
  <style>
    .text-link {
      text-decoration: none; /* Remove underline */
      color: inherit; /* Inherit color from parent element */
    }
  </style>
</head>
<body>
  <a href="https://python.langchain.com/" class='text-link'>Docs</a>
</body>
@@ -1,4 +1,4 @@
-:mod:`{{module}}`.{{objname}}
+{{ objname }}
 {{ underline }}==============

 .. currentmodule:: {{ module }}
@@ -1,21 +1,21 @@
-:mod:`{{module}}`.{{objname}}
+{{ objname }}
 {{ underline }}==============

-.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
-
-    The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
-
 .. currentmodule:: {{ module }}

 .. autoclass:: {{ objname }}

+.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
+
+    The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
+
 {% block attributes %}
 {% if attributes %}
 .. rubric:: {{ _('Attributes') }}

 .. autosummary::
 {% for item in attributes %}
-   ~{{ name }}.{{ item }}
+   ~{{ item }}
 {%- endfor %}
 {% endif %}
 {% endblock %}
@@ -26,11 +26,11 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in methods %}
|
||||
~{{ name }}.{{ item }}
|
||||
~{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% for item in methods %}
|
||||
.. automethod:: {{ name }}.{{ item }}
|
||||
.. automethod:: {{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% endif %}
|
||||
|
||||
@@ -1,10 +1,6 @@
:mod:`{{module}}`.{{objname}}
{{ objname }}
{{ underline }}==============

.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃

   The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.

.. currentmodule:: {{ module }}

.. autopydantic_model:: {{ objname }}
@@ -19,6 +15,10 @@
   :member-order: groupwise
   :show-inheritance: True
   :special-members: __call__
   :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign
   :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign, as_tool

.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃

   The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.

.. example_links:: {{ objname }}

@@ -1,4 +1,4 @@
:mod:`{{module}}`.{{objname}}
{{ objname }}
{{ underline }}==============

.. currentmodule:: {{ module }}

@@ -1,27 +0,0 @@
Copyright (c) 2007-2023 The scikit-learn developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,67 +0,0 @@
<script>
$(document).ready(function() {
    /* Add a [>>>] button on the top-right corner of code samples to hide
     * the >>> and ... prompts and the output and thus make the code
     * copyable. */
    var div = $('.highlight-python .highlight,' +
                '.highlight-python3 .highlight,' +
                '.highlight-pycon .highlight,' +
                '.highlight-default .highlight')
    var pre = div.find('pre');

    // get the styles from the current theme
    pre.parent().parent().css('position', 'relative');
    var hide_text = 'Hide prompts and outputs';
    var show_text = 'Show prompts and outputs';

    // create and add the button to all the code blocks that contain >>>
    div.each(function(index) {
        var jthis = $(this);
        if (jthis.find('.gp').length > 0) {
            var button = $('<span class="copybutton">>>></span>');
            button.attr('title', hide_text);
            button.data('hidden', 'false');
            jthis.prepend(button);
        }
        // tracebacks (.gt) contain bare text elements that need to be
        // wrapped in a span to work with .nextUntil() (see later)
        jthis.find('pre:has(.gt)').contents().filter(function() {
            return ((this.nodeType == 3) && (this.data.trim().length > 0));
        }).wrap('<span>');
    });

    // define the behavior of the button when it's clicked
    $('.copybutton').click(function(e){
        e.preventDefault();
        var button = $(this);
        if (button.data('hidden') === 'false') {
            // hide the code output
            button.parent().find('.go, .gp, .gt').hide();
            button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden');
            button.css('text-decoration', 'line-through');
            button.attr('title', show_text);
            button.data('hidden', 'true');
        } else {
            // show the code output
            button.parent().find('.go, .gp, .gt').show();
            button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible');
            button.css('text-decoration', 'none');
            button.attr('title', hide_text);
            button.data('hidden', 'false');
        }
    });

    /*** Add permalink buttons next to glossary terms ***/
    $('dl.glossary > dt[id]').append(function() {
        return ('<a class="headerlink" href="#' +
                this.getAttribute('id') +
                '" title="Permalink to this term">¶</a>');
    });
});

</script>
{%- if pagename != 'index' and pagename != 'documentation' %}
{% if theme_mathjax_path %}
<script id="MathJax-script" async src="{{ theme_mathjax_path }}"></script>
{% endif %}
{%- endif %}
@@ -1,135 +0,0 @@
{# TEMPLATE VAR SETTINGS #}
{%- set url_root = pathto('', 1) %}
{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
{%- if not embedded and docstitle %}
  {%- set titlesuffix = " — "|safe + docstitle|e %}
{%- else %}
  {%- set titlesuffix = "" %}
{%- endif %}
{%- set lang_attr = 'en' %}

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="{{ lang_attr }}" > <![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js" lang="{{ lang_attr }}"> <!--<![endif]-->
<head>
  <meta charset="utf-8">
  {{ metatags }}
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  {% block htmltitle %}
  <title>{{ title|striptags|e }}{{ titlesuffix }}</title>
  {% endblock %}
  <link rel="canonical"
        href="https://api.python.langchain.com/en/latest/{{ pagename }}.html"/>

  {% if favicon_url %}
  <link rel="shortcut icon" href="{{ favicon_url|e }}"/>
  {% endif %}

  <link rel="stylesheet"
        href="{{ pathto('_static/css/vendor/bootstrap.min.css', 1) }}"
        type="text/css"/>
  {%- for css in css_files %}
    {%- if css|attr("rel") %}
  <link rel="{{ css.rel }}" href="{{ pathto(css.filename, 1) }}"
        type="text/css"{% if css.title is not none %}
        title="{{ css.title }}"{% endif %} />
    {%- else %}
  <link rel="stylesheet" href="{{ pathto(css, 1) }}" type="text/css"/>
    {%- endif %}
  {%- endfor %}
  <link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css"/>
  <script id="documentation_options" data-url_root="{{ pathto('', 1) }}"
          src="{{ pathto('_static/documentation_options.js', 1) }}"></script>
  <script src="{{ pathto('_static/jquery.js', 1) }}"></script>
  {%- block extrahead %} {% endblock %}
</head>
<body>
<div class="banner">
  <p>This is a legacy site. Please use the latest <a href="https://python.langchain.com/v0.2/api_reference/reference.html">v0.2</a> and <a href="https://python.langchain.com/api_reference/">v0.3</a> API references instead.</p>
</div>
{% include "nav.html" %}
{%- block content %}
<div class="d-flex" id="sk-doc-wrapper">
  <input type="checkbox" name="sk-toggle-checkbox" id="sk-toggle-checkbox">
  <label id="sk-sidemenu-toggle" class="sk-btn-toggle-toc btn sk-btn-primary"
         for="sk-toggle-checkbox">Toggle Menu</label>
  <div id="sk-sidebar-wrapper" class="border-right">
    <div class="sk-sidebar-toc-wrapper">
      {%- if meta and meta['parenttoc']|tobool %}
      <div class="sk-sidebar-toc">
        {% set nav = get_nav_object(maxdepth=3, collapse=True, numbered=True) %}
        <ul>
          {% for main_nav_item in nav %}
          {% if main_nav_item.active %}
          <li>
            <a href="{{ main_nav_item.url }}"
               class="sk-toc-active">{{ main_nav_item.title }}</a>
          </li>
          <ul>
            {% for nav_item in main_nav_item.children %}
            <li>
              <a href="{{ nav_item.url }}"
                 class="{% if nav_item.active %}sk-toc-active{% endif %}">{{ nav_item.title }}</a>
              {% if nav_item.children %}
              <ul>
                {% for inner_child in nav_item.children %}
                <li class="sk-toctree-l3">
                  <a href="{{ inner_child.url }}">{{ inner_child.title }}</a>
                </li>
                {% endfor %}
              </ul>
              {% endif %}
            </li>
            {% endfor %}
          </ul>
          {% endif %}
          {% endfor %}
        </ul>
      </div>
      {%- elif meta and meta['globalsidebartoc']|tobool %}
      <div class="sk-sidebar-toc sk-sidebar-global-toc">
        {{ toctree(maxdepth=2, titles_only=True) }}
      </div>
      {%- else %}
      <div class="sk-sidebar-toc">
        {{ toc }}
      </div>
      {%- endif %}
    </div>
  </div>
  <div id="sk-page-content-wrapper">
    <div class="sk-page-content container-fluid body px-md-3" role="main">
      {% block body %}{% endblock %}
    </div>
    <div class="container">
      <footer class="sk-content-footer">
        {%- if pagename != 'index' %}
        {%- if show_copyright %}
        {%- if hasdoc('copyright') %}
        {% trans path=pathto('copyright'), copyright=copyright|e %}
        © {{ copyright }}.{% endtrans %}
        {%- else %}
        {% trans copyright=copyright|e %}© {{ copyright }}
        .{% endtrans %}
        {%- endif %}
        {%- endif %}
        {%- if last_updated %}
        {% trans last_updated=last_updated|e %}Last updated
        on {{ last_updated }}.{% endtrans %}
        {%- endif %}
        {%- if show_source and has_source and sourcename %}
        <a href="{{ pathto('_sources/' + sourcename, true)|e }}"
           rel="nofollow">{{ _('Show this page source') }}</a>
        {%- endif %}
        {%- endif %}
      </footer>
    </div>
  </div>
</div>
{%- endblock %}
<script src="{{ pathto('_static/js/vendor/bootstrap.min.js', 1) }}"></script>
{% include "javascript.html" %}
</body>
</html>
@@ -1,78 +0,0 @@
{%- if pagename != 'index' and pagename != 'documentation' %}
  {%- set nav_bar_class = "sk-docs-navbar" %}
  {%- set top_container_cls = "sk-docs-container" %}
{%- else %}
  {%- set nav_bar_class = "sk-landing-navbar" %}
  {%- set top_container_cls = "sk-landing-container" %}
{%- endif %}

<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
  <div class="container-fluid {{ top_container_cls }} px-0">
    {%- if logo_url %}
    <a class="navbar-brand py-0" href="{{ pathto('index') }}">
      <img
        class="sk-brand-img"
        src="{{ logo_url|e }}"
        alt="logo"/>
    </a>
    {%- endif %}
    <button
      id="sk-navbar-toggler"
      class="navbar-toggler"
      type="button"
      data-toggle="collapse"
      data-target="#navbarSupportedContent"
      aria-controls="navbarSupportedContent"
      aria-expanded="false"
      aria-label="Toggle navigation"
    >
      <span class="navbar-toggler-icon"></span>
    </button>

    <div class="sk-navbar-collapse collapse navbar-collapse" id="navbarSupportedContent">
      <ul class="navbar-nav mr-auto">
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="{{ pathto('langchain_api_reference') }}">LangChain</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="{{ pathto('core_api_reference') }}">Core</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="{{ pathto('community_api_reference') }}">Community</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="{{ pathto('text_splitters_api_reference') }}">Text splitters</a>
        </li>
        {%- for title, pathname in partners %}
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="{{ pathto(pathname) }}">{{ title }}</a>
        </li>
        {%- endfor %}
        <li class="nav-item dropdown nav-more-item-dropdown">
          <a class="sk-nav-link nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Partner libs</a>
          <div class="dropdown-menu" aria-labelledby="navbarDropdown">
            {%- for title, pathname in partners %}
            <a class="sk-nav-dropdown-item dropdown-item" href="{{ pathto(pathname) }}">{{ title }}</a>
            {%- endfor %}
          </div>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" target="_blank" rel="noopener noreferrer" href="https://python.langchain.com/">Docs</a>
        </li>
      </ul>
      {%- if pagename != "search"%}
      <div id="searchbox" role="search">
        <div class="searchformwrapper">
          <form class="search" action="{{ pathto('search') }}" method="get">
            <input class="sk-search-text-input" type="text" name="q" aria-labelledby="searchlabel" />
            <input class="sk-search-text-btn" type="submit" value="{{ _('Go') }}" />
          </form>
        </div>
      </div>
      {%- endif %}
    </div>
  </div>
</nav>
@@ -1,16 +0,0 @@
{%- extends "basic/search.html" %}
{% block extrahead %}
<script type="text/javascript" src="{{ pathto('_static/underscore.js', 1) }}"></script>
<script type="text/javascript" src="{{ pathto('searchindex.js', 1) }}" defer></script>
<script type="text/javascript" src="{{ pathto('_static/doctools.js', 1) }}"></script>
<script type="text/javascript" src="{{ pathto('_static/language_data.js', 1) }}"></script>
<script type="text/javascript" src="{{ pathto('_static/searchtools.js', 1) }}"></script>
<script type="text/javascript" src="{{ pathto('_static/sphinx_highlight.js', 1) }}"></script>
<script type="text/javascript">
  $(document).ready(function() {
    if (!Search.out) {
      Search.init();
    }
  });
</script>
{% endblock %}
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,8 +0,0 @@
[theme]
inherit = basic
pygments_style = default
stylesheet = css/theme.css

[options]
link_to_live_contributing_page = false
mathjax_path =
@@ -63,7 +63,7 @@
"outputs": [],
"source": [
"# <!-- ruff: noqa: F821 -->\n",
"from langchain.globals import set_llm_cache"
"from langchain_core.globals import set_llm_cache"
]
},
{
@@ -103,7 +103,7 @@
],
"source": [
"%%time\n",
"from langchain.cache import InMemoryCache\n",
"from langchain_core.caches import InMemoryCache\n",
"\n",
"set_llm_cache(InMemoryCache())\n",
"\n",

@@ -63,7 +63,7 @@
"* The `load` method is a convenience method meant solely for prototyping work -- it just invokes `list(self.lazy_load())`.\n",
"* The `alazy_load` method has a default implementation that will delegate to `lazy_load`. If you're using async, we recommend overriding the default implementation and providing a native async implementation.\n",
"\n",
"::: {.callout-important}\n",
":::{.callout-important}\n",
"When implementing a document loader do **NOT** provide parameters via the `lazy_load` or `alazy_load` methods.\n",
"\n",
"All configuration is expected to be passed through the initializer (__init__). This was a design choice made by LangChain to make sure that once a document loader has been instantiated it has all the information needed to load documents.\n",
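As a rough illustration of the convention described in this cell (a sketch, not part of the diff; it assumes the `BaseLoader` interface from `langchain_core`), configuration goes through `__init__` and `lazy_load` takes no parameters:

```python
from typing import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class LineLoader(BaseLoader):
    """Toy loader: yields one Document per line of a text file."""

    def __init__(self, file_path: str) -> None:
        # All configuration is passed through the initializer.
        self.file_path = file_path

    def lazy_load(self) -> Iterator[Document]:
        # No parameters here -- everything was set in __init__.
        with open(self.file_path) as f:
            for i, line in enumerate(f):
                yield Document(
                    page_content=line,
                    metadata={"line_number": i, "source": self.file_path},
                )
```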
@@ -235,7 +235,7 @@
"id": "56cb443e-f987-4386-b4ec-975ee129adb2",
"metadata": {},
"source": [
"::: {.callout-tip}\n",
":::{.callout-tip}\n",
"\n",
"`load()` can be helpful in an interactive environment such as a jupyter notebook.\n",
"\n",
@@ -276,7 +276,7 @@
"source": [
"## Working with Files\n",
"\n",
"Many document loaders invovle parsing files. The difference between such loaders usually stems from how the file is parsed rather than how the file is loaded. For example, you can use `open` to read the binary content of either a PDF or a markdown file, but you need different parsing logic to convert that binary data into text.\n",
"Many document loaders involve parsing files. The difference between such loaders usually stems from how the file is parsed, rather than how the file is loaded. For example, you can use `open` to read the binary content of either a PDF or a markdown file, but you need different parsing logic to convert that binary data into text.\n",
"\n",
"As a result, it can be helpful to decouple the parsing logic from the loading logic, which makes it easier to re-use a given parser regardless of how the data was loaded.\n",
"\n",
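A sketch of that decoupling, assuming the `Blob`/`BaseBlobParser` primitives from `langchain_core` (the parser itself is a made-up example):

```python
from typing import Iterator

from langchain_core.document_loaders import BaseBlobParser, Blob
from langchain_core.documents import Document


class UpperCaseParser(BaseBlobParser):
    """Hypothetical parser: one upper-cased Document per blob."""

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        yield Document(
            page_content=blob.as_string().upper(),
            metadata={"source": blob.source},
        )


# The same parser works regardless of where the bytes came from.
blob = Blob.from_data(b"hello from memory", path="in-memory")
docs = list(UpperCaseParser().lazy_parse(blob))
```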
@@ -355,7 +355,7 @@
"id": "433bfb7c-7767-43bc-b71e-42413d7494a8",
"metadata": {},
"source": [
"Using the **blob** API also allows one to load content direclty from memory without having to read it from a file!"
"Using the **blob** API also allows one to load content directly from memory without having to read it from a file!"
]
},
{

@@ -182,7 +182,7 @@ pprint(data)
</CodeOutputBlock>


Another option is set `jq_schema='.'` and provide `content_key`:
Another option is to set `jq_schema='.'` and provide `content_key`:

```python
loader = JSONLoader(

File diff suppressed because one or more lines are too long
@@ -364,7 +364,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.graph_qa.cypher_utils import CypherQueryCorrector, Schema\n",
"from langchain_community.chains.graph_qa.cypher_utils import (\n",
"    CypherQueryCorrector,\n",
"    Schema,\n",
")\n",
"\n",
"# Cypher validation tool for relationship directions\n",
"corrector_schema = [\n",

@@ -36,7 +36,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.globals import set_llm_cache\n",
"from langchain_core.globals import set_llm_cache\n",
"from langchain_openai import OpenAI\n",
"\n",
"# To make the caching really obvious, let's use a slower and older model.\n",
@@ -71,7 +71,7 @@
],
"source": [
"%%time\n",
"from langchain.cache import InMemoryCache\n",
"from langchain_core.caches import InMemoryCache\n",
"\n",
"set_llm_cache(InMemoryCache())\n",
"\n",

@@ -38,8 +38,8 @@
"    Operator,\n",
"    StructuredQuery,\n",
")\n",
"from langchain.retrievers.self_query.chroma import ChromaTranslator\n",
"from langchain.retrievers.self_query.elasticsearch import ElasticsearchTranslator\n",
"from langchain_community.query_constructors.chroma import ChromaTranslator\n",
"from langchain_community.query_constructors.elasticsearch import ElasticsearchTranslator\n",
"from langchain_core.pydantic_v1 import BaseModel"
]
},

@@ -512,7 +512,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers.self_query.chroma import ChromaTranslator\n",
"from langchain_community.query_constructors.chroma import ChromaTranslator\n",
"\n",
"retriever = SelfQueryRetriever(\n",
"    query_constructor=query_constructor,\n",

@@ -299,16 +299,16 @@
},
{
"cell_type": "markdown",
"id": "423c6e099e94ca69",
"metadata": {
"collapsed": false
},
"source": [
"### Gradient\n",
"\n",
"In this method, the gradient of distance is used to split chunks along with the percentile method.\n",
"This method is useful when chunks are highly correlated with each other or specific to a domain, e.g. legal or medical. The idea is to apply anomaly detection on the gradient array so that the distribution becomes wider, making it easy to identify boundaries in highly semantic data."
],
"metadata": {
"collapsed": false
},
"id": "423c6e099e94ca69"
]
},
{
"cell_type": "code",
@@ -325,6 +325,8 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "e9f393d316ce1f6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -337,13 +339,13 @@
"source": [
"docs = text_splitter.create_documents([state_of_the_union])\n",
"print(docs[0].page_content)"
],
"metadata": {},
"id": "e9f393d316ce1f6c"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a407cd57f02a0db4",
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -355,9 +357,7 @@
],
"source": [
"print(len(docs))"
],
"metadata": {},
"id": "a407cd57f02a0db4"
]
}
],
"metadata": {

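A sketch of the gradient option described in that cell, assuming the `SemanticChunker` API from `langchain_experimental` and an OpenAI embeddings model:

```python
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings

# "gradient" applies anomaly detection to the gradient of the
# embedding-distance array to pick chunk boundaries.
text_splitter = SemanticChunker(
    OpenAIEmbeddings(), breakpoint_threshold_type="gradient"
)
# state_of_the_union: the text loaded earlier in the notebook
docs = text_splitter.create_documents([state_of_the_union])
print(docs[0].page_content)
```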
@@ -6,6 +6,8 @@ keywords: [compatibility]

# Chat models

[Chat models](/docs/concepts/#chat-models) are language models that use a sequence of [messages](/docs/concepts/#messages) as inputs and return messages as outputs (as opposed to using plain text). These are generally newer models.

:::info

If you'd like to write your own chat model, see [this how-to](/docs/how_to/custom_chat_model/).

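A minimal sketch of the messages-in, messages-out contract (using `langchain-openai` as one example provider; any chat model integration behaves the same way):

```python
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini")  # example model name
response = model.invoke(
    [
        SystemMessage(content="You are a terse assistant."),
        HumanMessage(content="What is LangChain?"),
    ]
)
print(response.content)  # response is an AIMessage
```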
@@ -226,8 +226,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import tool\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from langchain_core.tools import tool\n",
"\n",
"\n",
"class WeatherInput(BaseModel):\n",

@@ -53,7 +53,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
"if \"TOGETHER_API_KEY\" not in os.environ:\n",
"    os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
]
},
{
@@ -87,21 +88,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install -qU langchain-together"
]
@@ -113,14 +103,12 @@
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate chat completions:\n",
"\n",
"- TODO: Update model instantiation with relevant params."
"Now we can instantiate our model object and generate chat completions:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
"metadata": {},
"outputs": [],
@@ -147,7 +135,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"id": "62e0dbc3",
"metadata": {
"tags": []
@@ -156,10 +144,10 @@
{
"data": {
"text/plain": [
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 35, 'total_tokens': 44}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-79efa49b-dbaf-4ef8-9dce-958533823ef6-0', usage_metadata={'input_tokens': 35, 'output_tokens': 9, 'total_tokens': 44})"
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 35, 'total_tokens': 44}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-eabcbe33-cdd8-45b8-ab0b-f90b6e7dfad8-0', usage_metadata={'input_tokens': 35, 'output_tokens': 9, 'total_tokens': 44})"
]
},
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -178,7 +166,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
"metadata": {},
"outputs": [
@@ -206,17 +194,17 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 30, 'total_tokens': 37}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-80bba5fa-1723-4242-8d5a-c09b76b8350b-0', usage_metadata={'input_tokens': 30, 'output_tokens': 7, 'total_tokens': 37})"
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 30, 'total_tokens': 37}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a249aa24-ee31-46ba-9bf9-f4eb135b0a95-0', usage_metadata={'input_tokens': 30, 'output_tokens': 7, 'total_tokens': 37})"
]
},
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -271,7 +259,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.4"
}
},
"nbformat": 4,

243
docs/docs/integrations/document_loaders/bshtml.ipynb
Normal file
@@ -0,0 +1,243 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BSHTMLLoader\n",
    "\n",
    "\n",
    "This notebook provides a quick overview for getting started with BeautifulSoup4 [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all BSHTMLLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html).\n",
    "\n",
    "\n",
    "## Overview\n",
    "### Integration details\n",
    "\n",
    "\n",
    "| Class | Package | Local | Serializable | JS support|\n",
    "| :--- | :--- | :---: | :---: | :---: |\n",
    "| [BSHTMLLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
    "### Loader features\n",
    "| Source | Document Lazy Loading | Native Async Support\n",
    "| :---: | :---: | :---: | \n",
    "| BSHTMLLoader | ✅ | ❌ | \n",
    "\n",
    "## Setup\n",
    "\n",
    "To access BSHTMLLoader document loader you'll need to install the `langchain-community` integration package and the `bs4` python package.\n",
    "\n",
    "### Credentials\n",
    "\n",
    "No credentials are needed to use the `BSHTMLLoader` class."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
    "# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Installation\n",
    "\n",
    "Install **langchain_community** and **bs4**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -qU langchain_community bs4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialization\n",
    "\n",
    "Now we can instantiate our model object and load documents:\n",
    "\n",
    "- TODO: Update model instantiation with relevant params."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import BSHTMLLoader\n",
    "\n",
    "loader = BSHTMLLoader(\n",
    "    file_path=\"./example_data/fake-content.html\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}, page_content='\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "docs = loader.load()\n",
    "docs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'source': './example_data/fake-content.html', 'title': 'Test Title'}\n"
     ]
    }
   ],
   "source": [
    "print(docs[0].metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Lazy Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}, page_content='\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "page = []\n",
    "for doc in loader.lazy_load():\n",
    "    page.append(doc)\n",
    "    if len(page) >= 10:\n",
    "        # do some paged operation, e.g.\n",
    "        # index.upsert(page)\n",
    "\n",
    "        page = []\n",
    "page[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Adding separator to BS4\n",
    "\n",
    "We can also pass a separator to use when calling `get_text` on the soup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='\n",
      ", Test Title, \n",
      ", \n",
      ", \n",
      ", My First Heading, \n",
      ", My first paragraph., \n",
      ", \n",
      ", \n",
      "' metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}\n"
     ]
    }
   ],
   "source": [
    "loader = BSHTMLLoader(\n",
    "    file_path=\"./example_data/fake-content.html\", get_text_separator=\", \"\n",
    ")\n",
    "\n",
    "docs = loader.load()\n",
    "print(docs[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of all BSHTMLLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -0,0 +1,55 @@
# Sample Markdown Document

## Introduction

Welcome to this sample Markdown document. Markdown is a lightweight markup language used for formatting text. It's widely used for documentation, readme files, and more.

## Features

### Headers

Markdown supports multiple levels of headers:

- **Header 1**: `# Header 1`
- **Header 2**: `## Header 2`
- **Header 3**: `### Header 3`

### Lists

#### Unordered List

- Item 1
- Item 2
  - Subitem 2.1
  - Subitem 2.2

#### Ordered List

1. First item
2. Second item
3. Third item

### Links

[OpenAI](https://www.openai.com) is an AI research organization.

### Images

Here's an example image:

![Sample Image](https://via.placeholder.com/150)

### Code

#### Inline Code

Use `code` for inline code snippets.

#### Code Block

```python
def greet(name):
    return f"Hello, {name}!"

print(greet("World"))
```
@@ -30,6 +30,7 @@
{
"sender_name": "User 2",
"timestamp_ms": 1675595060730,
"content": "",
"photos": [
{"uri": "url_of_some_picture.jpg", "creation_timestamp": 1675595059}
]

@@ -164,7 +164,7 @@
},
"outputs": [],
"source": [
"from langchain.document_loaders import GithubFileLoader"
"from langchain_community.document_loaders import GithubFileLoader"
]
},
{

@@ -21,24 +21,24 @@ loader = CSVLoader(
data = loader.load()
```

## Common File Types

The below document loaders allow you to load data from common data formats.

<CategoryTable category="common_loaders" />

## PDFs

The below document loaders allow you to load documents.

<CategoryTable category="pdf_loaders" />

## Webpages

The below document loaders allow you to load webpages.

<CategoryTable category="webpage_loaders" />

## PDFs

The below document loaders allow you to load PDF documents.

<CategoryTable category="pdf_loaders" />

## Common File Types

The below document loaders allow you to load data from common data formats.

<CategoryTable category="common_loaders" />


## All document loaders


348
docs/docs/integrations/document_loaders/json.ipynb
Normal file
@@ -0,0 +1,348 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# JSONLoader\n",
    "\n",
    "This notebook provides a quick overview for getting started with JSON [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all JSONLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html).\n",
    "\n",
    "- TODO: Add any other relevant links, like information about underlying API, etc.\n",
    "\n",
    "## Overview\n",
    "### Integration details\n",
    "\n",
    "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/json/)|\n",
    "| :--- | :--- | :---: | :---: | :---: |\n",
    "| [JSONLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
    "### Loader features\n",
    "| Source | Document Lazy Loading | Native Async Support\n",
    "| :---: | :---: | :---: | \n",
    "| JSONLoader | ✅ | ❌ | \n",
    "\n",
    "## Setup\n",
    "\n",
    "To access JSON document loader you'll need to install the `langchain-community` integration package as well as the ``jq`` python package.\n",
    "\n",
    "### Credentials\n",
    "\n",
    "No credentials are required to use the `JSONLoader` class."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
    "# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Installation\n",
    "\n",
    "Install **langchain_community** and **jq**:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -qU langchain_community jq "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialization\n",
    "\n",
    "Now we can instantiate our model object and load documents:\n",
    "\n",
    "- TODO: Update model instantiation with relevant params."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import JSONLoader\n",
    "\n",
    "loader = JSONLoader(\n",
    "    file_path=\"./example_data/facebook_chat.json\",\n",
    "    jq_schema=\".messages[].content\",\n",
    "    text_content=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}, page_content='Bye!')"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "docs = loader.load()\n",
    "docs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}\n"
     ]
    }
   ],
   "source": [
    "print(docs[0].metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Lazy Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "pages = []\n",
    "for doc in loader.lazy_load():\n",
    "    pages.append(doc)\n",
    "    if len(pages) >= 10:\n",
    "        # do some paged operation, e.g.\n",
    "        # index.upsert(pages)\n",
    "\n",
    "        pages = []"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read from JSON Lines file\n",
    "\n",
    "If you want to load documents from a JSON Lines file, you pass `json_lines=True`\n",
    "and specify `jq_schema` to extract `page_content` from a single JSON object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='Bye!' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}\n"
     ]
    }
   ],
   "source": [
    "loader = JSONLoader(\n",
    "    file_path=\"./example_data/facebook_chat_messages.jsonl\",\n",
    "    jq_schema=\".content\",\n",
    "    text_content=False,\n",
    "    json_lines=True,\n",
    ")\n",
    "\n",
    "docs = loader.load()\n",
    "print(docs[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read specific content keys\n",
    "\n",
    "Another option is to set `jq_schema='.'` and provide a `content_key` in order to only load specific content:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='User 2' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}\n"
     ]
    }
   ],
   "source": [
    "loader = JSONLoader(\n",
    "    file_path=\"./example_data/facebook_chat_messages.jsonl\",\n",
    "    jq_schema=\".\",\n",
    "    content_key=\"sender_name\",\n",
    "    json_lines=True,\n",
    ")\n",
    "\n",
    "docs = loader.load()\n",
    "print(docs[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## JSON file with jq schema `content_key`\n",
    "\n",
    "To load documents from a JSON file using the `content_key` within the jq schema, set `is_content_key_jq_parsable=True`. Ensure that `content_key` is compatible and can be parsed using the jq schema."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='Bye!' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}\n"
     ]
    }
   ],
   "source": [
    "loader = JSONLoader(\n",
    "    file_path=\"./example_data/facebook_chat.json\",\n",
    "    jq_schema=\".messages[]\",\n",
    "    content_key=\".content\",\n",
    "    is_content_key_jq_parsable=True,\n",
    ")\n",
    "\n",
    "docs = loader.load()\n",
    "print(docs[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extracting metadata\n",
    "\n",
    "Generally, we want to include metadata available in the JSON file into the documents that we create from the content.\n",
    "\n",
    "The following demonstrates how metadata can be extracted using the `JSONLoader`.\n",
    "\n",
    "There are some key changes to be noted. In the previous example where we didn't collect the metadata, we managed to directly specify in the schema where the value for the `page_content` can be extracted from.\n",
    "\n",
    "In this example, we have to tell the loader to iterate over the records in the `messages` field. The jq_schema then has to be `.messages[]`\n",
    "\n",
    "This allows us to pass the records (dict) into the `metadata_func` that has to be implemented. The `metadata_func` is responsible for identifying which pieces of information in the record should be included in the metadata stored in the final `Document` object.\n",
    "\n",
    "Additionally, we now have to explicitly specify in the loader, via the `content_key` argument, the key from the record where the value for the `page_content` needs to be extracted from."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}\n"
     ]
    }
   ],
   "source": [
    "# Define the metadata extraction function.\n",
    "def metadata_func(record: dict, metadata: dict) -> dict:\n",
    "    metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
    "    metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
    "\n",
    "    return metadata\n",
    "\n",
    "\n",
    "loader = JSONLoader(\n",
    "    file_path=\"./example_data/facebook_chat.json\",\n",
    "    jq_schema=\".messages[]\",\n",
    "    content_key=\"content\",\n",
    "    metadata_func=metadata_func,\n",
    ")\n",
    "\n",
    "docs = loader.load()\n",
    "print(docs[0].metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of all JSONLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
178
docs/docs/integrations/document_loaders/mathpix.ipynb
Normal file
@@ -0,0 +1,178 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MathPixPDFLoader\n",
|
||||
"\n",
|
||||
"Inspired by Daniel Gross's snippet here: [https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21](https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21)\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [MathPixPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.MathpixPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| MathPixPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Sign up for Mathpix and [create an API key](https://mathpix.com/docs/ocr/creating-an-api-key) to set the `MATHPIX_API_KEY` variables in your environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if \"MATHPIX_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"MATHPIX_API_KEY\"] = getpass.getpass(\"Enter your Mathpix API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we are ready to initialize our loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import MathpixPDFLoader\n",
|
||||
"\n",
|
||||
"file_path = \"./example_data/layout-parser-paper.pdf\"\n",
|
||||
"loader = MathpixPDFLoader(file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all MathpixPDFLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.MathpixPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
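Taken together, the cells above reduce to a short script. A minimal end-to-end sketch, using only the imports, environment variable, and example path shown in the notebook:

```python
import getpass
import os

from langchain_community.document_loaders import MathpixPDFLoader

# Prompt for the Mathpix API key if it is not already in the environment
if "MATHPIX_API_KEY" not in os.environ:
    os.environ["MATHPIX_API_KEY"] = getpass.getpass("Enter your Mathpix API key: ")

# Example PDF path from the notebook; substitute your own file
loader = MathpixPDFLoader("./example_data/layout-parser-paper.pdf")

docs = loader.load()
print(docs[0].metadata)
```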
317
docs/docs/integrations/document_loaders/pdfminer.ipynb
Normal file
File diff suppressed because one or more lines are too long
183
docs/docs/integrations/document_loaders/pdfplumber.ipynb
Normal file
@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PDFPlumber\n",
"\n",
"Like PyMuPDF, the output Documents contain detailed metadata about the PDF and its pages, and the loader returns one document per page.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [PDFPlumberLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PDFPlumberLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support\n",
"| :---: | :---: | :---: | \n",
"| PDFPlumberLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are needed to use this loader."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain_community**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our model object and load documents:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import PDFPlumberLoader\n",
"\n",
"loader = PDFPlumberLoader(\"./example_data/layout-parser-paper.pdf\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'Author': '', 'CreationDate': 'D:20210622012710Z', 'Creator': 'LaTeX with hyperref', 'Keywords': '', 'ModDate': 'D:20210622012710Z', 'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'Producer': 'pdfTeX-1.40.21', 'Subject': '', 'Title': '', 'Trapped': 'False'}, page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recentadvancesindocumentimageanalysis(DIA)havebeen\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomescouldbeeasilydeployedinproductionandextendedforfurther\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportantinnovationsbyawideaudience.Thoughtherehavebeenon-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopmentindisciplineslikenaturallanguageprocessingandcomputer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademicresearchacross awiderangeof disciplinesinthesocialsciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitiveinterfacesforapplyingandcustomizingDLmodelsforlayoutde-\\ntection,characterrecognition,andmanyotherdocumentprocessingtasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: DocumentImageAnalysis·DeepLearning·LayoutAnalysis\\n· Character Recognition · Open Source library · Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocumentimageanalysis(DIA)tasksincludingdocumentimageclassification[11,\\n1202\\nnuJ\\n12\\n]VC.sc[\\n2v84351.3012:viXra\\n')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'Author': '', 'CreationDate': 'D:20210622012710Z', 'Creator': 'LaTeX with hyperref', 'Keywords': '', 'ModDate': 'D:20210622012710Z', 'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'Producer': 'pdfTeX-1.40.21', 'Subject': '', 'Title': '', 'Trapped': 'False'}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all PDFPlumberLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PDFPlumberLoader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
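Because each page arrives as its own Document with the metadata shown above, page-level filtering is a one-liner. A small sketch, assuming only the `page` and `total_pages` keys visible in the output:

```python
from langchain_community.document_loaders import PDFPlumberLoader

loader = PDFPlumberLoader("./example_data/layout-parser-paper.pdf")
docs = loader.load()

# Keep only the first half of the paper, using the per-page metadata
first_half = [d for d in docs if d.metadata["page"] < d.metadata["total_pages"] // 2]
print(f"kept {len(first_half)} of {len(docs)} pages")
```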
185
docs/docs/integrations/document_loaders/pymupdf.ipynb
Normal file
@@ -0,0 +1,185 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PyMuPDF\n",
"\n",
"`PyMuPDF` is optimized for speed, and contains detailed metadata about the PDF and its pages. It returns one document per page.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [PyMuPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyMuPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support\n",
"| :---: | :---: | :---: | \n",
"| PyMuPDFLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are needed to use the `PyMuPDFLoader`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain_community** and **pymupdf**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-community pymupdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can initialize our loader and start loading documents."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import PyMuPDFLoader\n",
"\n",
"loader = PyMuPDFLoader(\"./example_data/layout-parser-paper.pdf\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load\n",
"\n",
"You can pass along any of the options from the [PyMuPDF documentation](https://pymupdf.readthedocs.io/en/latest/app1.html#plain-text/) as keyword arguments in the `load` call, and they will be passed along to the `get_text()` call."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': ''}, page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (\\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': ''}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all PyMuPDFLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyMuPDFLoader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
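The Load section notes that keyword arguments to `load` are forwarded to PyMuPDF's `get_text()`. A sketch of what that looks like in practice; `sort` is one documented `get_text()` option, but treat the specific flag as an assumption:

```python
from langchain_community.document_loaders import PyMuPDFLoader

loader = PyMuPDFLoader("./example_data/layout-parser-paper.pdf")

# Forwarded to PyMuPDF's page.get_text(); sort=True asks for reading order
docs = loader.load(sort=True)
print(docs[0].page_content[:200])
```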
187
docs/docs/integrations/document_loaders/pypdfdirectory.ipynb
Normal file
@@ -0,0 +1,187 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PyPDFDirectoryLoader\n",
"\n",
"This loader loads all PDF files from a specific directory.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [PyPDFDirectoryLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support\n",
"| :---: | :---: | :---: | \n",
"| PyPDFDirectoryLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are needed for this loader."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain_community**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our model object and load documents:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import PyPDFDirectoryLoader\n",
"\n",
"directory_path = \"example_data/\"\n",
"loader = PyPDFDirectoryLoader(directory_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': 'example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': 'example_data/layout-parser-paper.pdf', 'page': 0}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all PyPDFDirectoryLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
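Beyond the defaults shown above, the constructor accepts filtering options. A hedged sketch; the `glob`, `recursive`, and `silent_errors` parameter names are taken from the linked API reference and should be verified there:

```python
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Assumed options: narrow the match pattern, walk subdirectories,
# and skip PDFs that fail to parse instead of raising
loader = PyPDFDirectoryLoader(
    "example_data/",
    glob="**/*.pdf",
    recursive=True,
    silent_errors=True,
)
docs = loader.load()
print(f"loaded {len(docs)} pages from the directory tree")
```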
188
docs/docs/integrations/document_loaders/pypdfium2.ipynb
Normal file
@@ -0,0 +1,188 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PyPDFium2Loader\n",
"\n",
"\n",
"This notebook provides a quick overview for getting started with the PyPDFium2 [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all PyPDFium2Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [PyPDFium2Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support\n",
"| :---: | :---: | :---: | \n",
"| PyPDFium2Loader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"\n",
"To access the PyPDFium2 document loader you'll need to install the `langchain-community` integration package.\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are needed."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain_community**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our model object and load documents:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import PyPDFium2Loader\n",
"\n",
"file_path = \"./example_data/layout-parser-paper.pdf\"\n",
"loader = PyPDFium2Loader(file_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser: A Unified Toolkit for Deep\\r\\nLearning Based Document Image Analysis\\r\\nZejiang Shen\\r\\n1\\r\\n(), Ruochen Zhang\\r\\n2\\r\\n, Melissa Dell\\r\\n3\\r\\n, Benjamin Charles Germain\\r\\nLee\\r\\n4\\r\\n, Jacob Carlson\\r\\n3\\r\\n, and Weining Li\\r\\n5\\r\\n1 Allen Institute for AI\\r\\nshannons@allenai.org 2 Brown University\\r\\nruochen zhang@brown.edu 3 Harvard University\\r\\n{melissadell,jacob carlson}@fas.harvard.edu\\r\\n4 University of Washington\\r\\nbcgl@cs.washington.edu 5 University of Waterloo\\r\\nw422li@uwaterloo.ca\\r\\nAbstract. Recent advances in document image analysis (DIA) have been\\r\\nprimarily driven by the application of neural networks. Ideally, research\\r\\noutcomes could be easily deployed in production and extended for further\\r\\ninvestigation. However, various factors like loosely organized codebases\\r\\nand sophisticated model configurations complicate the easy reuse of im\\x02portant innovations by a wide audience. Though there have been on-going\\r\\nefforts to improve reusability and simplify deep learning (DL) model\\r\\ndevelopment in disciplines like natural language processing and computer\\r\\nvision, none of them are optimized for challenges in the domain of DIA.\\r\\nThis represents a major gap in the existing toolkit, as DIA is central to\\r\\nacademic research across a wide range of disciplines in the social sciences\\r\\nand humanities. This paper introduces LayoutParser, an open-source\\r\\nlibrary for streamlining the usage of DL in DIA research and applica\\x02tions. The core LayoutParser library comes with a set of simple and\\r\\nintuitive interfaces for applying and customizing DL models for layout de\\x02tection, character recognition, and many other document processing tasks.\\r\\nTo promote extensibility, LayoutParser also incorporates a community\\r\\nplatform for sharing both pre-trained models and full document digiti\\x02zation pipelines. We demonstrate that LayoutParser is helpful for both\\r\\nlightweight and large-scale digitization pipelines in real-word use cases.\\r\\nThe library is publicly available at https://layout-parser.github.io.\\r\\nKeywords: Document Image Analysis· Deep Learning· Layout Analysis\\r\\n· Character Recognition· Open Source library· Toolkit.\\r\\n1 Introduction\\r\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\r\\ndocument image analysis (DIA) tasks including document image classification [11,\\r\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all PyPDFium2Loader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
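The `index.upsert(page)` placeholder that recurs in the Lazy Load cells is hypothetical. One way to make the batching pattern concrete; `InMemoryVectorStore` and `OpenAIEmbeddings` are assumptions here, and any LangChain vector store with `add_documents` would do:

```python
from langchain_community.document_loaders import PyPDFium2Loader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

loader = PyPDFium2Loader("./example_data/layout-parser-paper.pdf")
store = InMemoryVectorStore(OpenAIEmbeddings())

# Index ten pages at a time instead of holding the whole PDF in memory
page = []
for doc in loader.lazy_load():
    page.append(doc)
    if len(page) >= 10:
        store.add_documents(page)
        page = []
if page:  # flush the final partial batch
    store.add_documents(page)
```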
File diff suppressed because one or more lines are too long
@@ -6,9 +6,35 @@
"source": [
"# Sitemap\n",
"\n",
"Extends from the `WebBaseLoader`, `SitemapLoader` loads a sitemap from a given URL, and then scrape and load all pages in the sitemap, returning each page as a Document.\n",
"Extends from the `WebBaseLoader`, `SitemapLoader` loads a sitemap from a given URL, and then scrapes and loads all pages in the sitemap, returning each page as a Document.\n",
"\n",
"The scraping is done concurrently. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the scrapped server, or don't care about load. Note, while this will speed up the scraping process, but it may cause the server to block you. Be careful!"
"The scraping is done concurrently. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the scraped server, or don't care about load, you can increase this limit. Note, while this will speed up the scraping process, it may cause the server to block you. Be careful!\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/sitemap/)|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [SiteMapLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.sitemap.SitemapLoader.html#langchain_community.document_loaders.sitemap.SitemapLoader) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support\n",
"| :---: | :---: | :---: | \n",
"| SiteMapLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"To access the SiteMap document loader you'll need to install the `langchain-community` integration package.\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are needed to run this."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
@@ -17,21 +43,55 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet nest_asyncio"
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain_community**."
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fix notebook asyncio bug"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# fixes a bug with asyncio and jupyter\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our model object and load documents:"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -43,13 +103,63 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"sitemap_loader = SitemapLoader(web_path=\"https://api.python.langchain.com/sitemap.xml\")\n",
"\n",
"docs = sitemap_loader.load()"
"sitemap_loader = SitemapLoader(web_path=\"https://api.python.langchain.com/sitemap.xml\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching pages: 100%|##########| 28/28 [00:04<00:00,  6.83it/s]\n"
]
},
{
"data": {
"text/plain": [
"Document(metadata={'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-05-15T00:29:42.163001+00:00', 'changefreq': 'weekly', 'priority': '1'}, page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = sitemap_loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-05-15T00:29:42.163001+00:00', 'changefreq': 'weekly', 'priority': '1'}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
@@ -71,24 +181,37 @@
"sitemap_loader.requests_kwargs = {\"verify\": False}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load\n",
"\n",
"You can also load the pages lazily in order to minimize the memory load."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n', metadata={'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-02-09T01:10:49.422114+00:00', 'changefreq': 'weekly', 'priority': '1'})"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching pages: 100%|##########| 28/28 [00:01<00:00, 19.06it/s]\n"
]
}
],
"source": [
"docs[0]"
"page = []\n",
"for doc in sitemap_loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
@@ -224,11 +347,13 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": []
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all SiteMapLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.sitemap.SitemapLoader.html#langchain_community.document_loaders.sitemap.SitemapLoader"
]
}
],
"metadata": {
@@ -247,7 +372,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.9"
}
},
"nbformat": 4,
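The intro notes that the default limit of 2 requests per second can be raised. `SitemapLoader` inherits a `requests_per_second` attribute from `WebBaseLoader`; the attribute name comes from the linked API reference, so treat this as a sketch:

```python
from langchain_community.document_loaders.sitemap import SitemapLoader

sitemap_loader = SitemapLoader(web_path="https://api.python.langchain.com/sitemap.xml")

# Raise the crawl rate above the 2 req/s default; higher rates risk
# the target server blocking you, so be a good citizen
sitemap_loader.requests_per_second = 5
docs = sitemap_loader.load()
```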
@@ -0,0 +1,269 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# UnstructuredMarkdownLoader\n",
"\n",
"This notebook provides a quick overview for getting started with the UnstructuredMarkdown [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all UnstructuredMarkdownLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/unstructured/)|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [UnstructuredMarkdownLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | ❌ | ✅ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support\n",
"| :---: | :---: | :---: | \n",
"| UnstructuredMarkdownLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"To access the UnstructuredMarkdownLoader document loader you'll need to install the `langchain-community` integration package and the `unstructured` Python package.\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are needed to use this loader."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated best-in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain_community** and **unstructured**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_community unstructured"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our model object and load documents.\n",
"\n",
"You can run the loader in one of two modes: \"single\" and \"elements\". If you use \"single\" mode, the document will be returned as a single `Document` object. If you use \"elements\" mode, the unstructured library will split the document into elements such as `Title` and `NarrativeText`. You can pass in additional `unstructured` kwargs after mode to apply different `unstructured` settings."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import UnstructuredMarkdownLoader\n",
"\n",
"loader = UnstructuredMarkdownLoader(\n",
"    \"./example_data/example.md\",\n",
"    mode=\"single\",\n",
"    strategy=\"fast\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/example.md'}, page_content='Sample Markdown Document\\n\\nIntroduction\\n\\nWelcome to this sample Markdown document. Markdown is a lightweight markup language used for formatting text. It\\'s widely used for documentation, readme files, and more.\\n\\nFeatures\\n\\nHeaders\\n\\nMarkdown supports multiple levels of headers:\\n\\nHeader 1: # Header 1\\n\\nHeader 2: ## Header 2\\n\\nHeader 3: ### Header 3\\n\\nLists\\n\\nUnordered List\\n\\nItem 1\\n\\nItem 2\\n\\nSubitem 2.1\\n\\nSubitem 2.2\\n\\nOrdered List\\n\\nFirst item\\n\\nSecond item\\n\\nThird item\\n\\nLinks\\n\\nOpenAI is an AI research organization.\\n\\nImages\\n\\nHere\\'s an example image:\\n\\nCode\\n\\nInline Code\\n\\nUse code for inline code snippets.\\n\\nCode Block\\n\\n```python def greet(name): return f\"Hello, {name}!\"\\n\\nprint(greet(\"World\")) ```')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/example.md'}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/example.md', 'link_texts': ['OpenAI'], 'link_urls': ['https://www.openai.com'], 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'parent_id': 'de1f74bf226224377ab4d8b54f215bb9', 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'NarrativeText', 'element_id': '898a542a261f7dc65e0072d1e847d535'}, page_content='OpenAI is an AI research organization.')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []\n",
"page[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Elements\n",
"\n",
"In this example we will load in the `elements` mode, which will return a list of the different elements in the markdown document:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"29"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.document_loaders import UnstructuredMarkdownLoader\n",
"\n",
"loader = UnstructuredMarkdownLoader(\n",
"    \"./example_data/example.md\",\n",
"    mode=\"elements\",\n",
"    strategy=\"fast\",\n",
")\n",
"\n",
"docs = loader.load()\n",
"len(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see, there are 29 elements that were pulled from the `example.md` file. The first element is the title of the document, as expected:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Sample Markdown Document'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0].page_content"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all UnstructuredMarkdownLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
File diff suppressed because one or more lines are too long
@@ -15,45 +15,47 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "427d5745",
"metadata": {},
"source": "from langchain_community.document_loaders import YoutubeLoader",
"outputs": [],
"execution_count": null
"source": [
"from langchain_community.document_loaders import YoutubeLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34a25b57",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet youtube-transcript-api"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc8b308a",
"metadata": {},
"outputs": [],
"source": [
"loader = YoutubeLoader.from_youtube_url(\n",
"    \"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=False\n",
")"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d073dd36",
"metadata": {},
"outputs": [],
"source": [
"loader.load()"
],
"outputs": [],
"execution_count": null
]
},
{
"attachments": {},
@@ -66,26 +68,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba28af69",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet pytube"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b8ea390",
"metadata": {},
"outputs": [],
"source": [
"loader = YoutubeLoader.from_youtube_url(\n",
"    \"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True\n",
")\n",
"loader.load()"
],
"outputs": [],
"execution_count": null
]
},
{
"attachments": {},
@@ -102,8 +104,10 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "08510625",
"metadata": {},
"outputs": [],
"source": [
"loader = YoutubeLoader.from_youtube_url(\n",
"    \"https://www.youtube.com/watch?v=QsYGlZkevEg\",\n",
@@ -112,13 +116,12 @@
"    translation=\"en\",\n",
")\n",
"loader.load()"
],
"outputs": [],
"execution_count": null
]
},
{
"metadata": {},
"cell_type": "markdown",
"id": "69f4e399a9764d73",
"metadata": {},
"source": [
"### Get transcripts as timestamped chunks\n",
"\n",
@@ -127,12 +130,14 @@
"`transcript_format` param: One of the `langchain_community.document_loaders.youtube.TranscriptFormat` values. In this case, `TranscriptFormat.CHUNKS`.\n",
"\n",
"`chunk_size_seconds` param: An integer number of video seconds to be represented by each chunk of transcript data. Default is 120 seconds."
],
"id": "69f4e399a9764d73"
]
},
{
"metadata": {},
"cell_type": "code",
"execution_count": null,
"id": "540bbf19182f38bc",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders.youtube import TranscriptFormat\n",
"\n",
@@ -143,10 +148,7 @@
"    chunk_size_seconds=30,\n",
")\n",
"print(\"\\n\\n\".join(map(repr, loader.load())))"
],
"id": "540bbf19182f38bc",
"outputs": [],
"execution_count": null
]
},
{
"attachments": {},
@@ -172,8 +174,10 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "c345bc43",
"metadata": {},
"outputs": [],
"source": [
"# Init the GoogleApiClient\n",
"from pathlib import Path\n",
@@ -198,9 +202,7 @@
"\n",
"# returns a list of Documents\n",
"youtube_loader_channel.load()"
],
"outputs": [],
"execution_count": null
]
}
],
"metadata": {
@@ -331,8 +331,8 @@
}
],
"source": [
"from langchain.embeddings import OpenVINOEmbeddings\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import OpenVINOEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
@@ -245,8 +245,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import boto3\n",
|
||||
"from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
||||
"from langchain_aws import ChatBedrock\n",
|
||||
"from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
||||
"from langchain_community.graphs import NeptuneRdfGraph\n",
|
||||
"\n",
|
||||
"host = \"<your host>\"\n",
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"from langchain.chains.graph_qa.gremlin import GremlinQAChain\n",
|
||||
"from langchain_community.chains.graph_qa.gremlin import GremlinQAChain\n",
|
||||
"from langchain_community.graphs import GremlinGraph\n",
|
||||
"from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.indexes import GraphIndexCreator\n",
|
||||
"from langchain_community.graphs.index_creator import GraphIndexCreator\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
@@ -252,7 +252,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.indexes.graph import NetworkxEntityGraph"
|
||||
"from langchain_community.graphs import NetworkxEntityGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,6 +6,14 @@ keywords: [compatibility]

# LLMs

:::caution
You are currently on a page documenting the use of [text completion models](/docs/concepts/#llms). Many of the latest and most popular models are [chat completion models](/docs/concepts/#chat-models).

Unless you are specifically using more advanced prompting techniques, you are probably looking for [this page instead](/docs/integrations/chat/).
:::

[LLMs](/docs/concepts/#llms) are language models that take a string as input and return a string as output.

:::info

If you'd like to write your own LLM, see [this how-to](/docs/how_to/custom_llm/).
@@ -13,14 +21,6 @@ If you'd like to contribute an integration, see [Contributing integrations](/doc

:::

## Features (natively supported)
All LLMs implement the Runnable interface, which comes with default implementations of all methods, i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all LLMs basic support for async, streaming and batch, which by default is implemented as below:
- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread.
- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations.
- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`.

Each LLM integration can optionally provide native implementations for async, streaming or batch, which, for providers that support it, can be more efficient. The table shows, for each integration, which features have been implemented with native support.
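For illustration, here is a minimal sketch using one integration (this assumes `langchain-openai` is installed and `OPENAI_API_KEY` is set; any integration in the table below exposes the same methods):

```python
from langchain_openai import OpenAI

llm = OpenAI()

llm.invoke("Tell me a joke")            # single synchronous call
llm.batch(["Hello", "How are you?"])    # parallel calls via a thread pool
for chunk in llm.stream("Write a haiku about autumn"):
    print(chunk, end="", flush=True)    # token-by-token only with native streaming support
```

Because these methods come from the shared Runnable interface, you can swap in any provider from the table and the calls stay identical.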

import { CategoryTable, IndexTable } from "@theme/FeatureTables";

<CategoryTable category="llms" />

@@ -74,7 +74,7 @@
}
],
"source": [
"from langchain.llms import Konko\n",
"from langchain_community.llms import Konko\n",
"\n",
"llm = Konko(model=\"mistralai/mistral-7b-v0.1\", temperature=0.1, max_tokens=128)\n",
"\n",

@@ -19,7 +19,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.memory.motorhead_memory import MotorheadMemory"
"from langchain_community.memory.motorhead_memory import MotorheadMemory"
]
},
{

@@ -48,7 +48,7 @@
"from uuid import uuid4\n",
"\n",
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain.memory import ZepMemory\n",
"from langchain_community.memory.zep_memory import ZepMemory\n",
"from langchain_community.retrievers import ZepRetriever\n",
"from langchain_community.utilities import WikipediaAPIWrapper\n",
"from langchain_core.messages import AIMessage, HumanMessage\n",

@@ -69,11 +69,12 @@
"source": [
"from uuid import uuid4\n",
"\n",
"from langchain.agents import AgentType, Tool, initialize_agent\n",
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain_community.memory.zep_cloud_memory import ZepCloudMemory\n",
"from langchain_community.retrievers import ZepCloudRetriever\n",
"from langchain_community.utilities import WikipediaAPIWrapper\n",
"from langchain_core.messages import AIMessage, HumanMessage\n",
"from langchain_core.tools import Tool\n",
"from langchain_openai import OpenAI\n",
"\n",
"session_id = str(uuid4())  # This is a unique identifier for the session"

@@ -308,7 +308,7 @@ See a [usage example](/docs/integrations/graphs/amazon_neptune_open_cypher).
```python
from langchain_community.graphs import NeptuneGraph
from langchain_community.graphs import NeptuneAnalyticsGraph
from langchain.chains import NeptuneOpenCypherQAChain
from langchain_community.chains.graph_qa.neptune_cypher import NeptuneOpenCypherQAChain
```

### Amazon Neptune with SPARQL
@@ -317,7 +317,7 @@ See a [usage example](/docs/integrations/graphs/amazon_neptune_sparql).

```python
from langchain_community.graphs import NeptuneRdfGraph
from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain
from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain
```

@@ -122,5 +122,5 @@ pip install transformers huggingface_hub
See a [usage example](/docs/integrations/tools/huggingface_tools).

```python
from langchain.agents import load_huggingface_tool
from langchain_community.agent_toolkits.load_tools import load_huggingface_tool
```

@@ -18,6 +18,5 @@ There are two document loaders available for GitHub.
See a [usage example](/docs/integrations/document_loaders/github).

```python
from langchain_community.document_loaders import GitHubIssuesLoader
from langchain.document_loaders import GithubFileLoader
from langchain_community.document_loaders import GitHubIssuesLoader, GithubFileLoader
```
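
For instance, a minimal sketch of loading issues (the repository name and token below are placeholders):

```python
from langchain_community.document_loaders import GitHubIssuesLoader

loader = GitHubIssuesLoader(
    repo="octocat/hello-world",        # placeholder repository
    access_token="<YOUR_GITHUB_PAT>",  # placeholder personal access token
)
docs = loader.load()  # one Document per issue / pull request
```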

@@ -41,7 +41,7 @@ See a usage [example](/docs/integrations/llms/konko).
- **Completion with mistralai/Mistral-7B-v0.1:**

```python
from langchain.llms import Konko
from langchain_community.llms import Konko
llm = Konko(max_tokens=800, model='mistralai/Mistral-7B-v0.1')
prompt = "Generate a Product Description for Apple Iphone 15"
response = llm.invoke(prompt)

@@ -167,7 +167,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"\n",

@@ -12,5 +12,5 @@ See instructions at [Motörhead](https://github.com/getmetal/motorhead) for runn
See a [usage example](/docs/integrations/memory/motorhead_memory).

```python
from langchain.memory import MotorheadMemory
from langchain_community.memory import MotorheadMemory
```

@@ -108,7 +108,7 @@
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain_community.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"state_of_the_union.txt\")\n",
"documents = loader.load()\n",

@@ -86,7 +86,7 @@ See a [Perpetual Memory Example here](/docs/integrations/memory/zep_cloud_chat_m

You can use `ZepCloudMemory` together with agents that support Memory.
```python
from langchain.memory import ZepCloudMemory
from langchain_community.memory import ZepCloudMemory
```
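
For example, a rough sketch of wiring it up (the API key is a placeholder, and the exact parameters may vary by version):

```python
from uuid import uuid4

from langchain_community.memory import ZepCloudMemory

memory = ZepCloudMemory(
    session_id=str(uuid4()),              # identifies the conversation
    api_key="<YOUR_ZEP_CLOUD_API_KEY>",   # placeholder
)
```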

See a [Memory RAG Example here](/docs/integrations/memory/zep_memory_cloud).
@@ -117,4 +117,4 @@ MMR search is useful for ensuring that the retrieved documents are diverse and n
from langchain_community.vectorstores import ZepCloudVectorStore
```
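
For example, a minimal sketch of MMR retrieval, assuming `vectorstore` is an already-configured `ZepCloudVectorStore`:

```python
# Maximal marginal relevance via the standard VectorStore retriever interface.
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 4})
docs = retriever.invoke("your query here")
```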

See a [usage example](/docs/integrations/vectorstores/zep_cloud).

@@ -76,7 +76,7 @@
},
"outputs": [],
"source": [
"from langchain.retrievers import DriaRetriever\n",
"from langchain_community.retrievers import DriaRetriever\n",
"\n",
"api_key = os.getenv(\"DRIA_API_KEY\")\n",
"retriever = DriaRetriever(api_key=api_key)"

@@ -379,4 +379,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

@@ -85,13 +85,14 @@
"source": [
"import os\n",
"\n",
"from langchain_core.documents import Document\n",
"\n",
"os.environ[\"VECTARA_API_KEY\"] = \"<YOUR_VECTARA_API_KEY>\"\n",
"os.environ[\"VECTARA_CORPUS_ID\"] = \"<YOUR_VECTARA_CORPUS_ID>\"\n",
"os.environ[\"VECTARA_CUSTOMER_ID\"] = \"<YOUR_VECTARA_CUSTOMER_ID>\"\n",
"\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain.schema import Document\n",
"from langchain_community.vectorstores import Vectara\n",
"from langchain_openai.chat_models import ChatOpenAI"
]

@@ -23,7 +23,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import NeuralDBRetriever\n",
"from langchain_community.retrievers import NeuralDBRetriever\n",
"\n",
"# From scratch\n",
"retriever = NeuralDBRetriever.from_scratch(thirdai_key=\"your-thirdai-key\")\n",

@@ -60,7 +60,7 @@
"import time\n",
"from uuid import uuid4\n",
"\n",
"from langchain.memory import ZepMemory\n",
"from langchain_community.memory.zep_memory import ZepMemory\n",
"from langchain_core.messages import AIMessage, HumanMessage\n",
"\n",
"# Set this to your Zep server URL\n",

@@ -219,4 +219,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

@@ -43,10 +43,12 @@
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Note that you need to pass `query_instruction=\"\"` for `model_name=\"BAAI/bge-m3\"` see [FAQ BGE M3](https://huggingface.co/BAAI/bge-m3#faq). ",
"id": "f35d54e529c4cb77"
"id": "f35d54e529c4cb77",
"metadata": {},
"source": [
"Note that you need to pass `query_instruction=\"\"` for `model_name=\"BAAI/bge-m3\"` see [FAQ BGE M3](https://huggingface.co/BAAI/bge-m3#faq). "
]
},
{
"cell_type": "code",

@@ -7,6 +7,10 @@ sidebar_class_name: hidden

import { CategoryTable, IndexTable } from "@theme/FeatureTables";

[Embedding models](/docs/concepts#embedding-models) create a vector representation of a piece of text.

This page documents integrations with various model providers that allow you to use embeddings in LangChain.
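
As a quick orientation, a minimal sketch with one such integration (assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set; every integration below implements the same two methods):

```python
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

vector = embeddings.embed_query("Hello, world!")               # one vector for a query
vectors = embeddings.embed_documents(["doc one", "doc two"])   # one vector per document
```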

<CategoryTable category="text_embedding" />

## All embedding models

@@ -2,40 +2,45 @@
"cells": [
{
"cell_type": "markdown",
"id": "f4b5d823fee826c2",
"metadata": {
"collapsed": false
},
"source": [
"# Pinecone Embeddings\n",
"\n",
"Pinecone's inference API can be accessed via `PineconeEmbeddings`, providing text embeddings via the Pinecone service. We start by installing prerequisite libraries:"
],
"metadata": {
"collapsed": false
},
"id": "f4b5d823fee826c2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3bc5d3a5ed7f5ce3",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!pip install -qU \"langchain-pinecone>=0.2.0\" "
],
"metadata": {
"collapsed": false
},
"id": "3bc5d3a5ed7f5ce3",
"execution_count": null
]
},
{
"cell_type": "markdown",
"source": [
"Next, we [sign up / log in to Pinecone](https://app.pinecone.io) to get our API key:"
],
"id": "62a77d25c3fd8bd5",
"metadata": {
"collapsed": false
},
"id": "62a77d25c3fd8bd5"
"source": [
"Next, we [sign up / log in to Pinecone](https://app.pinecone.io) to get our API key:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8162dbcbcf7d3d55",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
@@ -44,49 +49,49 @@
"os.environ[\"PINECONE_API_KEY\"] = os.getenv(\"PINECONE_API_KEY\") or getpass(\n",
"    \"Enter your Pinecone API key: \"\n",
")"
],
"metadata": {
"collapsed": false
},
"id": "8162dbcbcf7d3d55",
"execution_count": null
]
},
{
"cell_type": "markdown",
"source": [
"Check the documentation for available [models](https://docs.pinecone.io/models/overview). Now we initialize our embedding model like so:"
],
"id": "98d860a0a2d8b907",
"metadata": {
"collapsed": false
},
"id": "98d860a0a2d8b907"
"source": [
"Check the documentation for available [models](https://docs.pinecone.io/models/overview). Now we initialize our embedding model like so:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b3adb72786a5275",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_pinecone import PineconeEmbeddings\n",
"\n",
"embeddings = PineconeEmbeddings(model=\"multilingual-e5-large\")"
],
"metadata": {
"collapsed": false
},
"id": "2b3adb72786a5275",
"execution_count": null
]
},
{
"cell_type": "markdown",
"source": [
"From here we can create embeddings either sync or async; let's start with sync! We embed a single text as a query embedding (i.e. what we search with in RAG) using `embed_query`:"
],
"id": "11e24da855517230",
"metadata": {
"collapsed": false
},
"id": "11e24da855517230"
"source": [
"From here we can create embeddings either sync or async; let's start with sync! We embed a single text as a query embedding (i.e. what we search with in RAG) using `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2da515e2a61ef7e9",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"docs = [\n",
@@ -96,39 +101,34 @@
"    \"Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.\",\n",
"    \"An apple a day keeps the doctor away, as the saying goes.\",\n",
"]"
],
"metadata": {
"collapsed": false
},
"id": "2da515e2a61ef7e9",
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2897e0d570c90b2f",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"doc_embeds = embeddings.embed_documents(docs)\n",
"doc_embeds"
],
"metadata": {
"collapsed": false
},
"id": "2897e0d570c90b2f",
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "510784963c0e17a",
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"query = \"Tell me about the tech company known as Apple\"\n",
"query_embed = embeddings.embed_query(query)\n",
"query_embed"
],
"metadata": {
"collapsed": false
},
"id": "510784963c0e17a",
"execution_count": null
]
}
],
"metadata": {

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "9f9afdec",
"metadata": {
"collapsed": false
},
@@ -21,6 +22,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0c9e4bc1",
"metadata": {
"collapsed": false
},
@@ -35,6 +37,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0938e69e",
"metadata": {
"collapsed": false
},
@@ -45,6 +48,7 @@
},
{
"cell_type": "markdown",
"id": "a362df50",
"metadata": {
"collapsed": false
},
@@ -114,6 +118,7 @@
},
{
"cell_type": "markdown",
"id": "6cd0b36e",
"metadata": {
"collapsed": false
},
@@ -126,6 +131,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "b68b7aae",
"metadata": {
"collapsed": false
},
@@ -167,6 +173,7 @@
},
{
"cell_type": "markdown",
"id": "687e6067",
"metadata": {
"collapsed": false
},
@@ -189,6 +196,7 @@
},
{
"cell_type": "markdown",
"id": "4572f574",
"metadata": {
"collapsed": false
},
@@ -208,6 +216,7 @@
},
{
"cell_type": "markdown",
"id": "1df02a16",
"metadata": {
"collapsed": false
},
@@ -218,6 +227,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bb1b72ca",
"metadata": {
"collapsed": false
},
@@ -242,6 +252,7 @@
},
{
"cell_type": "markdown",
"id": "d74ba1d3",
"metadata": {
"collapsed": false
},
@@ -252,6 +263,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "d7f4d8db",
"metadata": {
"collapsed": false
},
@@ -262,6 +274,7 @@
},
{
"cell_type": "markdown",
"id": "d72ed402",
"metadata": {
"collapsed": false
},
@@ -273,6 +286,7 @@
},
{
"cell_type": "markdown",
"id": "056204cc",
"metadata": {
"collapsed": false
},

@@ -49,7 +49,7 @@
}
],
"source": [
"from langchain.agents import load_huggingface_tool\n",
"from langchain_community.agent_toolkits.load_tools import load_huggingface_tool\n",
"\n",
"tool = load_huggingface_tool(\"lysandre/hf-model-downloads\")\n",
"\n",

@@ -169,7 +169,7 @@
"outputs": [],
"source": [
"import spotipy.util as util\n",
"from langchain.requests import RequestsWrapper\n",
"from langchain_community.utilities.requests import RequestsWrapper\n",
"\n",
"\n",
"def construct_spotify_auth_headers(raw_spec: dict):\n",

@@ -138,7 +138,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter()\n",
"documents = text_splitter.split_documents(docs)"

@@ -355,7 +355,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "b63c73c7e905001c",
"id": "50bb4346",
"metadata": {},
"outputs": [],
"source": []

@@ -7,6 +7,8 @@ sidebar_class_name: hidden

import { CategoryTable, IndexTable } from "@theme/FeatureTables";

A [vector store](/docs/concepts/#vector-stores) stores [embedded](/docs/concepts/#embedding-models) data and performs similarity search.
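
As a quick orientation, a minimal sketch with one such integration (assuming `langchain-openai` and `faiss-cpu` are installed and `OPENAI_API_KEY` is set; the interface is shared across the stores below):

```python
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Embed a text, index it, then retrieve the most similar document.
vectorstore = FAISS.from_texts(["harrison worked at kensho"], OpenAIEmbeddings())
docs = vectorstore.similarity_search("where did harrison work?", k=1)
```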

<CategoryTable category="vectorstores" />

## All Vectorstores

@@ -119,8 +119,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import HuggingFaceEmbeddings\n",
"from langchain_core.embeddings import Embeddings\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n",
"model_name = \"sentence-transformers/all-MiniLM-L12-v2\"\n",
"hf = HuggingFaceEmbeddings(model_name=model_name)"

@@ -174,9 +174,9 @@
},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.embeddings import GPT4AllEmbeddings\n",
"from langchain_community.vectorstores import ManticoreSearch, ManticoreSearchSettings"
"from langchain_community.vectorstores import ManticoreSearch, ManticoreSearchSettings\n",
"from langchain_text_splitters import CharacterTextSplitter"
]
},
{

@@ -87,8 +87,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
"data = loader.load()\n",

332 docs/docs/versions/migrating_chains/constitutional_chain.ipynb Normal file
@@ -0,0 +1,332 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b57124cc-60a0-4c18-b7ce-3e483d1024a2",
"metadata": {},
"source": [
"---\n",
"title: Migrating from ConstitutionalChain\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "ce8457ed-c0b1-4a74-abbd-9d3d2211270f",
"metadata": {},
"source": [
"[ConstitutionalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.constitutional_ai.base.ConstitutionalChain.html) allowed an LLM to critique and revise generations based on [principles](https://api.python.langchain.com/en/latest/chains/langchain.chains.constitutional_ai.models.ConstitutionalPrinciple.html), structured as combinations of critique and revision requests. For example, a principle might include a request to identify harmful content, and a request to rewrite the content.\n",
"\n",
"In `ConstitutionalChain`, this structure of critique requests and associated revisions was formatted into a LLM prompt and parsed out of string responses. This is more naturally achieved via [structured output](/docs/how_to/structured_output/) features of chat models. We can construct a simple chain in [LangGraph](https://langchain-ai.github.io/langgraph/) for this purpose. Some advantages of this approach include:\n",
"\n",
"- Leverage tool-calling capabilities of chat models that have been fine-tuned for this purpose;\n",
"- Reduce parsing errors from extracting expressions from a string LLM response;\n",
"- Delegation of instructions to [message roles](/docs/concepts/#messages) (e.g., chat models can understand what a `ToolMessage` represents without the need for additional prompting);\n",
"- Support for streaming, both of individual tokens and chain steps."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b99b47ec",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-openai"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "717c8673",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
"cell_type": "markdown",
"id": "e3621b62-a037-42b8-8faa-59575608bb8b",
"metadata": {},
"source": [
"## Legacy\n",
"\n",
"<details open>"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f91c9809-8ee7-4e38-881d-0ace4f6ea883",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import ConstitutionalChain, LLMChain\n",
"from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple\n",
"from langchain_core.prompts import PromptTemplate\n",
"from langchain_openai import OpenAI\n",
"\n",
"llm = OpenAI()\n",
"\n",
"qa_prompt = PromptTemplate(\n",
"    template=\"Q: {question} A:\",\n",
"    input_variables=[\"question\"],\n",
")\n",
"qa_chain = LLMChain(llm=llm, prompt=qa_prompt)\n",
"\n",
"constitutional_chain = ConstitutionalChain.from_llm(\n",
"    llm=llm,\n",
"    chain=qa_chain,\n",
"    constitutional_principles=[\n",
"        ConstitutionalPrinciple(\n",
"            critique_request=\"Tell if this answer is good.\",\n",
"            revision_request=\"Give a better answer.\",\n",
"        )\n",
"    ],\n",
"    return_intermediate_steps=True,\n",
")\n",
"\n",
"result = constitutional_chain.invoke(\"What is the meaning of life?\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fa3d11a1-ac1f-4a9a-9ab3-b7b244daa506",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'What is the meaning of life?',\n",
" 'output': 'The meaning of life is a deeply personal and ever-evolving concept. It is a journey of self-discovery and growth, and can be different for each individual. Some may find meaning in relationships, others in achieving their goals, and some may never find a concrete answer. Ultimately, the meaning of life is what we make of it.',\n",
" 'initial_output': ' The meaning of life is a subjective concept that can vary from person to person. Some may believe that the purpose of life is to find happiness and fulfillment, while others may see it as a journey of self-discovery and personal growth. Ultimately, the meaning of life is something that each individual must determine for themselves.',\n",
" 'critiques_and_revisions': [('This answer is good in that it recognizes and acknowledges the subjective nature of the question and provides a valid and thoughtful response. However, it could have also mentioned that the meaning of life is a complex and deeply personal concept that can also change and evolve over time for each individual. Critique Needed.',\n",
"   'The meaning of life is a deeply personal and ever-evolving concept. It is a journey of self-discovery and growth, and can be different for each individual. Some may find meaning in relationships, others in achieving their goals, and some may never find a concrete answer. Ultimately, the meaning of life is what we make of it.')]}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result"
]
},
{
"cell_type": "markdown",
"id": "374ae108-f1a0-4723-9237-5259c8123c04",
"metadata": {},
"source": [
"Above, we've returned intermediate steps showing:\n",
"\n",
"- The original question;\n",
"- The initial output;\n",
"- Critiques and revisions;\n",
"- The final output (matching a revision)."
]
},
{
"cell_type": "markdown",
"id": "cdc3b527-c09e-4c77-9711-c3cc4506cd95",
"metadata": {},
"source": [
"</details>\n",
"\n",
"## LangGraph\n",
"\n",
"<details open>\n",
"\n",
"Below, we use the [.with_structured_output](/docs/how_to/structured_output/) method to simultaneously generate (1) a judgment of whether a critique is needed, and (2) the critique. We surface all prompts involved for clarity and ease of customizability.\n",
"\n",
"Note that we are also able to stream intermediate steps with this implementation, so we can monitor and, if needed, intervene during its execution."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "917fdb73-2411-4fcc-9add-c32dc5c745da",
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Optional, Tuple\n",
"\n",
"from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple\n",
"from langchain.chains.constitutional_ai.prompts import (\n",
"    CRITIQUE_PROMPT,\n",
"    REVISION_PROMPT,\n",
")\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"from langgraph.graph import END, START, StateGraph\n",
"from typing_extensions import Annotated, TypedDict\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
"\n",
"\n",
"class Critique(TypedDict):\n",
"    \"\"\"Generate a critique, if needed.\"\"\"\n",
"\n",
"    critique_needed: Annotated[bool, ..., \"Whether or not a critique is needed.\"]\n",
"    critique: Annotated[str, ..., \"If needed, the critique.\"]\n",
"\n",
"\n",
"critique_prompt = ChatPromptTemplate.from_template(\n",
"    \"Critique this response according to the critique request. \"\n",
"    \"If no critique is needed, specify that.\\n\\n\"\n",
"    \"Query: {query}\\n\\n\"\n",
"    \"Response: {response}\\n\\n\"\n",
"    \"Critique request: {critique_request}\"\n",
")\n",
"\n",
"revision_prompt = ChatPromptTemplate.from_template(\n",
"    \"Revise this response according to the critique and revision request.\\n\\n\"\n",
"    \"Query: {query}\\n\\n\"\n",
"    \"Response: {response}\\n\\n\"\n",
"    \"Critique request: {critique_request}\\n\\n\"\n",
"    \"Critique: {critique}\\n\\n\"\n",
"    \"If the critique does not identify anything worth changing, ignore the \"\n",
"    \"revision request and return 'No revisions needed'. If the critique \"\n",
"    \"does identify something worth changing, revise the response based on \"\n",
"    \"the revision request.\\n\\n\"\n",
"    \"Revision Request: {revision_request}\"\n",
")\n",
"\n",
"chain = llm | StrOutputParser()\n",
"critique_chain = critique_prompt | llm.with_structured_output(Critique)\n",
"revision_chain = revision_prompt | llm | StrOutputParser()\n",
"\n",
"\n",
"class State(TypedDict):\n",
"    query: str\n",
"    constitutional_principles: List[ConstitutionalPrinciple]\n",
"    initial_response: str\n",
"    critiques_and_revisions: List[Tuple[str, str]]\n",
"    response: str\n",
"\n",
"\n",
"async def generate_response(state: State):\n",
"    \"\"\"Generate initial response.\"\"\"\n",
"    response = await chain.ainvoke(state[\"query\"])\n",
"    return {\"response\": response, \"initial_response\": response}\n",
"\n",
"\n",
"async def critique_and_revise(state: State):\n",
"    \"\"\"Critique and revise response according to principles.\"\"\"\n",
"    critiques_and_revisions = []\n",
"    response = state[\"initial_response\"]\n",
"    for principle in state[\"constitutional_principles\"]:\n",
"        critique = await critique_chain.ainvoke(\n",
"            {\n",
"                \"query\": state[\"query\"],\n",
"                \"response\": response,\n",
"                \"critique_request\": principle.critique_request,\n",
"            }\n",
"        )\n",
"        if critique[\"critique_needed\"]:\n",
"            revision = await revision_chain.ainvoke(\n",
"                {\n",
"                    \"query\": state[\"query\"],\n",
"                    \"response\": response,\n",
"                    \"critique_request\": principle.critique_request,\n",
"                    \"critique\": critique[\"critique\"],\n",
"                    \"revision_request\": principle.revision_request,\n",
"                }\n",
"            )\n",
"            response = revision\n",
"            critiques_and_revisions.append((critique[\"critique\"], revision))\n",
"        else:\n",
"            critiques_and_revisions.append((critique[\"critique\"], \"\"))\n",
"    return {\n",
"        \"critiques_and_revisions\": critiques_and_revisions,\n",
"        \"response\": response,\n",
"    }\n",
"\n",
"\n",
"graph = StateGraph(State)\n",
"graph.add_node(\"generate_response\", generate_response)\n",
"graph.add_node(\"critique_and_revise\", critique_and_revise)\n",
"\n",
"graph.add_edge(START, \"generate_response\")\n",
"graph.add_edge(\"generate_response\", \"critique_and_revise\")\n",
"graph.add_edge(\"critique_and_revise\", END)\n",
"app = graph.compile()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "01aac88d-464e-431f-b92e-746dcb743e1b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{}\n",
"{'initial_response': 'Finding purpose, connection, and joy in our experiences and relationships.', 'response': 'Finding purpose, connection, and joy in our experiences and relationships.'}\n",
"{'initial_response': 'Finding purpose, connection, and joy in our experiences and relationships.', 'critiques_and_revisions': [(\"The response exceeds the 10-word limit, providing a more elaborate answer than requested. A concise response, such as 'To seek purpose and joy in life,' would better align with the query.\", 'To seek purpose and joy in life.')], 'response': 'To seek purpose and joy in life.'}\n"
]
}
],
"source": [
"constitutional_principles = [\n",
"    ConstitutionalPrinciple(\n",
"        critique_request=\"Tell if this answer is good.\",\n",
"        revision_request=\"Give a better answer.\",\n",
"    )\n",
"]\n",
"\n",
"query = \"What is the meaning of life? Answer in 10 words or fewer.\"\n",
"\n",
"async for step in app.astream(\n",
"    {\"query\": query, \"constitutional_principles\": constitutional_principles},\n",
"    stream_mode=\"values\",\n",
"):\n",
"    subset = [\"initial_response\", \"critiques_and_revisions\", \"response\"]\n",
"    print({k: v for k, v in step.items() if k in subset})"
]
},
{
"cell_type": "markdown",
"id": "b2717810",
"metadata": {},
"source": [
"</details>\n",
"\n",
"## Next steps\n",
"\n",
"See guides for generating structured output [here](/docs/how_to/structured_output/).\n",
"\n",
"Check out the [LangGraph documentation](https://langchain-ai.github.io/langgraph/) for details on building with LangGraph."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@@ -58,11 +58,11 @@
"outputs": [],
"source": [
"# Load docs\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai.chat_models import ChatOpenAI\n",
"from langchain_openai.embeddings import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
"data = loader.load()\n",

@@ -45,5 +45,7 @@ The below pages assist with migration from various specific chains to LCEL and L
- [RefineDocumentsChain](/docs/versions/migrating_chains/refine_docs_chain)
- [LLMRouterChain](/docs/versions/migrating_chains/llm_router_chain)
- [MultiPromptChain](/docs/versions/migrating_chains/multi_prompt_chain)
- [LLMMathChain](/docs/versions/migrating_chains/llm_math_chain)
- [ConstitutionalChain](/docs/versions/migrating_chains/constitutional_chain)

Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) and [LangGraph docs](https://langchain-ai.github.io/langgraph/) for more background information.
281 docs/docs/versions/migrating_chains/llm_math_chain.ipynb Normal file
File diff suppressed because one or more lines are too long
@@ -57,11 +57,11 @@
"outputs": [],
"source": [
"# Load docs\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai.chat_models import ChatOpenAI\n",
"from langchain_openai.embeddings import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
"data = loader.load()\n",

Some files were not shown because too many files have changed in this diff.