mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-10 19:20:24 +00:00
Compare commits
2 Commits
bagatur/co
...
legacy-doc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02b4e770d2 | ||
|
|
4725dfc644 |
7
.github/scripts/check_diff.py
vendored
7
.github/scripts/check_diff.py
vendored
@@ -68,13 +68,6 @@ def dependents_graph() -> dict:
|
||||
|
||||
if "langchain" in dep:
|
||||
dependents[dep].add(pkg_dir)
|
||||
|
||||
# remove huggingface from dependents because of CI instability
|
||||
# specifically in huggingface jobs
|
||||
# https://github.com/langchain-ai/langchain/issues/25558
|
||||
for k in dependents:
|
||||
if "libs/partners/huggingface" in dependents[k]:
|
||||
dependents[k].remove("libs/partners/huggingface")
|
||||
return dependents
|
||||
|
||||
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -172,8 +172,6 @@ docs/api_reference/*/
|
||||
!docs/api_reference/_static/
|
||||
!docs/api_reference/templates/
|
||||
!docs/api_reference/themes/
|
||||
!docs/api_reference/_extensions/
|
||||
!docs/api_reference/scripts/
|
||||
docs/docs/build
|
||||
docs/docs/node_modules
|
||||
docs/docs/yarn.lock
|
||||
|
||||
5
Makefile
5
Makefile
@@ -31,7 +31,6 @@ docs_linkcheck:
|
||||
api_docs_build:
|
||||
poetry run python docs/api_reference/create_api_rst.py
|
||||
cd docs/api_reference && poetry run make html
|
||||
poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
|
||||
API_PKG ?= text-splitters
|
||||
|
||||
@@ -39,14 +38,12 @@ api_docs_quick_preview:
|
||||
poetry run pip install "pydantic<2"
|
||||
poetry run python docs/api_reference/create_api_rst.py $(API_PKG)
|
||||
cd docs/api_reference && poetry run make html
|
||||
poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
open docs/api_reference/_build/html/reference.html
|
||||
open docs/api_reference/_build/html/$(shell echo $(API_PKG) | sed 's/-/_/g')_api_reference.html
|
||||
|
||||
## api_docs_clean: Clean the API Reference documentation build artifacts.
|
||||
api_docs_clean:
|
||||
find ./docs/api_reference -name '*_api_reference.rst' -delete
|
||||
git clean -fdX ./docs/api_reference
|
||||
rm docs/api_reference/index.md
|
||||
|
||||
|
||||
## api_docs_linkcheck: Run linkchecker on the API Reference documentation.
|
||||
|
||||
@@ -82,10 +82,6 @@ vercel-build: install-vercel-deps build generate-references
|
||||
rm -rf docs
|
||||
mv $(OUTPUT_NEW_DOCS_DIR) docs
|
||||
rm -rf build
|
||||
mkdir static/api_reference
|
||||
git clone --depth=1 https://github.com/baskaryan/langchain-api-docs-build.git
|
||||
mv langchain-api-docs-build/api_reference_build/html/* static/api_reference/
|
||||
rm -rf langchain-api-docs-build
|
||||
NODE_OPTIONS="--max-old-space-size=5000" yarn run docusaurus build
|
||||
mv build v0.2
|
||||
mkdir build
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
"""A directive to generate a gallery of images from structured data.
|
||||
|
||||
Generating a gallery of images that are all the same size is a common
|
||||
pattern in documentation, and this can be cumbersome if the gallery is
|
||||
generated programmatically. This directive wraps this particular use-case
|
||||
in a helper-directive to generate it with a single YAML configuration file.
|
||||
|
||||
It currently exists for maintainers of the pydata-sphinx-theme,
|
||||
but might be abstracted into a standalone package if it proves useful.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, ClassVar, Dict, List
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst import directives
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.util import logging
|
||||
from sphinx.util.docutils import SphinxDirective
|
||||
from yaml import safe_load
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
TEMPLATE_GRID = """
|
||||
`````{{grid}} {columns}
|
||||
{options}
|
||||
|
||||
{content}
|
||||
|
||||
`````
|
||||
"""
|
||||
|
||||
GRID_CARD = """
|
||||
````{{grid-item-card}} {title}
|
||||
{options}
|
||||
|
||||
{content}
|
||||
````
|
||||
"""
|
||||
|
||||
|
||||
class GalleryGridDirective(SphinxDirective):
|
||||
"""A directive to show a gallery of images and links in a Bootstrap grid.
|
||||
|
||||
The grid can be generated from a YAML file that contains a list of items, or
|
||||
from the content of the directive (also formatted in YAML). Use the parameter
|
||||
"class-card" to add an additional CSS class to all cards. When specifying the grid
|
||||
items, you can use all parameters from "grid-item-card" directive to customize
|
||||
individual cards + ["image", "header", "content", "title"].
|
||||
|
||||
Danger:
|
||||
This directive can only be used in the context of a Myst documentation page as
|
||||
the templates use Markdown flavored formatting.
|
||||
"""
|
||||
|
||||
name = "gallery-grid"
|
||||
has_content = True
|
||||
required_arguments = 0
|
||||
optional_arguments = 1
|
||||
final_argument_whitespace = True
|
||||
option_spec: ClassVar[dict[str, Any]] = {
|
||||
# A class to be added to the resulting container
|
||||
"grid-columns": directives.unchanged,
|
||||
"class-container": directives.unchanged,
|
||||
"class-card": directives.unchanged,
|
||||
}
|
||||
|
||||
def run(self) -> List[nodes.Node]:
|
||||
"""Create the gallery grid."""
|
||||
if self.arguments:
|
||||
# If an argument is given, assume it's a path to a YAML file
|
||||
# Parse it and load it into the directive content
|
||||
path_data_rel = Path(self.arguments[0])
|
||||
path_doc, _ = self.get_source_info()
|
||||
path_doc = Path(path_doc).parent
|
||||
path_data = (path_doc / path_data_rel).resolve()
|
||||
if not path_data.exists():
|
||||
logger.info(f"Could not find grid data at {path_data}.")
|
||||
nodes.text("No grid data found at {path_data}.")
|
||||
return
|
||||
yaml_string = path_data.read_text()
|
||||
else:
|
||||
yaml_string = "\n".join(self.content)
|
||||
|
||||
# Use all the element with an img-bottom key as sites to show
|
||||
# and generate a card item for each of them
|
||||
grid_items = []
|
||||
for item in safe_load(yaml_string):
|
||||
# remove parameters that are not needed for the card options
|
||||
title = item.pop("title", "")
|
||||
|
||||
# build the content of the card using some extra parameters
|
||||
header = f"{item.pop('header')} \n^^^ \n" if "header" in item else ""
|
||||
image = f"}) \n" if "image" in item else ""
|
||||
content = f"{item.pop('content')} \n" if "content" in item else ""
|
||||
|
||||
# optional parameter that influence all cards
|
||||
if "class-card" in self.options:
|
||||
item["class-card"] = self.options["class-card"]
|
||||
|
||||
loc_options_str = "\n".join(f":{k}: {v}" for k, v in item.items()) + " \n"
|
||||
|
||||
card = GRID_CARD.format(
|
||||
options=loc_options_str, content=header + image + content, title=title
|
||||
)
|
||||
grid_items.append(card)
|
||||
|
||||
# Parse the template with Sphinx Design to create an output container
|
||||
# Prep the options for the template grid
|
||||
class_ = "gallery-directive" + f' {self.options.get("class-container", "")}'
|
||||
options = {"gutter": 2, "class-container": class_}
|
||||
options_str = "\n".join(f":{k}: {v}" for k, v in options.items())
|
||||
|
||||
# Create the directive string for the grid
|
||||
grid_directive = TEMPLATE_GRID.format(
|
||||
columns=self.options.get("grid-columns", "1 2 3 4"),
|
||||
options=options_str,
|
||||
content="\n".join(grid_items),
|
||||
)
|
||||
|
||||
# Parse content as a directive so Sphinx Design processes it
|
||||
container = nodes.container()
|
||||
self.state.nested_parse([grid_directive], 0, container)
|
||||
|
||||
# Sphinx Design outputs a container too, so just use that
|
||||
return [container.children[0]]
|
||||
|
||||
|
||||
def setup(app: Sphinx) -> Dict[str, Any]:
|
||||
"""Add custom configuration to sphinx app.
|
||||
|
||||
Args:
|
||||
app: the Sphinx application
|
||||
|
||||
Returns:
|
||||
the 2 parallel parameters set to ``True``.
|
||||
"""
|
||||
app.add_directive("gallery-grid", GalleryGridDirective)
|
||||
|
||||
return {
|
||||
"parallel_read_safe": True,
|
||||
"parallel_write_safe": True,
|
||||
}
|
||||
@@ -1,411 +1,51 @@
|
||||
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap');
|
||||
|
||||
/*******************************************************************************
|
||||
* master color map. Only the colors that actually differ between light and dark
|
||||
* themes are specified separately.
|
||||
*
|
||||
* To see the full list of colors see https://www.figma.com/file/rUrrHGhUBBIAAjQ82x6pz9/PyData-Design-system---proposal-for-implementation-(2)?node-id=1234%3A765&t=ifcFT1JtnrSshGfi-1
|
||||
*/
|
||||
/**
|
||||
* Function to get items from nested maps
|
||||
*/
|
||||
/* Assign base colors for the PyData theme */
|
||||
:root {
|
||||
--pst-teal-50: #f4fbfc;
|
||||
--pst-teal-100: #e9f6f8;
|
||||
--pst-teal-200: #d0ecf1;
|
||||
--pst-teal-300: #abdde6;
|
||||
--pst-teal-400: #3fb1c5;
|
||||
--pst-teal-500: #0a7d91;
|
||||
--pst-teal-600: #085d6c;
|
||||
--pst-teal-700: #064752;
|
||||
--pst-teal-800: #042c33;
|
||||
--pst-teal-900: #021b1f;
|
||||
--pst-violet-50: #f4eefb;
|
||||
--pst-violet-100: #e0c7ff;
|
||||
--pst-violet-200: #d5b4fd;
|
||||
--pst-violet-300: #b780ff;
|
||||
--pst-violet-400: #9c5ffd;
|
||||
--pst-violet-500: #8045e5;
|
||||
--pst-violet-600: #6432bd;
|
||||
--pst-violet-700: #4b258f;
|
||||
--pst-violet-800: #341a61;
|
||||
--pst-violet-900: #1e0e39;
|
||||
--pst-gray-50: #f9f9fa;
|
||||
--pst-gray-100: #f3f4f5;
|
||||
--pst-gray-200: #e5e7ea;
|
||||
--pst-gray-300: #d1d5da;
|
||||
--pst-gray-400: #9ca4af;
|
||||
--pst-gray-500: #677384;
|
||||
--pst-gray-600: #48566b;
|
||||
--pst-gray-700: #29313d;
|
||||
--pst-gray-800: #222832;
|
||||
--pst-gray-900: #14181e;
|
||||
--pst-pink-50: #fcf8fd;
|
||||
--pst-pink-100: #fcf0fa;
|
||||
--pst-pink-200: #f8dff5;
|
||||
--pst-pink-300: #f3c7ee;
|
||||
--pst-pink-400: #e47fd7;
|
||||
--pst-pink-500: #c132af;
|
||||
--pst-pink-600: #912583;
|
||||
--pst-pink-700: #6e1c64;
|
||||
--pst-pink-800: #46123f;
|
||||
--pst-pink-900: #2b0b27;
|
||||
--pst-foundation-white: #ffffff;
|
||||
--pst-foundation-black: #14181e;
|
||||
--pst-green-10: #f1fdfd;
|
||||
--pst-green-50: #E0F7F6;
|
||||
--pst-green-100: #B3E8E6;
|
||||
--pst-green-200: #80D6D3;
|
||||
--pst-green-300: #4DC4C0;
|
||||
--pst-green-400: #4FB2AD;
|
||||
--pst-green-500: #287977;
|
||||
--pst-green-600: #246161;
|
||||
--pst-green-700: #204F4F;
|
||||
--pst-green-800: #1C3C3C;
|
||||
--pst-green-900: #0D2427;
|
||||
--pst-lilac-50: #f4eefb;
|
||||
--pst-lilac-100: #DAD6FE;
|
||||
--pst-lilac-200: #BCB2FD;
|
||||
--pst-lilac-300: #9F8BFA;
|
||||
--pst-lilac-400: #7F5CF6;
|
||||
--pst-lilac-500: #6F3AED;
|
||||
--pst-lilac-600: #6028D9;
|
||||
--pst-lilac-700: #5021B6;
|
||||
--pst-lilac-800: #431D95;
|
||||
--pst-lilac-900: #1e0e39;
|
||||
--pst-header-height: 2.5rem;
|
||||
pre {
|
||||
white-space: break-spaces;
|
||||
}
|
||||
|
||||
html {
|
||||
--pst-font-family-base: 'Inter';
|
||||
--pst-font-family-heading: 'Inter Tight', sans-serif;
|
||||
@media (min-width: 1200px) {
|
||||
.container,
|
||||
.container-lg,
|
||||
.container-md,
|
||||
.container-sm,
|
||||
.container-xl {
|
||||
max-width: 2560px !important;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* write the color rules for each theme (light/dark)
|
||||
*/
|
||||
/* NOTE:
|
||||
* Mixins enable us to reuse the same definitions for the different modes
|
||||
* https://sass-lang.com/documentation/at-rules/mixin
|
||||
* something inserts a variable into a CSS selector or property name
|
||||
* https://sass-lang.com/documentation/interpolation
|
||||
*/
|
||||
/* Defaults to light mode if data-theme is not set */
|
||||
html:not([data-theme]) {
|
||||
--pst-color-primary: #287977;
|
||||
--pst-color-primary-bg: #80D6D3;
|
||||
--pst-color-secondary: #6F3AED;
|
||||
--pst-color-secondary-bg: #DAD6FE;
|
||||
--pst-color-accent: #c132af;
|
||||
--pst-color-accent-bg: #f8dff5;
|
||||
--pst-color-info: #276be9;
|
||||
--pst-color-info-bg: #dce7fc;
|
||||
--pst-color-warning: #f66a0a;
|
||||
--pst-color-warning-bg: #f8e3d0;
|
||||
--pst-color-success: #00843f;
|
||||
--pst-color-success-bg: #d6ece1;
|
||||
--pst-color-attention: var(--pst-color-warning);
|
||||
--pst-color-attention-bg: var(--pst-color-warning-bg);
|
||||
--pst-color-danger: #d72d47;
|
||||
--pst-color-danger-bg: #f9e1e4;
|
||||
--pst-color-text-base: #222832;
|
||||
--pst-color-text-muted: #48566b;
|
||||
--pst-color-heading-color: #ffffff;
|
||||
--pst-color-shadow: rgba(0, 0, 0, 0.1);
|
||||
--pst-color-border: #d1d5da;
|
||||
--pst-color-border-muted: rgba(23, 23, 26, 0.2);
|
||||
--pst-color-inline-code: #912583;
|
||||
--pst-color-inline-code-links: #246161;
|
||||
--pst-color-target: #f3cf95;
|
||||
--pst-color-background: #ffffff;
|
||||
--pst-color-on-background: #F4F9F8;
|
||||
--pst-color-surface: #F4F9F8;
|
||||
--pst-color-on-surface: #222832;
|
||||
}
|
||||
html:not([data-theme]) {
|
||||
--pst-color-link: var(--pst-color-primary);
|
||||
--pst-color-link-hover: var(--pst-color-secondary);
|
||||
}
|
||||
html:not([data-theme]) .only-dark,
|
||||
html:not([data-theme]) .only-dark ~ figcaption {
|
||||
display: none !important;
|
||||
#my-component-root *,
|
||||
#headlessui-portal-root * {
|
||||
z-index: 10000;
|
||||
}
|
||||
|
||||
/* NOTE: @each {...} is like a for-loop
|
||||
* https://sass-lang.com/documentation/at-rules/control/each
|
||||
*/
|
||||
html[data-theme=light] {
|
||||
--pst-color-primary: #287977;
|
||||
--pst-color-primary-bg: #80D6D3;
|
||||
--pst-color-secondary: #6F3AED;
|
||||
--pst-color-secondary-bg: #DAD6FE;
|
||||
--pst-color-accent: #c132af;
|
||||
--pst-color-accent-bg: #f8dff5;
|
||||
--pst-color-info: #276be9;
|
||||
--pst-color-info-bg: #dce7fc;
|
||||
--pst-color-warning: #f66a0a;
|
||||
--pst-color-warning-bg: #f8e3d0;
|
||||
--pst-color-success: #00843f;
|
||||
--pst-color-success-bg: #d6ece1;
|
||||
--pst-color-attention: var(--pst-color-warning);
|
||||
--pst-color-attention-bg: var(--pst-color-warning-bg);
|
||||
--pst-color-danger: #d72d47;
|
||||
--pst-color-danger-bg: #f9e1e4;
|
||||
--pst-color-text-base: #222832;
|
||||
--pst-color-text-muted: #48566b;
|
||||
--pst-color-heading-color: #ffffff;
|
||||
--pst-color-shadow: rgba(0, 0, 0, 0.1);
|
||||
--pst-color-border: #d1d5da;
|
||||
--pst-color-border-muted: rgba(23, 23, 26, 0.2);
|
||||
--pst-color-inline-code: #912583;
|
||||
--pst-color-inline-code-links: #246161;
|
||||
--pst-color-target: #f3cf95;
|
||||
--pst-color-background: #ffffff;
|
||||
--pst-color-on-background: #F4F9F8;
|
||||
--pst-color-surface: #F4F9F8;
|
||||
--pst-color-on-surface: #222832;
|
||||
color-scheme: light;
|
||||
}
|
||||
html[data-theme=light] {
|
||||
--pst-color-link: var(--pst-color-primary);
|
||||
--pst-color-link-hover: var(--pst-color-secondary);
|
||||
}
|
||||
html[data-theme=light] .only-dark,
|
||||
html[data-theme=light] .only-dark ~ figcaption {
|
||||
display: none !important;
|
||||
table.longtable code {
|
||||
white-space: normal;
|
||||
}
|
||||
|
||||
html[data-theme=dark] {
|
||||
--pst-color-primary: #4FB2AD;
|
||||
--pst-color-primary-bg: #1C3C3C;
|
||||
--pst-color-secondary: #7F5CF6;
|
||||
--pst-color-secondary-bg: #431D95;
|
||||
--pst-color-accent: #e47fd7;
|
||||
--pst-color-accent-bg: #46123f;
|
||||
--pst-color-info: #79a3f2;
|
||||
--pst-color-info-bg: #06245d;
|
||||
--pst-color-warning: #ff9245;
|
||||
--pst-color-warning-bg: #652a02;
|
||||
--pst-color-success: #5fb488;
|
||||
--pst-color-success-bg: #002f17;
|
||||
--pst-color-attention: var(--pst-color-warning);
|
||||
--pst-color-attention-bg: var(--pst-color-warning-bg);
|
||||
--pst-color-danger: #e78894;
|
||||
--pst-color-danger-bg: #4e111b;
|
||||
--pst-color-text-base: #ced6dd;
|
||||
--pst-color-text-muted: #9ca4af;
|
||||
--pst-color-heading-color: #14181e;
|
||||
--pst-color-shadow: rgba(0, 0, 0, 0.2);
|
||||
--pst-color-border: #48566b;
|
||||
--pst-color-border-muted: #29313d;
|
||||
--pst-color-inline-code: #f3c7ee;
|
||||
--pst-color-inline-code-links: #4FB2AD;
|
||||
--pst-color-target: #675c04;
|
||||
--pst-color-background: #14181e;
|
||||
--pst-color-on-background: #222832;
|
||||
--pst-color-surface: #29313d;
|
||||
--pst-color-on-surface: #f3f4f5;
|
||||
/* Adjust images in dark mode (unless they have class .only-dark or
|
||||
* .dark-light, in which case assume they're already optimized for dark
|
||||
* mode).
|
||||
*/
|
||||
/* Give images a light background in dark mode in case they have
|
||||
* transparency and black text (unless they have class .only-dark or .dark-light, in
|
||||
* which case assume they're already optimized for dark mode).
|
||||
*/
|
||||
color-scheme: dark;
|
||||
}
|
||||
html[data-theme=dark] {
|
||||
--pst-color-link: var(--pst-color-primary);
|
||||
--pst-color-link-hover: var(--pst-color-secondary);
|
||||
}
|
||||
html[data-theme=dark] .only-light,
|
||||
html[data-theme=dark] .only-light ~ figcaption {
|
||||
display: none !important;
|
||||
}
|
||||
html[data-theme=dark] img:not(.only-dark):not(.dark-light) {
|
||||
filter: brightness(0.8) contrast(1.2);
|
||||
}
|
||||
html[data-theme=dark] .bd-content img:not(.only-dark):not(.dark-light) {
|
||||
background: rgb(255, 255, 255);
|
||||
border-radius: 0.25rem;
|
||||
}
|
||||
html[data-theme=dark] .MathJax_SVG * {
|
||||
fill: var(--pst-color-text-base);
|
||||
table.longtable td {
|
||||
max-width: 600px;
|
||||
}
|
||||
|
||||
.pst-color-primary {
|
||||
color: var(--pst-color-primary);
|
||||
/* Banner Styles */
|
||||
.banner {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
background-color: #ffcc00;
|
||||
color: #000;
|
||||
text-align: center;
|
||||
padding: 5px 0;
|
||||
z-index: 1003; /* Highest z-index to sit above everything */
|
||||
height: 30px; /* Fixed height */
|
||||
line-height: 20px; /* Vertically center the text */
|
||||
}
|
||||
|
||||
.pst-color-secondary {
|
||||
color: var(--pst-color-secondary);
|
||||
/* Navbar Styles */
|
||||
#navbar {
|
||||
position: fixed;
|
||||
top: 30px; /* Positioned right below the banner */
|
||||
left: 0;
|
||||
right: 0;
|
||||
z-index: 1002; /* Below the banner */
|
||||
height: 50px; /* Fixed height */
|
||||
}
|
||||
|
||||
.pst-color-accent {
|
||||
color: var(--pst-color-accent);
|
||||
}
|
||||
|
||||
.pst-color-info {
|
||||
color: var(--pst-color-info);
|
||||
}
|
||||
|
||||
.pst-color-warning {
|
||||
color: var(--pst-color-warning);
|
||||
}
|
||||
|
||||
.pst-color-success {
|
||||
color: var(--pst-color-success);
|
||||
}
|
||||
|
||||
.pst-color-attention {
|
||||
color: var(--pst-color-attention);
|
||||
}
|
||||
|
||||
.pst-color-danger {
|
||||
color: var(--pst-color-danger);
|
||||
}
|
||||
|
||||
.pst-color-text-base {
|
||||
color: var(--pst-color-text-base);
|
||||
}
|
||||
|
||||
.pst-color-text-muted {
|
||||
color: var(--pst-color-text-muted);
|
||||
}
|
||||
|
||||
.pst-color-heading-color {
|
||||
color: var(--pst-color-heading-color);
|
||||
}
|
||||
|
||||
.pst-color-shadow {
|
||||
color: var(--pst-color-shadow);
|
||||
}
|
||||
|
||||
.pst-color-border {
|
||||
color: var(--pst-color-border);
|
||||
}
|
||||
|
||||
.pst-color-border-muted {
|
||||
color: var(--pst-color-border-muted);
|
||||
}
|
||||
|
||||
.pst-color-inline-code {
|
||||
color: var(--pst-color-inline-code);
|
||||
}
|
||||
|
||||
.pst-color-inline-code-links {
|
||||
color: var(--pst-color-inline-code-links);
|
||||
}
|
||||
|
||||
.pst-color-target {
|
||||
color: var(--pst-color-target);
|
||||
}
|
||||
|
||||
.pst-color-background {
|
||||
color: var(--pst-color-background);
|
||||
}
|
||||
|
||||
.pst-color-on-background {
|
||||
color: var(--pst-color-on-background);
|
||||
}
|
||||
|
||||
.pst-color-surface {
|
||||
color: var(--pst-color-surface);
|
||||
}
|
||||
|
||||
.pst-color-on-surface {
|
||||
color: var(--pst-color-on-surface);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Adjust the height of the navbar */
|
||||
.bd-header .bd-header__inner{
|
||||
height: 52px; /* Adjust this value as needed */
|
||||
}
|
||||
|
||||
.navbar-nav > li > a {
|
||||
line-height: 52px; /* Vertically center the navbar links */
|
||||
}
|
||||
|
||||
/* Make sure the navbar items align properly */
|
||||
.navbar-nav {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
|
||||
.bd-header .navbar-header-items__start{
|
||||
margin-left: 0rem
|
||||
}
|
||||
|
||||
.bd-header button.primary-toggle {
|
||||
margin-right: 0rem;
|
||||
}
|
||||
|
||||
.bd-header ul.navbar-nav .dropdown .dropdown-menu {
|
||||
overflow-y: auto; /* Enable vertical scrolling */
|
||||
max-height: 80vh
|
||||
}
|
||||
|
||||
.bd-sidebar-primary {
|
||||
width: 22%; /* Adjust this value to your preference */
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.bd-sidebar-secondary {
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.toc-entry a.nav-link, .toc-entry a>code {
|
||||
background-color: transparent;
|
||||
border-color: transparent;
|
||||
}
|
||||
|
||||
.bd-sidebar-primary code{
|
||||
background-color: transparent;
|
||||
border-color: transparent;
|
||||
}
|
||||
|
||||
|
||||
.toctree-wrapper li[class^=toctree-l1]>a {
|
||||
font-size: 1.3em
|
||||
}
|
||||
|
||||
.toctree-wrapper li[class^=toctree-l1] {
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
.toctree-wrapper li[class^=toctree-l]>ul {
|
||||
margin-top: 0.5em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
*, :after, :before {
|
||||
font-style: normal;
|
||||
}
|
||||
|
||||
div.deprecated {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
.admonition-beta.admonition, div.admonition-beta.admonition {
|
||||
border-color: var(--pst-color-warning);
|
||||
margin-top:0.5em;
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
.admonition-beta>.admonition-title, div.admonition-beta>.admonition-title {
|
||||
background-color: var(--pst-color-warning-bg);
|
||||
}
|
||||
|
||||
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd {
|
||||
margin-left: 1rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 777 B |
@@ -1,11 +0,0 @@
|
||||
<svg width="72" height="19" viewBox="0 0 72 19" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<g clip-path="url(#clip0_4019_2020)">
|
||||
<path d="M29.4038 5.84477C30.1256 6.56657 30.1256 7.74117 29.4038 8.46296L27.7869 10.0538L27.7704 9.96259C27.6524 9.30879 27.3415 8.71552 26.8723 8.24627C26.5189 7.8936 26.1012 7.63282 25.6305 7.47143C25.3383 7.76508 25.1777 8.14989 25.1777 8.55487C25.1777 8.63706 25.1851 8.72224 25.2001 8.80742C25.4593 8.90082 25.6887 9.04503 25.8815 9.23781C26.6033 9.9596 26.6033 11.1342 25.8815 11.856L24.4738 13.2637C24.1129 13.6246 23.6392 13.8047 23.1647 13.8047C22.6902 13.8047 22.2165 13.6246 21.8556 13.2637C21.1338 12.5419 21.1338 11.3673 21.8556 10.6455L23.4725 9.05549L23.489 9.14665C23.6063 9.79896 23.9171 10.3922 24.3879 10.8622C24.742 11.2164 25.1343 11.4518 25.6043 11.6124L25.691 11.5257C25.954 11.2627 26.0982 10.913 26.0982 10.5402C26.0982 10.4572 26.0907 10.3743 26.0765 10.2929C25.8053 10.2032 25.5819 10.0754 25.3786 9.87218C25.0857 9.57928 24.9034 9.20493 24.8526 8.79024C24.8489 8.76035 24.8466 8.73121 24.8437 8.70132C24.8033 8.16109 24.9983 7.63357 25.3786 7.25399L26.7864 5.84627C27.1353 5.49733 27.6001 5.30455 28.0955 5.30455C28.5909 5.30455 29.0556 5.49658 29.4046 5.84627L29.4038 5.84477ZM36.7548 9.56583C36.7548 14.7163 32.5645 18.9058 27.4148 18.9058H9.34C4.1903 18.9058 0 14.7163 0 9.56583C0 4.41538 4.1903 0.22583 9.34 0.22583H27.4148C32.5652 0.22583 36.7548 4.41613 36.7548 9.56583ZM18 14.25C18.1472 14.0714 17.4673 13.5686 17.3283 13.384C17.0459 13.0777 17.0444 12.6368 16.8538 12.2789C16.3876 11.1985 15.8518 10.1262 15.1024 9.21166C14.3104 8.21116 13.333 7.38326 12.4745 6.44403C11.8371 5.78873 11.6668 4.85548 11.1041 4.15087C10.3285 3.00541 7.87624 2.69308 7.51683 4.31077C7.51833 4.36158 7.50264 4.39371 7.45855 4.42584C7.2598 4.57005 7.08271 4.73518 6.93402 4.93468C6.57013 5.44129 6.51409 6.30057 6.96839 6.75561C6.98333 6.51576 6.99155 6.28936 7.18134 6.1175C7.53252 6.41862 8.06304 6.52547 8.47026 6.30057C9.36989 7.585 9.14573 9.36184 9.86005 10.7457C10.0573 11.0729 10.2561 11.4069 10.5094 11.6939C10.7148 12.0137 11.4247 12.391 11.4665 12.6869C11.474 13.195 11.4142 13.7502 11.7475 14.1753C11.9044 14.4936 11.5188 14.8134 11.208 14.7738C10.8045 14.8291 10.3121 14.5026 9.95868 14.7036C9.8339 14.8388 9.58957 14.6894 9.48197 14.8769C9.44461 14.9741 9.24286 15.1108 9.36316 15.2042C9.49691 15.1026 9.62095 14.9965 9.80102 15.057C9.77412 15.2035 9.88994 15.2244 9.98184 15.267C9.97886 15.3663 9.92057 15.468 9.99679 15.5524C10.0857 15.4627 10.1388 15.3357 10.28 15.2983C10.7492 15.9238 11.2267 14.6655 12.2421 15.2318C12.0359 15.2214 11.8528 15.2475 11.7139 15.4172C11.6795 15.4553 11.6503 15.5001 11.7109 15.5494C12.2586 15.196 12.2556 15.6705 12.6112 15.5248C12.8847 15.382 13.1567 15.2035 13.4817 15.2543C13.1657 15.3454 13.153 15.5995 12.9677 15.8139C12.9363 15.8468 12.9213 15.8842 12.9579 15.9387C13.614 15.8834 13.6678 15.6652 14.1975 15.3977C14.5928 15.1564 14.9866 15.7414 15.3288 15.4082C15.4043 15.3357 15.5074 15.3604 15.6008 15.3507C15.4812 14.7133 14.1669 15.4672 14.1878 14.6124C14.6107 14.3247 14.5136 13.7741 14.542 13.3295C15.0284 13.5992 15.5694 13.7561 16.0461 14.0139C16.2867 14.4025 16.6641 14.9158 17.1669 14.8822C17.1804 14.8433 17.1923 14.8089 17.2065 14.7693C17.359 14.7955 17.5547 14.8964 17.6384 14.7036C17.8663 14.9419 18.201 14.93 18.4992 14.8687C18.7196 14.6894 18.0845 14.4338 17.9993 14.2493L18 14.25ZM31.3458 7.15387C31.3458 6.28413 31.0081 5.46744 30.3946 4.85399C29.7812 4.24054 28.9645 3.9028 28.094 3.9028C27.2235 3.9028 26.4068 4.24054 25.7933 4.85399L24.3856 6.26171C24.0569 6.59048 23.8073 6.97678 23.6436 7.40941L23.6339 7.43407L23.6085 7.44154C23.0974 7.5992 22.6469 7.86969 22.2696 8.24702L20.8618 9.65475C19.5938 10.9235 19.5938 12.9873 20.8618 14.2553C21.4753 14.8687 22.292 15.2064 23.1617 15.2064C24.0314 15.2064 24.8489 14.8687 25.4623 14.2553L26.8701 12.8475C27.1973 12.5203 27.4454 12.1355 27.609 11.7036L27.6188 11.6789L27.6442 11.6707C28.1463 11.5168 28.6095 11.2373 28.9854 10.8622L30.3931 9.4545C31.0066 8.84105 31.3443 8.02436 31.3443 7.15387H31.3458ZM12.8802 13.1972C12.7592 13.6695 12.7196 14.4742 12.1054 14.4974C12.0546 14.7701 12.2944 14.8724 12.5119 14.785C12.7278 14.6856 12.8302 14.8635 12.9026 15.0406C13.2359 15.0891 13.7291 14.9292 13.7477 14.5347C13.2501 14.2478 13.0962 13.7023 12.8795 13.1972H12.8802Z" fill="#F4F3FF"/>
|
||||
<path d="M43.5142 15.2258L47.1462 3.70583H49.9702L53.6022 15.2258H51.6182L48.3222 4.88983H48.7542L45.4982 15.2258H43.5142ZM45.5382 12.7298V10.9298H51.5862V12.7298H45.5382ZM55.0486 15.2258V3.70583H59.8086C59.9206 3.70583 60.0646 3.71116 60.2406 3.72183C60.4166 3.72716 60.5792 3.74316 60.7286 3.76983C61.3952 3.87116 61.9446 4.0925 62.3766 4.43383C62.8139 4.77516 63.1366 5.20716 63.3446 5.72983C63.5579 6.24716 63.6646 6.82316 63.6646 7.45783C63.6646 8.08716 63.5579 8.66316 63.3446 9.18583C63.1312 9.70316 62.8059 10.1325 62.3686 10.4738C61.9366 10.8152 61.3899 11.0365 60.7286 11.1378C60.5792 11.1592 60.4139 11.1752 60.2326 11.1858C60.0566 11.1965 59.9152 11.2018 59.8086 11.2018H56.9766V15.2258H55.0486ZM56.9766 9.40183H59.7286C59.8352 9.40183 59.9552 9.3965 60.0886 9.38583C60.2219 9.37516 60.3446 9.35383 60.4566 9.32183C60.7766 9.24183 61.0272 9.1005 61.2086 8.89783C61.3952 8.69516 61.5259 8.46583 61.6006 8.20983C61.6806 7.95383 61.7206 7.70316 61.7206 7.45783C61.7206 7.2125 61.6806 6.96183 61.6006 6.70583C61.5259 6.4445 61.3952 6.2125 61.2086 6.00983C61.0272 5.80716 60.7766 5.66583 60.4566 5.58583C60.3446 5.55383 60.2219 5.53516 60.0886 5.52983C59.9552 5.51916 59.8352 5.51383 59.7286 5.51383H56.9766V9.40183ZM65.4273 15.2258V3.70583H67.3553V15.2258H65.4273Z" fill="#F4F3FF"/>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="clip0_4019_2020">
|
||||
<rect width="71.0711" height="18.68" fill="white" transform="translate(0 0.22583)"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 5.7 KiB |
@@ -1,11 +0,0 @@
|
||||
<svg width="72" height="20" viewBox="0 0 72 20" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<g clip-path="url(#clip0_4019_689)">
|
||||
<path d="M29.4038 5.97905C30.1256 6.70085 30.1256 7.87545 29.4038 8.59724L27.7869 10.188L27.7704 10.0969C27.6524 9.44307 27.3415 8.84979 26.8723 8.38055C26.5189 8.02787 26.1012 7.7671 25.6305 7.60571C25.3383 7.89936 25.1777 8.28416 25.1777 8.68915C25.1777 8.77134 25.1851 8.85652 25.2001 8.9417C25.4593 9.0351 25.6887 9.17931 25.8815 9.37209C26.6033 10.0939 26.6033 11.2685 25.8815 11.9903L24.4738 13.398C24.1129 13.7589 23.6392 13.939 23.1647 13.939C22.6902 13.939 22.2165 13.7589 21.8556 13.398C21.1338 12.6762 21.1338 11.5016 21.8556 10.7798L23.4725 9.18977L23.489 9.28093C23.6063 9.93323 23.9171 10.5265 24.3879 10.9965C24.742 11.3507 25.1343 11.586 25.6043 11.7467L25.691 11.66C25.954 11.397 26.0982 11.0473 26.0982 10.6745C26.0982 10.5915 26.0907 10.5086 26.0765 10.4271C25.8053 10.3375 25.5819 10.2097 25.3786 10.0065C25.0857 9.71356 24.9034 9.33921 24.8526 8.92451C24.8489 8.89463 24.8466 8.86549 24.8437 8.8356C24.8033 8.29537 24.9983 7.76785 25.3786 7.38827L26.7864 5.98055C27.1353 5.6316 27.6001 5.43883 28.0955 5.43883C28.5909 5.43883 29.0556 5.63086 29.4046 5.98055L29.4038 5.97905ZM36.7548 9.70011C36.7548 14.8506 32.5645 19.0401 27.4148 19.0401H9.34C4.1903 19.0401 0 14.8506 0 9.70011C0 4.54966 4.1903 0.360107 9.34 0.360107H27.4148C32.5652 0.360107 36.7548 4.55041 36.7548 9.70011ZM18 14.3843C18.1472 14.2057 17.4673 13.7029 17.3283 13.5183C17.0459 13.2119 17.0444 12.7711 16.8538 12.4132C16.3876 11.3327 15.8518 10.2605 15.1024 9.34594C14.3104 8.34543 13.333 7.51754 12.4745 6.57831C11.8371 5.92301 11.6668 4.98976 11.1041 4.28515C10.3285 3.13969 7.87624 2.82736 7.51683 4.44505C7.51833 4.49586 7.50264 4.52799 7.45855 4.56012C7.2598 4.70433 7.08271 4.86946 6.93402 5.06896C6.57013 5.57556 6.51409 6.43484 6.96839 6.88989C6.98333 6.65004 6.99155 6.42364 7.18134 6.25178C7.53252 6.5529 8.06304 6.65975 8.47026 6.43484C9.36989 7.71928 9.14573 9.49612 9.86005 10.8799C10.0573 11.2072 10.2561 11.5412 10.5094 11.8281C10.7148 12.1479 11.4247 12.5253 11.4665 12.8212C11.474 13.3293 11.4142 13.8844 11.7475 14.3096C11.9044 14.6279 11.5188 14.9477 11.208 14.9081C10.8045 14.9634 10.3121 14.6369 9.95868 14.8379C9.8339 14.9731 9.58957 14.8237 9.48197 15.0112C9.44461 15.1083 9.24286 15.2451 9.36316 15.3385C9.49691 15.2369 9.62095 15.1308 9.80102 15.1913C9.77412 15.3377 9.88994 15.3587 9.98184 15.4012C9.97886 15.5006 9.92057 15.6022 9.99679 15.6867C10.0857 15.597 10.1388 15.47 10.28 15.4326C10.7492 16.058 11.2267 14.7997 12.2421 15.3661C12.0359 15.3557 11.8528 15.3818 11.7139 15.5514C11.6795 15.5895 11.6503 15.6344 11.7109 15.6837C12.2586 15.3303 12.2556 15.8047 12.6112 15.659C12.8847 15.5163 13.1567 15.3377 13.4817 15.3885C13.1657 15.4797 13.153 15.7337 12.9677 15.9482C12.9363 15.9811 12.9213 16.0184 12.9579 16.073C13.614 16.0177 13.6678 15.7995 14.1975 15.532C14.5928 15.2907 14.9866 15.8757 15.3288 15.5425C15.4043 15.47 15.5074 15.4946 15.6008 15.4849C15.4812 14.8476 14.1669 15.6015 14.1878 14.7467C14.6107 14.459 14.5136 13.9083 14.542 13.4638C15.0284 13.7335 15.5694 13.8904 16.0461 14.1482C16.2867 14.5367 16.6641 15.0501 17.1669 15.0164C17.1804 14.9776 17.1923 14.9432 17.2065 14.9036C17.359 14.9298 17.5547 15.0306 17.6384 14.8379C17.8663 15.0762 18.201 15.0643 18.4992 15.003C18.7196 14.8237 18.0845 14.5681 17.9993 14.3836L18 14.3843ZM31.3458 7.28815C31.3458 6.41841 31.0081 5.60172 30.3946 4.98826C29.7812 4.37481 28.9645 4.03708 28.094 4.03708C27.2235 4.03708 26.4068 4.37481 25.7933 4.98826L24.3856 6.39599C24.0569 6.72476 23.8073 7.11106 23.6436 7.54369L23.6339 7.56835L23.6085 7.57582C23.0974 7.73348 22.6469 8.00396 22.2696 8.3813L20.8618 9.78902C19.5938 11.0578 19.5938 13.1215 20.8618 14.3895C21.4753 15.003 22.292 15.3407 23.1617 15.3407C24.0314 15.3407 24.8489 15.003 25.4623 14.3895L26.8701 12.9818C27.1973 12.6545 27.4454 12.2697 27.609 11.8378L27.6188 11.8132L27.6442 11.805C28.1463 11.651 28.6095 11.3716 28.9854 10.9965L30.3931 9.58878C31.0066 8.97532 31.3443 8.15863 31.3443 7.28815H31.3458ZM12.8802 13.3315C12.7592 13.8037 12.7196 14.6085 12.1054 14.6316C12.0546 14.9044 12.2944 15.0067 12.5119 14.9193C12.7278 14.8199 12.8302 14.9978 12.9026 15.1748C13.2359 15.2234 13.7291 15.0635 13.7477 14.669C13.2501 14.3821 13.0962 13.8366 12.8795 13.3315H12.8802Z" fill="#246161"/>
|
||||
<path d="M43.5142 15.3601L47.1462 3.84011H49.9702L53.6022 15.3601H51.6182L48.3222 5.02411H48.7542L45.4982 15.3601H43.5142ZM45.5382 12.8641V11.0641H51.5862V12.8641H45.5382ZM55.0486 15.3601V3.84011H59.8086C59.9206 3.84011 60.0646 3.84544 60.2406 3.85611C60.4166 3.86144 60.5792 3.87744 60.7286 3.90411C61.3952 4.00544 61.9446 4.22677 62.3766 4.56811C62.8139 4.90944 63.1366 5.34144 63.3446 5.86411C63.5579 6.38144 63.6646 6.95744 63.6646 7.59211C63.6646 8.22144 63.5579 8.79744 63.3446 9.32011C63.1312 9.83744 62.8059 10.2668 62.3686 10.6081C61.9366 10.9494 61.3899 11.1708 60.7286 11.2721C60.5792 11.2934 60.4139 11.3094 60.2326 11.3201C60.0566 11.3308 59.9152 11.3361 59.8086 11.3361H56.9766V15.3601H55.0486ZM56.9766 9.53611H59.7286C59.8352 9.53611 59.9552 9.53077 60.0886 9.52011C60.2219 9.50944 60.3446 9.48811 60.4566 9.45611C60.7766 9.37611 61.0272 9.23477 61.2086 9.03211C61.3952 8.82944 61.5259 8.60011 61.6006 8.34411C61.6806 8.08811 61.7206 7.83744 61.7206 7.59211C61.7206 7.34677 61.6806 7.09611 61.6006 6.84011C61.5259 6.57877 61.3952 6.34677 61.2086 6.14411C61.0272 5.94144 60.7766 5.80011 60.4566 5.72011C60.3446 5.68811 60.2219 5.66944 60.0886 5.66411C59.9552 5.65344 59.8352 5.64811 59.7286 5.64811H56.9766V9.53611ZM65.4273 15.3601V3.84011H67.3553V15.3601H65.4273Z" fill="#246161"/>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="clip0_4019_689">
|
||||
<rect width="71.0711" height="18.68" fill="white" transform="translate(0 0.360107)"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 5.7 KiB |
@@ -62,7 +62,7 @@ class ExampleLinksDirective(SphinxDirective):
|
||||
item_node.append(para_node)
|
||||
list_node.append(item_node)
|
||||
if list_node.children:
|
||||
title_node = nodes.rubric()
|
||||
title_node = nodes.title()
|
||||
title_node.append(nodes.Text(f"Examples using {class_or_func_name}"))
|
||||
return [title_node, list_node]
|
||||
return [list_node]
|
||||
@@ -75,10 +75,7 @@ class Beta(BaseAdmonition):
|
||||
def run(self):
|
||||
self.content = self.content or StringList(
|
||||
[
|
||||
(
|
||||
"This feature is in beta. It is actively being worked on, so the "
|
||||
"API may change."
|
||||
)
|
||||
"This feature is in beta. It is actively being worked on, so the API may change."
|
||||
]
|
||||
)
|
||||
self.arguments = self.arguments or ["Beta"]
|
||||
@@ -93,10 +90,13 @@ def setup(app):
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = "🦜🔗 LangChain"
|
||||
copyright = "2023, LangChain Inc"
|
||||
author = "LangChain, Inc"
|
||||
copyright = "2023, LangChain, Inc."
|
||||
author = "LangChain, Inc."
|
||||
|
||||
html_favicon = "_static/img/brand/favicon.png"
|
||||
version = data["tool"]["poetry"]["version"]
|
||||
release = version
|
||||
|
||||
html_title = project + " " + version
|
||||
html_last_updated_fmt = "%b %d, %Y"
|
||||
|
||||
|
||||
@@ -112,13 +112,11 @@ extensions = [
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinxcontrib.autodoc_pydantic",
|
||||
"IPython.sphinxext.ipython_console_highlighting",
|
||||
"myst_parser",
|
||||
"_extensions.gallery_directive",
|
||||
"sphinx_design",
|
||||
"sphinx_copybutton",
|
||||
"sphinx_panels",
|
||||
"IPython.sphinxext.ipython_console_highlighting",
|
||||
]
|
||||
source_suffix = [".rst", ".md"]
|
||||
source_suffix = [".rst"]
|
||||
|
||||
# some autodoc pydantic options are repeated in the actual template.
|
||||
# potentially user error, but there may be bugs in the sphinx extension
|
||||
@@ -150,84 +148,23 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
# The theme to use for HTML and HTML Help pages.
|
||||
html_theme = "pydata_sphinx_theme"
|
||||
html_theme = "scikit-learn-modern"
|
||||
html_theme_path = ["themes"]
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
html_theme_options = {
|
||||
# # -- General configuration ------------------------------------------------
|
||||
"sidebar_includehidden": True,
|
||||
"use_edit_page_button": False,
|
||||
# # "analytics": {
|
||||
# # "plausible_analytics_domain": "scikit-learn.org",
|
||||
# # "plausible_analytics_url": "https://views.scientific-python.org/js/script.js",
|
||||
# # },
|
||||
# # If "prev-next" is included in article_footer_items, then setting show_prev_next
|
||||
# # to True would repeat prev and next links. See
|
||||
# # https://github.com/pydata/pydata-sphinx-theme/blob/b731dc230bc26a3d1d1bb039c56c977a9b3d25d8/src/pydata_sphinx_theme/theme/pydata_sphinx_theme/layout.html#L118-L129
|
||||
"show_prev_next": False,
|
||||
"search_bar_text": "Search",
|
||||
"navigation_with_keys": True,
|
||||
"collapse_navigation": True,
|
||||
"navigation_depth": 3,
|
||||
"show_nav_level": 1,
|
||||
"show_toc_level": 3,
|
||||
"navbar_align": "left",
|
||||
"header_links_before_dropdown": 5,
|
||||
"header_dropdown_text": "Integrations",
|
||||
"logo": {
|
||||
"image_light": "_static/wordmark-api.svg",
|
||||
"image_dark": "_static/wordmark-api-dark.svg",
|
||||
},
|
||||
"surface_warnings": True,
|
||||
# # -- Template placement in theme layouts ----------------------------------
|
||||
"navbar_start": ["navbar-logo"],
|
||||
# # Note that the alignment of navbar_center is controlled by navbar_align
|
||||
"navbar_center": ["navbar-nav"],
|
||||
"navbar_end": ["langchain_docs", "theme-switcher", "navbar-icon-links"],
|
||||
# # navbar_persistent is persistent right (even when on mobiles)
|
||||
"navbar_persistent": ["search-field"],
|
||||
"article_header_start": ["breadcrumbs"],
|
||||
"article_header_end": [],
|
||||
"article_footer_items": [],
|
||||
"content_footer_items": [],
|
||||
# # Use html_sidebars that map page patterns to list of sidebar templates
|
||||
# "primary_sidebar_end": [],
|
||||
"footer_start": ["copyright"],
|
||||
"footer_center": [],
|
||||
"footer_end": [],
|
||||
# # When specified as a dictionary, the keys should follow glob-style patterns, as in
|
||||
# # https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-exclude_patterns
|
||||
# # In particular, "**" specifies the default for all pages
|
||||
# # Use :html_theme.sidebar_secondary.remove: for file-wide removal
|
||||
# "secondary_sidebar_items": {"**": ["page-toc", "sourcelink"]},
|
||||
# "show_version_warning_banner": True,
|
||||
# "announcement": None,
|
||||
"icon_links": [
|
||||
{
|
||||
# Label for this link
|
||||
"name": "GitHub",
|
||||
# URL where the link will redirect
|
||||
"url": "https://github.com/langchain-ai/langchain", # required
|
||||
# Icon class (if "type": "fontawesome"), or path to local image (if "type": "local")
|
||||
"icon": "fa-brands fa-square-github",
|
||||
# The type of image to be used (see below for details)
|
||||
"type": "fontawesome",
|
||||
},
|
||||
{
|
||||
"name": "X / Twitter",
|
||||
"url": "https://twitter.com/langchainai",
|
||||
"icon": "fab fa-twitter-square",
|
||||
},
|
||||
],
|
||||
"icon_links_label": "Quick Links",
|
||||
"external_links": [
|
||||
{"name": "Legacy reference", "url": "https://api.python.langchain.com/"},
|
||||
],
|
||||
# redirects dictionary maps from old links to new links
|
||||
html_additional_pages = {}
|
||||
redirects = {
|
||||
"index": "langchain_api_reference",
|
||||
}
|
||||
for old_link in redirects:
|
||||
html_additional_pages[old_link] = "redirects.html"
|
||||
|
||||
partners_dir = Path(__file__).parent.parent.parent / "libs/partners"
|
||||
partners = [
|
||||
(p.name, p.name.replace("-", "_") + "_api_reference")
|
||||
for p in partners_dir.iterdir()
|
||||
]
|
||||
partners = sorted(partners)
|
||||
|
||||
html_context = {
|
||||
"display_github": True, # Integrate GitHub
|
||||
@@ -235,6 +172,8 @@ html_context = {
|
||||
"github_repo": "langchain", # Repo name
|
||||
"github_version": "master", # Version
|
||||
"conf_py_path": "/docs/api_reference", # Path in the checkout to the docs root
|
||||
"redirects": redirects,
|
||||
"partners": partners,
|
||||
}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
@@ -244,7 +183,9 @@ html_static_path = ["_static"]
|
||||
|
||||
# These paths are either relative to html_static_path
|
||||
# or fully qualified paths (e.g. https://...)
|
||||
html_css_files = ["css/custom.css"]
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
html_use_index = False
|
||||
|
||||
myst_enable_extensions = ["colon_fence"]
|
||||
@@ -261,5 +202,3 @@ html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
|
||||
# Tell Jinja2 templates the build is running on Read the Docs
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
master_doc = "index"
|
||||
|
||||
@@ -239,7 +239,7 @@ def _construct_doc(
|
||||
package_namespace: str,
|
||||
members_by_namespace: Dict[str, ModuleMembers],
|
||||
package_version: str,
|
||||
) -> List[typing.Tuple[str, str]]:
|
||||
) -> str:
|
||||
"""Construct the contents of the reference.rst file for the given package.
|
||||
|
||||
Args:
|
||||
@@ -251,38 +251,15 @@ def _construct_doc(
|
||||
Returns:
|
||||
The contents of the reference.rst file.
|
||||
"""
|
||||
docs = []
|
||||
index_doc = f"""\
|
||||
:html_theme.sidebar_secondary.remove:
|
||||
full_doc = f"""\
|
||||
=======================
|
||||
``{package_namespace}`` {package_version}
|
||||
=======================
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. _{package_namespace}:
|
||||
|
||||
======================================
|
||||
{package_namespace.replace('_', '-')}: {package_version}
|
||||
======================================
|
||||
|
||||
.. automodule:: {package_namespace}
|
||||
:no-members:
|
||||
:no-inherited-members:
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:maxdepth: 2
|
||||
|
||||
"""
|
||||
index_autosummary = """
|
||||
"""
|
||||
namespaces = sorted(members_by_namespace)
|
||||
|
||||
for module in namespaces:
|
||||
index_doc += f" {module}\n"
|
||||
module_doc = f"""\
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. _{package_namespace}_{module}:
|
||||
"""
|
||||
_members = members_by_namespace[module]
|
||||
classes = [
|
||||
el
|
||||
@@ -304,9 +281,9 @@ def _construct_doc(
|
||||
]
|
||||
if not (classes or functions):
|
||||
continue
|
||||
section = f":mod:`{module}`"
|
||||
section = f":mod:`{package_namespace}.{module}`"
|
||||
underline = "=" * (len(section) + 1)
|
||||
module_doc += f"""
|
||||
full_doc += f"""\
|
||||
{section}
|
||||
{underline}
|
||||
|
||||
@@ -314,26 +291,16 @@ def _construct_doc(
|
||||
:no-members:
|
||||
:no-inherited-members:
|
||||
|
||||
"""
|
||||
|
||||
index_autosummary += f"""
|
||||
:ref:`{package_namespace}_{module}`
|
||||
{'^' * (len(package_namespace) + len(module) + 8)}
|
||||
"""
|
||||
|
||||
if classes:
|
||||
module_doc += f"""\
|
||||
**Classes**
|
||||
|
||||
full_doc += f"""\
|
||||
Classes
|
||||
--------------
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
"""
|
||||
index_autosummary += """
|
||||
**Classes**
|
||||
|
||||
.. autosummary::
|
||||
"""
|
||||
|
||||
for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
|
||||
@@ -350,22 +317,19 @@ def _construct_doc(
|
||||
else:
|
||||
template = "class.rst"
|
||||
|
||||
module_doc += f"""\
|
||||
full_doc += f"""\
|
||||
:template: {template}
|
||||
|
||||
{class_["qualified_name"]}
|
||||
|
||||
"""
|
||||
index_autosummary += f"""
|
||||
{class_['qualified_name']}
|
||||
"""
|
||||
|
||||
if functions:
|
||||
_functions = [f["qualified_name"] for f in functions]
|
||||
fstring = "\n ".join(sorted(_functions))
|
||||
module_doc += f"""\
|
||||
**Functions**
|
||||
|
||||
full_doc += f"""\
|
||||
Functions
|
||||
--------------
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
@@ -374,18 +338,11 @@ def _construct_doc(
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
|
||||
index_autosummary += f"""
|
||||
**Functions**
|
||||
|
||||
.. autosummary::
|
||||
|
||||
{fstring}
|
||||
"""
|
||||
if deprecated_classes:
|
||||
module_doc += f"""\
|
||||
**Deprecated classes**
|
||||
full_doc += f"""\
|
||||
Deprecated classes
|
||||
--------------
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
@@ -393,12 +350,6 @@ def _construct_doc(
|
||||
:toctree: {module}
|
||||
"""
|
||||
|
||||
index_autosummary += """
|
||||
**Deprecated classes**
|
||||
|
||||
.. autosummary::
|
||||
"""
|
||||
|
||||
for class_ in sorted(deprecated_classes, key=lambda c: c["qualified_name"]):
|
||||
if class_["kind"] == "TypedDict":
|
||||
template = "typeddict.rst"
|
||||
@@ -413,21 +364,19 @@ def _construct_doc(
|
||||
else:
|
||||
template = "class.rst"
|
||||
|
||||
module_doc += f"""\
|
||||
full_doc += f"""\
|
||||
:template: {template}
|
||||
|
||||
{class_["qualified_name"]}
|
||||
|
||||
"""
|
||||
index_autosummary += f"""
|
||||
{class_['qualified_name']}
|
||||
"""
|
||||
|
||||
if deprecated_functions:
|
||||
_functions = [f["qualified_name"] for f in deprecated_functions]
|
||||
fstring = "\n ".join(sorted(_functions))
|
||||
module_doc += f"""\
|
||||
**Deprecated functions**
|
||||
full_doc += f"""\
|
||||
Deprecated functions
|
||||
--------------
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
@@ -438,17 +387,7 @@ def _construct_doc(
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
index_autosummary += f"""
|
||||
**Deprecated functions**
|
||||
|
||||
.. autosummary::
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
docs.append((f"{module}.rst", module_doc))
|
||||
docs.append(("index.rst", index_doc + index_autosummary))
|
||||
return docs
|
||||
return full_doc
|
||||
|
||||
|
||||
def _build_rst_file(package_name: str = "langchain") -> None:
|
||||
@@ -460,14 +399,13 @@ def _build_rst_file(package_name: str = "langchain") -> None:
|
||||
package_dir = _package_dir(package_name)
|
||||
package_members = _load_package_modules(package_dir)
|
||||
package_version = _get_package_version(package_dir)
|
||||
output_dir = _out_file_path(package_name)
|
||||
os.mkdir(output_dir)
|
||||
rsts = _construct_doc(
|
||||
_package_namespace(package_name), package_members, package_version
|
||||
)
|
||||
for name, rst in rsts:
|
||||
with open(output_dir / name, "w") as f:
|
||||
f.write(rst)
|
||||
with open(_out_file_path(package_name), "w") as f:
|
||||
f.write(
|
||||
_doc_first_line(package_name)
|
||||
+ _construct_doc(
|
||||
_package_namespace(package_name), package_members, package_version
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _package_namespace(package_name: str) -> str:
|
||||
@@ -517,119 +455,12 @@ def _get_package_version(package_dir: Path) -> str:
|
||||
|
||||
def _out_file_path(package_name: str) -> Path:
|
||||
"""Return the path to the file containing the documentation."""
|
||||
return HERE / f"{package_name.replace('-', '_')}"
|
||||
return HERE / f"{package_name.replace('-', '_')}_api_reference.rst"
|
||||
|
||||
|
||||
def _build_index(dirs: List[str]) -> None:
|
||||
custom_names = {
|
||||
"airbyte": "Airbyte",
|
||||
"aws": "AWS",
|
||||
"ai21": "AI21",
|
||||
}
|
||||
ordered = ["core", "langchain", "text-splitters", "community", "experimental"]
|
||||
main_ = [dir_ for dir_ in ordered if dir_ in dirs]
|
||||
integrations = sorted(dir_ for dir_ in dirs if dir_ not in main_)
|
||||
doc = """# LangChain Python API Reference
|
||||
|
||||
Welcome to the LangChain Python API reference. This is a reference for all
|
||||
`langchain-x` packages.
|
||||
|
||||
For user guides see [https://python.langchain.com](https://python.langchain.com).
|
||||
|
||||
For the legacy API reference hosted on ReadTheDocs see [https://api.python.langchain.com/](https://api.python.langchain.com/).
|
||||
"""
|
||||
|
||||
if main_:
|
||||
main_headers = [
|
||||
" ".join(custom_names.get(x, x.title()) for x in dir_.split("-"))
|
||||
for dir_ in main_
|
||||
]
|
||||
main_tree = "\n".join(
|
||||
f"{header_name}<{dir_.replace('-', '_')}/index>"
|
||||
for header_name, dir_ in zip(main_headers, main_)
|
||||
)
|
||||
main_grid = "\n".join(
|
||||
f'- header: "**{header_name}**"\n content: "{_package_namespace(dir_).replace("_", "-")}: {_get_package_version(_package_dir(dir_))}"\n link: {dir_.replace("-", "_")}/index.html'
|
||||
for header_name, dir_ in zip(main_headers, main_)
|
||||
)
|
||||
doc += f"""## Base packages
|
||||
|
||||
```{{gallery-grid}}
|
||||
:grid-columns: "1 2 2 3"
|
||||
|
||||
{main_grid}
|
||||
```
|
||||
|
||||
```{{toctree}}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
:caption: Base packages
|
||||
|
||||
{main_tree}
|
||||
```
|
||||
"""
|
||||
if integrations:
|
||||
integration_headers = [
|
||||
" ".join(
|
||||
custom_names.get(x, x.title().replace("ai", "AI").replace("db", "DB"))
|
||||
for x in dir_.split("-")
|
||||
)
|
||||
for dir_ in integrations
|
||||
]
|
||||
integration_tree = "\n".join(
|
||||
f"{header_name}<{dir_.replace('-', '_')}/index>"
|
||||
for header_name, dir_ in zip(integration_headers, integrations)
|
||||
)
|
||||
|
||||
integration_grid = ""
|
||||
integrations_to_show = [
|
||||
"openai",
|
||||
"anthropic",
|
||||
"google-vertexai",
|
||||
"aws",
|
||||
"huggingface",
|
||||
"mistralai",
|
||||
]
|
||||
for header_name, dir_ in sorted(
|
||||
zip(integration_headers, integrations),
|
||||
key=lambda h_d: integrations_to_show.index(h_d[1])
|
||||
if h_d[1] in integrations_to_show
|
||||
else len(integrations_to_show),
|
||||
)[: len(integrations_to_show)]:
|
||||
integration_grid += f'\n- header: "**{header_name}**"\n content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n link: {dir_.replace("-", "_")}/index.html'
|
||||
doc += f"""## Integrations
|
||||
|
||||
```{{gallery-grid}}
|
||||
:grid-columns: "1 2 2 3"
|
||||
|
||||
{integration_grid}
|
||||
```
|
||||
|
||||
See the full list of integrations in the Section Navigation.
|
||||
|
||||
```{{toctree}}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
:caption: Integrations
|
||||
|
||||
{integration_tree}
|
||||
```
|
||||
"""
|
||||
with open(HERE / "reference.md", "w") as f:
|
||||
f.write(doc)
|
||||
|
||||
dummy_index = """\
|
||||
# API reference
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
|
||||
Reference<reference>
|
||||
```
|
||||
"""
|
||||
with open(HERE / "index.md", "w") as f:
|
||||
f.write(dummy_index)
|
||||
def _doc_first_line(package_name: str) -> str:
|
||||
"""Return the path to the file containing the documentation."""
|
||||
return f".. {package_name.replace('-', '_')}_api_reference:\n\n"
|
||||
|
||||
|
||||
def main(dirs: Optional[list] = None) -> None:
|
||||
@@ -657,8 +488,6 @@ def main(dirs: Optional[list] = None) -> None:
|
||||
else:
|
||||
print("Building package:", dir_)
|
||||
_build_rst_file(package_name=dir_)
|
||||
|
||||
_build_index(dirs)
|
||||
print("API reference files built.")
|
||||
|
||||
|
||||
|
||||
8
docs/api_reference/index.rst
Normal file
8
docs/api_reference/index.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
=============
|
||||
LangChain API
|
||||
=============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
api_reference.rst
|
||||
@@ -1,11 +1,17 @@
|
||||
autodoc_pydantic>=1,<2
|
||||
sphinx<=7
|
||||
myst-parser>=3
|
||||
sphinx-autobuild>=2024
|
||||
pydata-sphinx-theme>=0.15
|
||||
toml>=0.10.2
|
||||
myst-nb>=1.1.1
|
||||
pyyaml
|
||||
sphinx-design
|
||||
sphinx-copybutton
|
||||
beautifulsoup4
|
||||
-e libs/experimental
|
||||
-e libs/langchain
|
||||
-e libs/core
|
||||
-e libs/community
|
||||
pydantic<2
|
||||
autodoc_pydantic==1.8.0
|
||||
myst_parser
|
||||
nbsphinx==0.8.9
|
||||
sphinx>=5
|
||||
sphinx-autobuild==2021.3.14
|
||||
sphinx_rtd_theme==1.0.0
|
||||
sphinx-typlog-theme==0.8.0
|
||||
sphinx-panels
|
||||
toml
|
||||
myst_nb
|
||||
sphinx_copybutton
|
||||
pydata-sphinx-theme==0.13.1
|
||||
@@ -1,41 +0,0 @@
|
||||
import sys
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
CUR_DIR = Path(__file__).parents[1]
|
||||
|
||||
|
||||
def process_toc_h3_elements(html_content: str) -> str:
|
||||
"""Update Class.method() TOC headers to just method()."""
|
||||
# Create a BeautifulSoup object
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
# Find all <li> elements with class "toc-h3"
|
||||
toc_h3_elements = soup.find_all("li", class_="toc-h3")
|
||||
|
||||
# Process each element
|
||||
for element in toc_h3_elements:
|
||||
element = element.a.code.span
|
||||
# Get the text content of the element
|
||||
content = element.get_text()
|
||||
|
||||
# Apply the regex substitution
|
||||
modified_content = content.split(".")[-1]
|
||||
|
||||
# Update the element's content
|
||||
element.string = modified_content
|
||||
|
||||
# Return the modified HTML
|
||||
return str(soup)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dir = sys.argv[1]
|
||||
for fn in glob(str(f"{dir.rstrip('/')}/**/*.html"), recursive=True):
|
||||
with open(fn, "r") as f:
|
||||
html = f.read()
|
||||
processed_html = process_toc_h3_elements(html)
|
||||
with open(fn, "w") as f:
|
||||
f.write(processed_html)
|
||||
@@ -1,4 +1,4 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in attributes %}
|
||||
~{{ item }}
|
||||
~{{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -22,11 +22,11 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in methods %}
|
||||
~{{ item }}
|
||||
~{{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% for item in methods %}
|
||||
.. automethod:: {{ item }}
|
||||
.. automethod:: {{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% endif %}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
<!-- This will display a link to LangChain docs -->
|
||||
<head>
|
||||
<style>
|
||||
.text-link {
|
||||
text-decoration: none; /* Remove underline */
|
||||
color: inherit; /* Inherit color from parent element */
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<a href="https://python.langchain.com/" class='text-link'>Docs</a>
|
||||
</body>
|
||||
@@ -1,4 +1,4 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autoclass:: {{ objname }}
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autoclass:: {{ objname }}
|
||||
|
||||
{% block attributes %}
|
||||
{% if attributes %}
|
||||
.. rubric:: {{ _('Attributes') }}
|
||||
|
||||
.. autosummary::
|
||||
{% for item in attributes %}
|
||||
~{{ item }}
|
||||
~{{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -26,11 +26,11 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in methods %}
|
||||
~{{ item }}
|
||||
~{{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% for item in methods %}
|
||||
.. automethod:: {{ item }}
|
||||
.. automethod:: {{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% endif %}
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autopydantic_model:: {{ objname }}
|
||||
@@ -15,10 +19,6 @@
|
||||
:member-order: groupwise
|
||||
:show-inheritance: True
|
||||
:special-members: __call__
|
||||
:exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign, as_tool
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
:exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign
|
||||
|
||||
.. example_links:: {{ objname }}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{{ objname }}
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
27
docs/api_reference/themes/COPYRIGHT.txt
Normal file
27
docs/api_reference/themes/COPYRIGHT.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2007-2023 The scikit-learn developers.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -0,0 +1,67 @@
|
||||
<script>
|
||||
$(document).ready(function() {
|
||||
/* Add a [>>>] button on the top-right corner of code samples to hide
|
||||
* the >>> and ... prompts and the output and thus make the code
|
||||
* copyable. */
|
||||
var div = $('.highlight-python .highlight,' +
|
||||
'.highlight-python3 .highlight,' +
|
||||
'.highlight-pycon .highlight,' +
|
||||
'.highlight-default .highlight')
|
||||
var pre = div.find('pre');
|
||||
|
||||
// get the styles from the current theme
|
||||
pre.parent().parent().css('position', 'relative');
|
||||
var hide_text = 'Hide prompts and outputs';
|
||||
var show_text = 'Show prompts and outputs';
|
||||
|
||||
// create and add the button to all the code blocks that contain >>>
|
||||
div.each(function(index) {
|
||||
var jthis = $(this);
|
||||
if (jthis.find('.gp').length > 0) {
|
||||
var button = $('<span class="copybutton">>>></span>');
|
||||
button.attr('title', hide_text);
|
||||
button.data('hidden', 'false');
|
||||
jthis.prepend(button);
|
||||
}
|
||||
// tracebacks (.gt) contain bare text elements that need to be
|
||||
// wrapped in a span to work with .nextUntil() (see later)
|
||||
jthis.find('pre:has(.gt)').contents().filter(function() {
|
||||
return ((this.nodeType == 3) && (this.data.trim().length > 0));
|
||||
}).wrap('<span>');
|
||||
});
|
||||
|
||||
// define the behavior of the button when it's clicked
|
||||
$('.copybutton').click(function(e){
|
||||
e.preventDefault();
|
||||
var button = $(this);
|
||||
if (button.data('hidden') === 'false') {
|
||||
// hide the code output
|
||||
button.parent().find('.go, .gp, .gt').hide();
|
||||
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden');
|
||||
button.css('text-decoration', 'line-through');
|
||||
button.attr('title', show_text);
|
||||
button.data('hidden', 'true');
|
||||
} else {
|
||||
// show the code output
|
||||
button.parent().find('.go, .gp, .gt').show();
|
||||
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible');
|
||||
button.css('text-decoration', 'none');
|
||||
button.attr('title', hide_text);
|
||||
button.data('hidden', 'false');
|
||||
}
|
||||
});
|
||||
|
||||
/*** Add permalink buttons next to glossary terms ***/
|
||||
$('dl.glossary > dt[id]').append(function() {
|
||||
return ('<a class="headerlink" href="#' +
|
||||
this.getAttribute('id') +
|
||||
'" title="Permalink to this term">¶</a>');
|
||||
});
|
||||
});
|
||||
|
||||
</script>
|
||||
{%- if pagename != 'index' and pagename != 'documentation' %}
|
||||
{% if theme_mathjax_path %}
|
||||
<script id="MathJax-script" async src="{{ theme_mathjax_path }}"></script>
|
||||
{% endif %}
|
||||
{%- endif %}
|
||||
135
docs/api_reference/themes/scikit-learn-modern/layout.html
Normal file
135
docs/api_reference/themes/scikit-learn-modern/layout.html
Normal file
@@ -0,0 +1,135 @@
|
||||
{# TEMPLATE VAR SETTINGS #}
|
||||
{%- set url_root = pathto('', 1) %}
|
||||
{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
|
||||
{%- if not embedded and docstitle %}
|
||||
{%- set titlesuffix = " — "|safe + docstitle|e %}
|
||||
{%- else %}
|
||||
{%- set titlesuffix = "" %}
|
||||
{%- endif %}
|
||||
{%- set lang_attr = 'en' %}
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="{{ lang_attr }}" > <![endif]-->
|
||||
<!--[if gt IE 8]><!-->
|
||||
<html class="no-js" lang="{{ lang_attr }}"> <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
{{ metatags }}
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
{% block htmltitle %}
|
||||
<title>{{ title|striptags|e }}{{ titlesuffix }}</title>
|
||||
{% endblock %}
|
||||
<link rel="canonical"
|
||||
href="https://api.python.langchain.com/en/latest/{{ pagename }}.html"/>
|
||||
|
||||
{% if favicon_url %}
|
||||
<link rel="shortcut icon" href="{{ favicon_url|e }}"/>
|
||||
{% endif %}
|
||||
|
||||
<link rel="stylesheet"
|
||||
href="{{ pathto('_static/css/vendor/bootstrap.min.css', 1) }}"
|
||||
type="text/css"/>
|
||||
{%- for css in css_files %}
|
||||
{%- if css|attr("rel") %}
|
||||
<link rel="{{ css.rel }}" href="{{ pathto(css.filename, 1) }}"
|
||||
type="text/css"{% if css.title is not none %}
|
||||
title="{{ css.title }}"{% endif %} />
|
||||
{%- else %}
|
||||
<link rel="stylesheet" href="{{ pathto(css, 1) }}" type="text/css"/>
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
<link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css"/>
|
||||
<script id="documentation_options" data-url_root="{{ pathto('', 1) }}"
|
||||
src="{{ pathto('_static/documentation_options.js', 1) }}"></script>
|
||||
<script src="{{ pathto('_static/jquery.js', 1) }}"></script>
|
||||
{%- block extrahead %} {% endblock %}
|
||||
</head>
|
||||
<body>
|
||||
<div class="banner">
|
||||
<p>This is a legacy site. Please use the latest <a href="https://python.langchain.com/v0.2/api_reference/reference.html">v0.2</a> and <a href="https://python.langchain.com/api_reference/">v0.3</a> API references instead.</p>
|
||||
</div>
|
||||
{% include "nav.html" %}
|
||||
{%- block content %}
|
||||
<div class="d-flex" id="sk-doc-wrapper">
|
||||
<input type="checkbox" name="sk-toggle-checkbox" id="sk-toggle-checkbox">
|
||||
<label id="sk-sidemenu-toggle" class="sk-btn-toggle-toc btn sk-btn-primary"
|
||||
for="sk-toggle-checkbox">Toggle Menu</label>
|
||||
<div id="sk-sidebar-wrapper" class="border-right">
|
||||
<div class="sk-sidebar-toc-wrapper">
|
||||
{%- if meta and meta['parenttoc']|tobool %}
|
||||
<div class="sk-sidebar-toc">
|
||||
{% set nav = get_nav_object(maxdepth=3, collapse=True, numbered=True) %}
|
||||
<ul>
|
||||
{% for main_nav_item in nav %}
|
||||
{% if main_nav_item.active %}
|
||||
<li>
|
||||
<a href="{{ main_nav_item.url }}"
|
||||
class="sk-toc-active">{{ main_nav_item.title }}</a>
|
||||
</li>
|
||||
<ul>
|
||||
{% for nav_item in main_nav_item.children %}
|
||||
<li>
|
||||
<a href="{{ nav_item.url }}"
|
||||
class="{% if nav_item.active %}sk-toc-active{% endif %}">{{ nav_item.title }}</a>
|
||||
{% if nav_item.children %}
|
||||
<ul>
|
||||
{% for inner_child in nav_item.children %}
|
||||
<li class="sk-toctree-l3">
|
||||
<a href="{{ inner_child.url }}">{{ inner_child.title }}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{%- elif meta and meta['globalsidebartoc']|tobool %}
|
||||
<div class="sk-sidebar-toc sk-sidebar-global-toc">
|
||||
{{ toctree(maxdepth=2, titles_only=True) }}
|
||||
</div>
|
||||
{%- else %}
|
||||
<div class="sk-sidebar-toc">
|
||||
{{ toc }}
|
||||
</div>
|
||||
{%- endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div id="sk-page-content-wrapper">
|
||||
<div class="sk-page-content container-fluid body px-md-3" role="main">
|
||||
{% block body %}{% endblock %}
|
||||
</div>
|
||||
<div class="container">
|
||||
<footer class="sk-content-footer">
|
||||
{%- if pagename != 'index' %}
|
||||
{%- if show_copyright %}
|
||||
{%- if hasdoc('copyright') %}
|
||||
{% trans path=pathto('copyright'), copyright=copyright|e %}
|
||||
© {{ copyright }}.{% endtrans %}
|
||||
{%- else %}
|
||||
{% trans copyright=copyright|e %}© {{ copyright }}
|
||||
.{% endtrans %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if last_updated %}
|
||||
{% trans last_updated=last_updated|e %}Last updated
|
||||
on {{ last_updated }}.{% endtrans %}
|
||||
{%- endif %}
|
||||
{%- if show_source and has_source and sourcename %}
|
||||
<a href="{{ pathto('_sources/' + sourcename, true)|e }}"
|
||||
rel="nofollow">{{ _('Show this page source') }}</a>
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{%- endblock %}
|
||||
<script src="{{ pathto('_static/js/vendor/bootstrap.min.js', 1) }}"></script>
|
||||
{% include "javascript.html" %}
|
||||
</body>
|
||||
</html>
|
||||
78
docs/api_reference/themes/scikit-learn-modern/nav.html
Normal file
78
docs/api_reference/themes/scikit-learn-modern/nav.html
Normal file
@@ -0,0 +1,78 @@
|
||||
{%- if pagename != 'index' and pagename != 'documentation' %}
|
||||
{%- set nav_bar_class = "sk-docs-navbar" %}
|
||||
{%- set top_container_cls = "sk-docs-container" %}
|
||||
{%- else %}
|
||||
{%- set nav_bar_class = "sk-landing-navbar" %}
|
||||
{%- set top_container_cls = "sk-landing-container" %}
|
||||
{%- endif %}
|
||||
|
||||
<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
|
||||
<div class="container-fluid {{ top_container_cls }} px-0">
|
||||
{%- if logo_url %}
|
||||
<a class="navbar-brand py-0" href="{{ pathto('index') }}">
|
||||
<img
|
||||
class="sk-brand-img"
|
||||
src="{{ logo_url|e }}"
|
||||
alt="logo"/>
|
||||
</a>
|
||||
{%- endif %}
|
||||
<button
|
||||
id="sk-navbar-toggler"
|
||||
class="navbar-toggler"
|
||||
type="button"
|
||||
data-toggle="collapse"
|
||||
data-target="#navbarSupportedContent"
|
||||
aria-controls="navbarSupportedContent"
|
||||
aria-expanded="false"
|
||||
aria-label="Toggle navigation"
|
||||
>
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
|
||||
<div class="sk-navbar-collapse collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav mr-auto">
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('langchain_api_reference') }}">LangChain</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('core_api_reference') }}">Core</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('community_api_reference') }}">Community</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('text_splitters_api_reference') }}">Text splitters</a>
|
||||
</li>
|
||||
{%- for title, pathname in partners %}
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link nav-more-item-mobile-items" href="{{ pathto(pathname) }}">{{ title }}</a>
|
||||
</li>
|
||||
{%- endfor %}
|
||||
<li class="nav-item dropdown nav-more-item-dropdown">
|
||||
<a class="sk-nav-link nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Partner libs</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
{%- for title, pathname in partners %}
|
||||
<a class="sk-nav-dropdown-item dropdown-item" href="{{ pathto(pathname) }}">{{ title }}</a>
|
||||
{%- endfor %}
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" target="_blank" rel="noopener noreferrer" href="https://python.langchain.com/">Docs</a>
|
||||
</li>
|
||||
</ul>
|
||||
{%- if pagename != "search"%}
|
||||
<div id="searchbox" role="search">
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="{{ pathto('search') }}" method="get">
|
||||
<input class="sk-search-text-input" type="text" name="q" aria-labelledby="searchlabel" />
|
||||
<input class="sk-search-text-btn" type="submit" value="{{ _('Go') }}" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{%- endif %}
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
16
docs/api_reference/themes/scikit-learn-modern/search.html
Normal file
16
docs/api_reference/themes/scikit-learn-modern/search.html
Normal file
@@ -0,0 +1,16 @@
|
||||
{%- extends "basic/search.html" %}
|
||||
{% block extrahead %}
|
||||
<script type="text/javascript" src="{{ pathto('_static/underscore.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('searchindex.js', 1) }}" defer></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/doctools.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/language_data.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/searchtools.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/sphinx_highlight.js', 1) }}"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() {
|
||||
if (!Search.out) {
|
||||
Search.init();
|
||||
}
|
||||
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
1434
docs/api_reference/themes/scikit-learn-modern/static/css/theme.css
Normal file
1434
docs/api_reference/themes/scikit-learn-modern/static/css/theme.css
Normal file
File diff suppressed because it is too large
Load Diff
6
docs/api_reference/themes/scikit-learn-modern/static/css/vendor/bootstrap.min.css
vendored
Normal file
6
docs/api_reference/themes/scikit-learn-modern/static/css/vendor/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
6
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/bootstrap.min.js
vendored
Normal file
6
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/bootstrap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
2
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/jquery-3.6.3.slim.min.js
vendored
Normal file
2
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/jquery-3.6.3.slim.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
8
docs/api_reference/themes/scikit-learn-modern/theme.conf
Normal file
8
docs/api_reference/themes/scikit-learn-modern/theme.conf
Normal file
@@ -0,0 +1,8 @@
|
||||
[theme]
|
||||
inherit = basic
|
||||
pygments_style = default
|
||||
stylesheet = css/theme.css
|
||||
|
||||
[options]
|
||||
link_to_live_contributing_page = false
|
||||
mathjax_path =
|
||||
@@ -4,11 +4,8 @@ LangChain implements the latest research in the field of Natural Language Proces
|
||||
This page contains `arXiv` papers referenced in the LangChain Documentation, API Reference,
|
||||
Templates, and Cookbooks.
|
||||
|
||||
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
|
||||
Here you find papers that reference:
|
||||
- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
|
||||
- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
|
||||
- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
|
||||
From the opposite direction, scientists use LangChain in research and reference LangChain in the research papers.
|
||||
Here you find [such papers](https://arxiv.org/search/?query=langchain&searchtype=all&source=header).
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -26,30 +23,32 @@ Here you find papers that reference:
|
||||
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023-05-23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
|
||||
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023-05-15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
|
||||
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023-05-06 | `Cookbook:` [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
|
||||
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023-05-03 | `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
|
||||
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023-04-17 | `Cookbook:` [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
|
||||
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023-04-07 | `Cookbook:` [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
|
||||
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023-03-31 | `Cookbook:` [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
|
||||
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023-03-30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
|
||||
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2303.08774v6` [GPT-4 Technical Report](http://arxiv.org/abs/2303.08774v6) | OpenAI, Josh Achiam, Steven Adler, et al. | 2023-03-15 | `Docs:` [docs/integrations/vectorstores/mongodb_atlas](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)
|
||||
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022-12-20 | `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
|
||||
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022-12-12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
|
||||
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022-11-25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
|
||||
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
|
||||
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
|
||||
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
|
||||
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/chat/huggingface](https://python.langchain.com/docs/integrations/chat/huggingface), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
|
||||
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022-09-22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
|
||||
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022-05-26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
|
||||
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022-05-25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
|
||||
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
|
||||
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL), [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase)
|
||||
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021-02-26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
|
||||
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `1908.10084v1` [Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks](http://arxiv.org/abs/1908.10084v1) | Nils Reimers, Iryna Gurevych | 2019-08-27 | `Docs:` [docs/integrations/text_embedding/sentence_transformers](https://python.langchain.com/docs/integrations/text_embedding/sentence_transformers)
|
||||
|
||||
## Self-Discover: Large Language Models Self-Compose Reasoning Structures
|
||||
|
||||
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
|
||||
- **arXiv id:** 2402.03620v1
|
||||
- **Title:** Self-Discover: Large Language Models Self-Compose Reasoning Structures
|
||||
- **Authors:** Pei Zhou, Jay Pujara, Xiang Ren, et al.
|
||||
- **Published Date:** 2024-02-06
|
||||
- **URL:** http://arxiv.org/abs/2402.03620v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
|
||||
@@ -71,9 +70,11 @@ commonalities with human reasoning patterns.
|
||||
|
||||
## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
|
||||
|
||||
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
|
||||
- **arXiv id:** 2401.18059v1
|
||||
- **Title:** RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
|
||||
- **Authors:** Parth Sarthi, Salman Abdullah, Aditi Tuli, et al.
|
||||
- **Published Date:** 2024-01-31
|
||||
- **URL:** http://arxiv.org/abs/2401.18059v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
|
||||
@@ -95,9 +96,11 @@ benchmark by 20% in absolute accuracy.
|
||||
|
||||
## Corrective Retrieval Augmented Generation
|
||||
|
||||
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
|
||||
- **arXiv id:** 2401.15884v2
|
||||
- **Title:** Corrective Retrieval Augmented Generation
|
||||
- **Authors:** Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al.
|
||||
- **Published Date:** 2024-01-29
|
||||
- **URL:** http://arxiv.org/abs/2401.15884v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
|
||||
@@ -123,9 +126,11 @@ performance of RAG-based approaches.
|
||||
|
||||
## Mixtral of Experts
|
||||
|
||||
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
|
||||
- **arXiv id:** 2401.04088v1
|
||||
- **Title:** Mixtral of Experts
|
||||
- **Authors:** Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al.
|
||||
- **Published Date:** 2024-01-08
|
||||
- **URL:** http://arxiv.org/abs/2401.04088v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
|
||||
@@ -147,9 +152,11 @@ the base and instruct models are released under the Apache 2.0 license.
|
||||
|
||||
## Dense X Retrieval: What Retrieval Granularity Should We Use?
|
||||
|
||||
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
|
||||
- **arXiv id:** 2312.06648v2
|
||||
- **Title:** Dense X Retrieval: What Retrieval Granularity Should We Use?
|
||||
- **Authors:** Tong Chen, Hongwei Wang, Sihao Chen, et al.
|
||||
- **Published Date:** 2023-12-11
|
||||
- **URL:** http://arxiv.org/abs/2312.06648v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
|
||||
@@ -174,9 +181,11 @@ information.
|
||||
|
||||
## Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
|
||||
|
||||
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
|
||||
- **arXiv id:** 2311.09210v1
|
||||
- **Title:** Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
|
||||
- **Authors:** Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al.
|
||||
- **Published Date:** 2023-11-15
|
||||
- **URL:** http://arxiv.org/abs/2311.09210v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
|
||||
@@ -206,9 +215,11 @@ outside the pre-training knowledge scope.
|
||||
|
||||
## Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
|
||||
|
||||
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
|
||||
- **arXiv id:** 2310.11511v1
|
||||
- **Title:** Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
|
||||
- **Authors:** Akari Asai, Zeqiu Wu, Yizhong Wang, et al.
|
||||
- **Published Date:** 2023-10-17
|
||||
- **URL:** http://arxiv.org/abs/2310.11511v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
|
||||
@@ -237,9 +248,11 @@ to these models.
|
||||
|
||||
## Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
|
||||
|
||||
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
|
||||
- **arXiv id:** 2310.06117v2
|
||||
- **Title:** Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
|
||||
- **Authors:** Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al.
|
||||
- **Published Date:** 2023-10-09
|
||||
- **URL:** http://arxiv.org/abs/2310.06117v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting)
|
||||
@@ -258,9 +271,11 @@ and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.
|
||||
|
||||
## Llama 2: Open Foundation and Fine-Tuned Chat Models
|
||||
|
||||
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
|
||||
- **arXiv id:** 2307.09288v2
|
||||
- **Title:** Llama 2: Open Foundation and Fine-Tuned Chat Models
|
||||
- **Authors:** Hugo Touvron, Louis Martin, Kevin Stone, et al.
|
||||
- **Published Date:** 2023-07-18
|
||||
- **URL:** http://arxiv.org/abs/2307.09288v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
|
||||
@@ -277,9 +292,11 @@ contribute to the responsible development of LLMs.
|
||||
|
||||
## Query Rewriting for Retrieval-Augmented Large Language Models
|
||||
|
||||
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
|
||||
- **arXiv id:** 2305.14283v3
|
||||
- **Title:** Query Rewriting for Retrieval-Augmented Large Language Models
|
||||
- **Authors:** Xinbei Ma, Yeyun Gong, Pengcheng He, et al.
|
||||
- **Published Date:** 2023-05-23
|
||||
- **URL:** http://arxiv.org/abs/2305.14283v3
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read)
|
||||
@@ -305,9 +322,11 @@ for retrieval-augmented LLM.
|
||||
|
||||
## Large Language Model Guided Tree-of-Thought
|
||||
|
||||
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
|
||||
- **arXiv id:** 2305.08291v1
|
||||
- **Title:** Large Language Model Guided Tree-of-Thought
|
||||
- **Authors:** Jieyi Long
|
||||
- **Published Date:** 2023-05-15
|
||||
- **URL:** http://arxiv.org/abs/2305.08291v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)
|
||||
@@ -333,9 +352,11 @@ implementation of the ToT-based Sudoku solver is available on GitHub:
|
||||
|
||||
## Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
|
||||
|
||||
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
|
||||
- **arXiv id:** 2305.04091v3
|
||||
- **Title:** Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
|
||||
- **Authors:** Lei Wang, Wanyu Xu, Yihuai Lan, et al.
|
||||
- **Published Date:** 2023-05-06
|
||||
- **URL:** http://arxiv.org/abs/2305.04091v3
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
|
||||
@@ -362,35 +383,13 @@ Prompting, and has comparable performance with 8-shot CoT prompting on the math
|
||||
reasoning problem. The code can be found at
|
||||
https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.
|
||||
|
||||
## Zero-Shot Listwise Document Reranking with a Large Language Model
|
||||
|
||||
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
|
||||
- **Title:** Zero-Shot Listwise Document Reranking with a Large Language Model
|
||||
- **Authors:** Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al.
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
|
||||
|
||||
**Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
|
||||
have shown success in multi-stage text ranking tasks, but they require large
|
||||
amounts of relevance judgments as training data. In this work, we propose
|
||||
Listwise Reranker with a Large Language Model (LRL), which achieves strong
|
||||
reranking effectiveness without using any task-specific training data.
|
||||
Different from the existing pointwise ranking methods, where documents are
|
||||
scored independently and ranked according to the scores, LRL directly generates
|
||||
a reordered list of document identifiers given the candidate documents.
|
||||
Experiments on three TREC web search datasets demonstrate that LRL not only
|
||||
outperforms zero-shot pointwise methods when reranking first-stage retrieval
|
||||
results, but can also act as a final-stage reranker to improve the top-ranked
|
||||
results of a pointwise method for improved efficiency. Additionally, we apply
|
||||
our approach to subsets of MIRACL, a recent multilingual retrieval dataset,
|
||||
with results showing its potential to generalize across different languages.
|
||||
|
||||
## Visual Instruction Tuning
|
||||
|
||||
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
|
||||
- **arXiv id:** 2304.08485v2
|
||||
- **Title:** Visual Instruction Tuning
|
||||
- **Authors:** Haotian Liu, Chunyuan Li, Qingyang Wu, et al.
|
||||
- **Published Date:** 2023-04-17
|
||||
- **URL:** http://arxiv.org/abs/2304.08485v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
|
||||
@@ -413,9 +412,11 @@ publicly available.
|
||||
|
||||
## Generative Agents: Interactive Simulacra of Human Behavior
|
||||
|
||||
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
|
||||
- **arXiv id:** 2304.03442v2
|
||||
- **Title:** Generative Agents: Interactive Simulacra of Human Behavior
|
||||
- **Authors:** Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al.
|
||||
- **Published Date:** 2023-04-07
|
||||
- **URL:** http://arxiv.org/abs/2304.03442v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
|
||||
@@ -447,9 +448,11 @@ interaction patterns for enabling believable simulations of human behavior.
|
||||
|
||||
## CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
|
||||
|
||||
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
|
||||
- **arXiv id:** 2303.17760v2
|
||||
- **Title:** CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
|
||||
- **Authors:** Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al.
|
||||
- **Published Date:** 2023-03-31
|
||||
- **URL:** http://arxiv.org/abs/2303.17760v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
|
||||
@@ -475,9 +478,11 @@ agents and beyond: https://github.com/camel-ai/camel.
|
||||
|
||||
## HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
|
||||
|
||||
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
|
||||
- **arXiv id:** 2303.17580v4
|
||||
- **Title:** HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
|
||||
- **Authors:** Yongliang Shen, Kaitao Song, Xu Tan, et al.
|
||||
- **Published Date:** 2023-03-30
|
||||
- **URL:** http://arxiv.org/abs/2303.17580v4
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)
|
||||
@@ -503,14 +508,40 @@ modalities and domains and achieve impressive results in language, vision,
|
||||
speech, and other challenging tasks, which paves a new way towards the
|
||||
realization of artificial general intelligence.
|
||||
|
||||
## A Watermark for Large Language Models
|
||||
## GPT-4 Technical Report
|
||||
|
||||
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
|
||||
- **Title:** A Watermark for Large Language Models
|
||||
- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
|
||||
- **arXiv id:** 2303.08774v6
|
||||
- **Title:** GPT-4 Technical Report
|
||||
- **Authors:** OpenAI, Josh Achiam, Steven Adler, et al.
|
||||
- **Published Date:** 2023-03-15
|
||||
- **URL:** http://arxiv.org/abs/2303.08774v6
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **Documentation:** [docs/integrations/vectorstores/mongodb_atlas](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)
|
||||
|
||||
**Abstract:** We report the development of GPT-4, a large-scale, multimodal model which can
|
||||
accept image and text inputs and produce text outputs. While less capable than
|
||||
humans in many real-world scenarios, GPT-4 exhibits human-level performance on
|
||||
various professional and academic benchmarks, including passing a simulated bar
|
||||
exam with a score around the top 10% of test takers. GPT-4 is a
|
||||
Transformer-based model pre-trained to predict the next token in a document.
|
||||
The post-training alignment process results in improved performance on measures
|
||||
of factuality and adherence to desired behavior. A core component of this
|
||||
project was developing infrastructure and optimization methods that behave
|
||||
predictably across a wide range of scales. This allowed us to accurately
|
||||
predict some aspects of GPT-4's performance based on models trained with no
|
||||
more than 1/1,000th the compute of GPT-4.
|
||||
|
||||
## A Watermark for Large Language Models
|
||||
|
||||
- **arXiv id:** 2301.10226v4
|
||||
- **Title:** A Watermark for Large Language Models
|
||||
- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
|
||||
- **Published Date:** 2023-01-24
|
||||
- **URL:** http://arxiv.org/abs/2301.10226v4
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
**Abstract:** Potential harms of large language models can be mitigated by watermarking
|
||||
model output, i.e., embedding signals into generated text that are invisible to
|
||||
@@ -528,9 +559,11 @@ family, and discuss robustness and security.
|
||||
|
||||
## Precise Zero-Shot Dense Retrieval without Relevance Labels
|
||||
|
||||
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
|
||||
- **arXiv id:** 2212.10496v1
|
||||
- **Title:** Precise Zero-Shot Dense Retrieval without Relevance Labels
|
||||
- **Authors:** Luyu Gao, Xueguang Ma, Jimmy Lin, et al.
|
||||
- **Published Date:** 2022-12-20
|
||||
- **URL:** http://arxiv.org/abs/2212.10496v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
|
||||
@@ -557,9 +590,11 @@ search, QA, fact verification) and languages~(e.g. sw, ko, ja).
|
||||
|
||||
## Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments
|
||||
|
||||
- **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3) **Published Date:** 2022-12-12
|
||||
- **arXiv id:** 2212.07425v3
|
||||
- **Title:** Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments
|
||||
- **Authors:** Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al.
|
||||
- **Published Date:** 2022-12-12
|
||||
- **URL:** http://arxiv.org/abs/2212.07425v3
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
|
||||
@@ -588,9 +623,11 @@ further work on logical fallacy identification.
|
||||
|
||||
## Complementary Explanations for Effective In-Context Learning
|
||||
|
||||
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
|
||||
- **arXiv id:** 2211.13892v2
|
||||
- **Title:** Complementary Explanations for Effective In-Context Learning
|
||||
- **Authors:** Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al.
|
||||
- **Published Date:** 2022-11-25
|
||||
- **URL:** http://arxiv.org/abs/2211.13892v2
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
|
||||
@@ -614,12 +651,14 @@ performance across three real-world tasks on multiple LLMs.
|
||||
|
||||
## PAL: Program-aided Language Models
|
||||
|
||||
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
|
||||
- **arXiv id:** 2211.10435v2
|
||||
- **Title:** PAL: Program-aided Language Models
|
||||
- **Authors:** Luyu Gao, Aman Madaan, Shuyan Zhou, et al.
|
||||
- **Published Date:** 2022-11-18
|
||||
- **URL:** http://arxiv.org/abs/2211.10435v2
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
|
||||
- **API Reference:** [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain)
|
||||
- **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
|
||||
|
||||
**Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
|
||||
@@ -647,13 +686,15 @@ publicly available at http://reasonwithpal.com/ .
|
||||
|
||||
## ReAct: Synergizing Reasoning and Acting in Language Models
|
||||
|
||||
- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
|
||||
- **arXiv id:** 2210.03629v3
|
||||
- **Title:** ReAct: Synergizing Reasoning and Acting in Language Models
|
||||
- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
|
||||
- **Published Date:** 2022-10-06
|
||||
- **URL:** http://arxiv.org/abs/2210.03629v3
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping)
|
||||
- **API Reference:** [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
|
||||
- **Documentation:** [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/chat/huggingface](https://python.langchain.com/docs/integrations/chat/huggingface), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping)
|
||||
- **API Reference:** [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
|
||||
|
||||
**Abstract:** While large language models (LLMs) have demonstrated impressive capabilities
|
||||
across tasks in language understanding and interactive decision making, their
|
||||
@@ -680,9 +721,11 @@ Project site with code: https://react-lm.github.io
|
||||
|
||||
## Deep Lake: a Lakehouse for Deep Learning
|
||||
|
||||
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
|
||||
- **arXiv id:** 2209.10785v2
|
||||
- **Title:** Deep Lake: a Lakehouse for Deep Learning
|
||||
- **Authors:** Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al.
|
||||
- **Published Date:** 2022-09-22
|
||||
- **URL:** http://arxiv.org/abs/2209.10785v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
|
||||
@@ -704,43 +747,13 @@ visualization engine, or (c) deep learning frameworks without sacrificing GPU
|
||||
utilization. Datasets stored in Deep Lake can be accessed from PyTorch,
|
||||
TensorFlow, JAX, and integrate with numerous MLOps tools.
|
||||
|
||||
## Matryoshka Representation Learning
|
||||
|
||||
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
|
||||
- **Title:** Matryoshka Representation Learning
|
||||
- **Authors:** Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al.
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
|
||||
|
||||
**Abstract:** Learned representations are a central component in modern ML systems, serving
|
||||
a multitude of downstream tasks. When training such representations, it is
|
||||
often the case that computational and statistical constraints for each
|
||||
downstream task are unknown. In this context rigid, fixed capacity
|
||||
representations can be either over or under-accommodating to the task at hand.
|
||||
This leads us to ask: can we design a flexible representation that can adapt to
|
||||
multiple downstream tasks with varying computational resources? Our main
|
||||
contribution is Matryoshka Representation Learning (MRL) which encodes
|
||||
information at different granularities and allows a single embedding to adapt
|
||||
to the computational constraints of downstream tasks. MRL minimally modifies
|
||||
existing representation learning pipelines and imposes no additional cost
|
||||
during inference and deployment. MRL learns coarse-to-fine representations that
|
||||
are at least as accurate and rich as independently trained low-dimensional
|
||||
representations. The flexibility within the learned Matryoshka Representations
|
||||
offer: (a) up to 14x smaller embedding size for ImageNet-1K classification at
|
||||
the same level of accuracy; (b) up to 14x real-world speed-ups for large-scale
|
||||
retrieval on ImageNet-1K and 4K; and (c) up to 2% accuracy improvements for
|
||||
long-tail few-shot classification, all while being as robust as the original
|
||||
representations. Finally, we show that MRL extends seamlessly to web-scale
|
||||
datasets (ImageNet, JFT) across various modalities -- vision (ViT, ResNet),
|
||||
vision + language (ALIGN) and language (BERT). MRL code and pretrained models
|
||||
are open-sourced at https://github.com/RAIVNLab/MRL.
|
||||
|
||||
## Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
|
||||
|
||||
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
|
||||
- **arXiv id:** 2205.12654v1
|
||||
- **Title:** Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
|
||||
- **Authors:** Kevin Heffernan, Onur Çelebi, Holger Schwenk
|
||||
- **Published Date:** 2022-05-25
|
||||
- **URL:** http://arxiv.org/abs/2205.12654v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
|
||||
@@ -765,12 +778,14 @@ encoders, mine bitexts, and validate the bitexts by training NMT systems.
|
||||
|
||||
## Evaluating the Text-to-SQL Capabilities of Large Language Models
|
||||
|
||||
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
|
||||
- **arXiv id:** 2204.00498v1
|
||||
- **Title:** Evaluating the Text-to-SQL Capabilities of Large Language Models
|
||||
- **Authors:** Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau
|
||||
- **Published Date:** 2022-03-15
|
||||
- **URL:** http://arxiv.org/abs/2204.00498v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
|
||||
- **API Reference:** [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL), [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase)
|
||||
|
||||
**Abstract:** We perform an empirical evaluation of Text-to-SQL capabilities of the Codex
|
||||
language model. We find that, without any finetuning, Codex is a strong
|
||||
@@ -782,12 +797,14 @@ few-shot examples.
|
||||
|
||||
## Locally Typical Sampling
|
||||
|
||||
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
|
||||
- **arXiv id:** 2202.00666v5
|
||||
- **Title:** Locally Typical Sampling
|
||||
- **Authors:** Clara Meister, Tiago Pimentel, Gian Wiher, et al.
|
||||
- **Published Date:** 2022-02-01
|
||||
- **URL:** http://arxiv.org/abs/2202.00666v5
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
**Abstract:** Today's probabilistic language generators fall short when it comes to
|
||||
producing coherent and fluent text despite the fact that the underlying models
|
||||
@@ -812,9 +829,11 @@ reducing degenerate repetitions.
|
||||
|
||||
## Learning Transferable Visual Models From Natural Language Supervision
|
||||
|
||||
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
|
||||
- **arXiv id:** 2103.00020v1
|
||||
- **Title:** Learning Transferable Visual Models From Natural Language Supervision
|
||||
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
|
||||
- **Published Date:** 2021-02-26
|
||||
- **URL:** http://arxiv.org/abs/2103.00020v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
|
||||
@@ -842,12 +861,14 @@ https://github.com/OpenAI/CLIP.
|
||||
|
||||
## CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
|
||||
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
|
||||
- **arXiv id:** 1909.05858v2
|
||||
- **Title:** CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
|
||||
- **Published Date:** 2019-09-11
|
||||
- **URL:** http://arxiv.org/abs/1909.05858v2
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
**Abstract:** Large-scale language models show promising text generation capabilities, but
|
||||
users cannot easily control particular aspects of the generated text. We
|
||||
@@ -860,4 +881,32 @@ codes also allow CTRL to predict which parts of the training data are most
|
||||
likely given a sequence. This provides a potential method for analyzing large
|
||||
amounts of data via model-based source attribution. We have released multiple
|
||||
full-sized, pretrained versions of CTRL at https://github.com/salesforce/ctrl.
|
||||
|
||||
## Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
||||
|
||||
- **arXiv id:** 1908.10084v1
|
||||
- **Title:** Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
||||
- **Authors:** Nils Reimers, Iryna Gurevych
|
||||
- **Published Date:** 2019-08-27
|
||||
- **URL:** http://arxiv.org/abs/1908.10084v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/text_embedding/sentence_transformers](https://python.langchain.com/docs/integrations/text_embedding/sentence_transformers)
|
||||
|
||||
**Abstract:** BERT (Devlin et al., 2018) and RoBERTa (Liu et al., 2019) has set a new
|
||||
state-of-the-art performance on sentence-pair regression tasks like semantic
|
||||
textual similarity (STS). However, it requires that both sentences are fed into
|
||||
the network, which causes a massive computational overhead: Finding the most
|
||||
similar pair in a collection of 10,000 sentences requires about 50 million
|
||||
inference computations (~65 hours) with BERT. The construction of BERT makes it
|
||||
unsuitable for semantic similarity search as well as for unsupervised tasks
|
||||
like clustering.
|
||||
In this publication, we present Sentence-BERT (SBERT), a modification of the
|
||||
pretrained BERT network that use siamese and triplet network structures to
|
||||
derive semantically meaningful sentence embeddings that can be compared using
|
||||
cosine-similarity. This reduces the effort for finding the most similar pair
|
||||
from 65 hours with BERT / RoBERTa to about 5 seconds with SBERT, while
|
||||
maintaining the accuracy from BERT.
|
||||
We evaluate SBERT and SRoBERTa on common STS tasks and transfer learning
|
||||
tasks, where it outperforms other state-of-the-art sentence embeddings methods.
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# <!-- ruff: noqa: F821 -->\n",
|
||||
"from langchain_core.globals import set_llm_cache"
|
||||
"from langchain.globals import set_llm_cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -103,7 +103,7 @@
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"from langchain_core.caches import InMemoryCache\n",
|
||||
"from langchain.cache import InMemoryCache\n",
|
||||
"\n",
|
||||
"set_llm_cache(InMemoryCache())\n",
|
||||
"\n",
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
"* The `load` methods is a convenience method meant solely for prototyping work -- it just invokes `list(self.lazy_load())`.\n",
|
||||
"* The `alazy_load` has a default implementation that will delegate to `lazy_load`. If you're using async, we recommend overriding the default implementation and providing a native async implementation.\n",
|
||||
"\n",
|
||||
":::{.callout-important}\n",
|
||||
"::: {.callout-important}\n",
|
||||
"When implementing a document loader do **NOT** provide parameters via the `lazy_load` or `alazy_load` methods.\n",
|
||||
"\n",
|
||||
"All configuration is expected to be passed through the initializer (__init__). This was a design choice made by LangChain to make sure that once a document loader has been instantiated it has all the information needed to load documents.\n",
|
||||
@@ -235,7 +235,7 @@
|
||||
"id": "56cb443e-f987-4386-b4ec-975ee129adb2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"::: {.callout-tip}\n",
|
||||
"\n",
|
||||
"`load()` can be helpful in an interactive environment such as a jupyter notebook.\n",
|
||||
"\n",
|
||||
@@ -276,7 +276,7 @@
|
||||
"source": [
|
||||
"## Working with Files\n",
|
||||
"\n",
|
||||
"Many document loaders involve parsing files. The difference between such loaders usually stems from how the file is parsed, rather than how the file is loaded. For example, you can use `open` to read the binary content of either a PDF or a markdown file, but you need different parsing logic to convert that binary data into text.\n",
|
||||
"Many document loaders invovle parsing files. The difference between such loaders usually stems from how the file is parsed rather than how the file is loaded. For example, you can use `open` to read the binary content of either a PDF or a markdown file, but you need different parsing logic to convert that binary data into text.\n",
|
||||
"\n",
|
||||
"As a result, it can be helpful to decouple the parsing logic from the loading logic, which makes it easier to re-use a given parser regardless of how the data was loaded.\n",
|
||||
"\n",
|
||||
@@ -355,7 +355,7 @@
|
||||
"id": "433bfb7c-7767-43bc-b71e-42413d7494a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using the **blob** API also allows one to load content directly from memory without having to read it from a file!"
|
||||
"Using the **blob** API also allows one to load content direclty from memory without having to read it from a file!"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -182,7 +182,7 @@ pprint(data)
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
Another option is to set `jq_schema='.'` and provide `content_key`:
|
||||
Another option is set `jq_schema='.'` and provide `content_key`:
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install \"unstructured[md]\" nltk"
|
||||
"%pip install \"unstructured[md]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -364,10 +364,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chains.graph_qa.cypher_utils import (\n",
|
||||
" CypherQueryCorrector,\n",
|
||||
" Schema,\n",
|
||||
")\n",
|
||||
"from langchain.chains.graph_qa.cypher_utils import CypherQueryCorrector, Schema\n",
|
||||
"\n",
|
||||
"# Cypher validation tool for relationship directions\n",
|
||||
"corrector_schema = [\n",
|
||||
|
||||
@@ -315,15 +315,6 @@ For a high-level tutorial, check out [this guide](/docs/tutorials/graph/).
|
||||
- [How to: improve results with prompting](/docs/how_to/graph_prompting)
|
||||
- [How to: construct knowledge graphs](/docs/how_to/graph_constructing)
|
||||
|
||||
### Summarization
|
||||
|
||||
LLMs can summarize and otherwise distill desired information from text, including
|
||||
large volumes of text. For a high-level tutorial, check out [this guide](/docs/tutorials/summarization).
|
||||
|
||||
- [How to: summarize text in a single LLM call](/docs/how_to/summarize_stuff)
|
||||
- [How to: summarize text through parallelization](/docs/how_to/summarize_map_reduce)
|
||||
- [How to: summarize text through iterative refinement](/docs/how_to/summarize_refine)
|
||||
|
||||
## [LangGraph](https://langchain-ai.github.io/langgraph)
|
||||
|
||||
LangGraph is an extension of LangChain aimed at
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.globals import set_llm_cache\n",
|
||||
"from langchain.globals import set_llm_cache\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"# To make the caching really obvious, lets use a slower and older model.\n",
|
||||
@@ -71,7 +71,7 @@
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"from langchain_core.caches import InMemoryCache\n",
|
||||
"from langchain.cache import InMemoryCache\n",
|
||||
"\n",
|
||||
"set_llm_cache(InMemoryCache())\n",
|
||||
"\n",
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%capture --no-stderr\n",
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-chroma beautifulsoup4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-chroma bs4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma beautifulsoup4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma beautifulsoup4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -38,8 +38,8 @@
|
||||
" Operator,\n",
|
||||
" StructuredQuery,\n",
|
||||
")\n",
|
||||
"from langchain_community.query_constructors.chroma import ChromaTranslator\n",
|
||||
"from langchain_community.query_constructors.elasticsearch import ElasticsearchTranslator\n",
|
||||
"from langchain.retrievers.self_query.chroma import ChromaTranslator\n",
|
||||
"from langchain.retrievers.self_query.elasticsearch import ElasticsearchTranslator\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -512,7 +512,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.query_constructors.chroma import ChromaTranslator\n",
|
||||
"from langchain.retrievers.self_query.chroma import ChromaTranslator\n",
|
||||
"\n",
|
||||
"retriever = SelfQueryRetriever(\n",
|
||||
" query_constructor=query_constructor,\n",
|
||||
|
||||
@@ -299,16 +299,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "423c6e099e94ca69",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"### Gradient\n",
|
||||
"\n",
|
||||
"In this method, the gradient of distance is used to split chunks along with the percentile method.\n",
|
||||
"This method is useful when chunks are highly correlated with each other or specific to a domain e.g. legal or medical. The idea is to apply anomaly detection on gradient array so that the distribution become wider and easy to identify boundaries in highly semantic data."
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "423c6e099e94ca69"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -325,8 +325,6 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e9f393d316ce1f6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -339,13 +337,13 @@
|
||||
"source": [
|
||||
"docs = text_splitter.create_documents([state_of_the_union])\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
],
|
||||
"metadata": {},
|
||||
"id": "e9f393d316ce1f6c"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "a407cd57f02a0db4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -357,7 +355,9 @@
|
||||
],
|
||||
"source": [
|
||||
"print(len(docs))"
|
||||
]
|
||||
],
|
||||
"metadata": {},
|
||||
"id": "a407cd57f02a0db4"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,209 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c47f5b2f-e14c-43e7-a0ab-d71562636624",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 3\n",
|
||||
"keywords: [summarize, summarization, stuff, create_stuff_documents_chain]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "682a4f53-27db-43ef-a909-dd9ded76051b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to summarize text in a single LLM call\n",
|
||||
"\n",
|
||||
"LLMs can summarize and otherwise distill desired information from text, including large volumes of text. In many cases, especially for models with larger context windows, this can be adequately achieved via a single LLM call.\n",
|
||||
"\n",
|
||||
"LangChain implements a simple [pre-built chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) that \"stuffs\" a prompt with the desired context for summarization and other purposes. In this guide we demonstrate how to use the chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4aa52e84-d1b5-4b33-b4c4-541156686ef3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load chat model\n",
|
||||
"\n",
|
||||
"Let's first load a chat model:\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs\n",
|
||||
" customVarName=\"llm\"\n",
|
||||
"/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e5f426fc-cea6-4351-8931-1e422d3c8b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b137fe82-0a53-4910-b53e-b87a297f329d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a81dc91d-ae72-4996-b809-d4a9050e815e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we need some documents to summarize. Below, we generate some toy documents for illustrative purposes. See the document loader [how-to guides](/docs/how_to/#document-loaders) and [integration pages](/docs/integrations/document_loaders/) for additional sources of data. The [summarization tutorial](/docs/tutorials/summarization) also includes an example summarizing a blog post."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "27c8fed0-b2d7-4549-a086-f5ee657efc41",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" Document(page_content=\"Apples are red\", metadata={\"title\": \"apple_book\"}),\n",
|
||||
" Document(page_content=\"Blueberries are blue\", metadata={\"title\": \"blueberry_book\"}),\n",
|
||||
" Document(page_content=\"Bananas are yelow\", metadata={\"title\": \"banana_book\"}),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84216044-6f1e-4b90-b4fa-29ec305abf51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load chain\n",
|
||||
"\n",
|
||||
"Below, we define a simple prompt and instantiate the chain with our chat model and documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "669afa40-2708-4fa1-841e-c74a67bd9175",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Summarize this content: {context}\")\n",
|
||||
"chain = create_stuff_documents_chain(llm, prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74f3e276-f003-4112-ba14-c6952076c4f8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invoke chain\n",
|
||||
"\n",
|
||||
"Because the chain is a [Runnable](/docs/concepts/#runnable-interface), it implements the usual methods for invocation:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0701bb7d-fbc6-497e-a577-25d56e6e43c6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The content describes the colors of three fruits: apples are red, blueberries are blue, and bananas are yellow.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"context\": documents})\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14fb5647-1458-43af-afb7-5aae7b8cab1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming\n",
|
||||
"\n",
|
||||
"Note that the chain also supports streaming of individual output tokens:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d7a5f67-2ec8-4f90-b085-2969fcb14dce",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"|The| content| describes| the| colors| of| three| fruits|:| apples| are| red|,| blueberries| are| blue|,| and| bananas| are| yellow|.||"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.stream({\"context\": documents}):\n",
|
||||
" print(chunk, end=\"|\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f15c225a-db1d-48cf-b135-f588e7d615e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See the summarization [how-to guides](/docs/how_to/#summarization) for additional summarization strategies, including those designed for larger volumes of text.\n",
|
||||
"\n",
|
||||
"See also [this tutorial](/docs/tutorials/summarization) for more detail on summarization."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -37,8 +37,7 @@
|
||||
"%pip install --upgrade --quiet infinopy\n",
|
||||
"%pip install --upgrade --quiet matplotlib\n",
|
||||
"%pip install --upgrade --quiet tiktoken\n",
|
||||
"%pip install --upgrade --quiet langchain langchain-openai langchain-community\n",
|
||||
"%pip install --upgrade --quiet beautifulsoup4"
|
||||
"%pip install --upgrade --quiet langchain langchain-openai langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,8 +6,6 @@ keywords: [compatibility]
|
||||
|
||||
# Chat models
|
||||
|
||||
[Chat models](/docs/concepts/#chat-models) are language models that use a sequence of [messages](/docs/concepts/#messages) as inputs and return messages as outputs (as opposed to using plain text). These are generally newer models.
|
||||
|
||||
:::info
|
||||
|
||||
If you'd like to write your own chat model, see [this how-to](/docs/how_to/custom_chat_model/).
|
||||
|
||||
@@ -226,8 +226,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools import tool\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_core.tools import tool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class WeatherInput(BaseModel):\n",
|
||||
|
||||
@@ -110,7 +110,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 3,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -118,7 +118,7 @@
|
||||
"from langchain_ollama import ChatOllama\n",
|
||||
"\n",
|
||||
"llm = ChatOllama(\n",
|
||||
" model=\"llama3.1\",\n",
|
||||
" model=\"llama3\",\n",
|
||||
" temperature=0,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
@@ -134,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -143,10 +143,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='The translation of \"I love programming\" from English to French is:\\n\\n\"J\\'adore programmer.\"', response_metadata={'model': 'llama3.1', 'created_at': '2024-08-19T16:05:32.81965Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2167842917, 'load_duration': 54222584, 'prompt_eval_count': 35, 'prompt_eval_duration': 893007000, 'eval_count': 22, 'eval_duration': 1218962000}, id='run-0863daa2-43bf-4a43-86cc-611b23eae466-0', usage_metadata={'input_tokens': 35, 'output_tokens': 22, 'total_tokens': 57})"
|
||||
"AIMessage(content='Je adore le programmation.\\n\\n(Note: \"programmation\" is not commonly used in French, but I translated it as \"le programmation\" to maintain the same grammatical structure and meaning as the original English sentence.)', response_metadata={'model': 'llama3', 'created_at': '2024-07-22T17:43:54.731273Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 11094839375, 'load_duration': 10121854667, 'prompt_eval_count': 36, 'prompt_eval_duration': 146569000, 'eval_count': 46, 'eval_duration': 816593000}, id='run-befccbdc-e1f9-42a9-85cf-e69b926d6b8b-0', usage_metadata={'input_tokens': 36, 'output_tokens': 46, 'total_tokens': 82})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -167,7 +167,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 5,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -175,9 +175,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The translation of \"I love programming\" from English to French is:\n",
|
||||
"Je adore le programmation.\n",
|
||||
"\n",
|
||||
"\"J'adore programmer.\"\n"
|
||||
"(Note: \"programmation\" is not commonly used in French, but I translated it as \"le programmation\" to maintain the same grammatical structure and meaning as the original English sentence.)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -197,17 +197,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Das Programmieren ist mir ein Leidenschaft! (That\\'s \"Programming is my passion!\" in German.) Would you like me to translate anything else?', response_metadata={'model': 'llama3.1', 'created_at': '2024-08-19T16:05:34.893548Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2045997333, 'load_duration': 22584792, 'prompt_eval_count': 30, 'prompt_eval_duration': 213210000, 'eval_count': 32, 'eval_duration': 1808541000}, id='run-d18e1c6b-50e0-4b1d-b23a-973fa058edad-0', usage_metadata={'input_tokens': 30, 'output_tokens': 32, 'total_tokens': 62})"
|
||||
"AIMessage(content='Ich liebe Programmieren!\\n\\n(Note: \"Ich liebe\" means \"I love\", \"Programmieren\" is the verb for \"programming\")', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T04:22:33.864132Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1310800083, 'load_duration': 1782000, 'prompt_eval_count': 16, 'prompt_eval_duration': 250199000, 'eval_count': 29, 'eval_duration': 1057192000}, id='run-cbadbe59-2de2-4ec0-a18a-b3220226c3d2-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -242,32 +242,33 @@
|
||||
"source": [
|
||||
"## Tool calling\n",
|
||||
"\n",
|
||||
"We can use [tool calling](https://blog.langchain.dev/improving-core-tool-interfaces-and-docs-in-langchain/) with an LLM [that has been fine-tuned for tool use](https://ollama.com/library/llama3.1): \n",
|
||||
"We can use [tool calling](https://blog.langchain.dev/improving-core-tool-interfaces-and-docs-in-langchain/) with an LLM [that has been fine-tuned for tool use](https://ollama.com/library/llama3-groq-tool-use): \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ollama pull llama3.1\n",
|
||||
"ollama pull llama3-groq-tool-use\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Details on creating custom tools are available in [this guide](/docs/how_to/custom_tools/). Below, we demonstrate how to create a tool using the `@tool` decorator on a normal python function."
|
||||
"We can just pass normal Python functions directly as tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "f767015f",
|
||||
"execution_count": 10,
|
||||
"id": "5250bceb-1029-41ff-b447-983518704d88",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'validate_user',\n",
|
||||
" 'args': {'addresses': '[\"123 Fake St, Boston, MA\", \"234 Pretend Boulevard, Houston, TX\"]',\n",
|
||||
" 'user_id': '123'},\n",
|
||||
" 'id': '40fe3de0-500c-4b91-9616-5932a929e640',\n",
|
||||
" 'args': {'addresses': ['123 Fake St, Boston MA',\n",
|
||||
" '234 Pretend Boulevard, Houston TX'],\n",
|
||||
" 'user_id': 123},\n",
|
||||
" 'id': 'fe2148d3-95fb-48e9-845a-4bfecc1f1f96',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -275,23 +276,22 @@
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from langchain_core.tools import tool\n",
|
||||
"from langchain_ollama import ChatOllama\n",
|
||||
"from typing_extensions import TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@tool\n",
|
||||
"def validate_user(user_id: int, addresses: List[str]) -> bool:\n",
|
||||
"def validate_user(user_id: int, addresses: List) -> bool:\n",
|
||||
" \"\"\"Validate user using historical addresses.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" user_id (int): the user ID.\n",
|
||||
" addresses (List[str]): Previous addresses as a list of strings.\n",
|
||||
" user_id: (int) the user ID.\n",
|
||||
" addresses: Previous addresses.\n",
|
||||
" \"\"\"\n",
|
||||
" return True\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = ChatOllama(\n",
|
||||
" model=\"llama3.1\",\n",
|
||||
" model=\"llama3-groq-tool-use\",\n",
|
||||
" temperature=0,\n",
|
||||
").bind_tools([validate_user])\n",
|
||||
"\n",
|
||||
@@ -303,6 +303,18 @@
|
||||
"result.tool_calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2bb034ff-218f-4865-afea-3f5e57d3bdee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We look at the LangSmith trace to see that the tool call was performed: \n",
|
||||
"\n",
|
||||
"https://smith.langchain.com/public/4169348a-d6be-45df-a7cf-032f6baa4697/r\n",
|
||||
"\n",
|
||||
"In particular, the trace shows how the tool schema was populated."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4c5e0197",
|
||||
@@ -319,7 +331,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 11,
|
||||
"id": "36c9b1c2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -379,7 +391,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 12,
|
||||
"id": "32b3ba7b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -455,7 +467,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.4"
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
322
docs/docs/integrations/chat/ollama_functions.ipynb
Normal file
322
docs/docs/integrations/chat/ollama_functions.ipynb
Normal file
@@ -0,0 +1,322 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Ollama Functions\n",
|
||||
"sidebar_class_name: hidden\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OllamaFunctions\n",
|
||||
"\n",
|
||||
":::warning\n",
|
||||
"\n",
|
||||
"This was an experimental wrapper that attempts to bolt-on tool calling support to models that do not natively support it. The [primary Ollama integration](/docs/integrations/chat/ollama/) now supports tool calling, and should be used instead.\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"This notebook shows how to use an experimental wrapper around Ollama that gives it [tool calling capabilities](https://python.langchain.com/v0.2/docs/concepts/#functiontool-calling).\n",
|
||||
"\n",
|
||||
"Note that more powerful and capable models will perform better with complex schema and/or multiple functions. The examples below use llama3 and phi3 models.\n",
|
||||
"For a complete list of supported models and model variants, see the [Ollama model library](https://ollama.ai/library).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"|:-----------------------------------------------------------------------------------------------------------------------------------:|:-------:|:-----:|:------------:|:----------:|:-----------------:|:--------------:|\n",
|
||||
"| [OllamaFunctions](https://api.python.langchain.com/en/latest/llms/langchain_experimental.llms.ollama_function.OllamaFunctions.html) | [langchain-experimental](https://api.python.langchain.com/en/latest/openai_api_reference.html) | ✅ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | Image input | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `OllamaFunctions` you will need to install `langchain-experimental` integration package.\n",
|
||||
"Follow [these instructions](https://github.com/jmorganca/ollama) to set up and run a local Ollama instance as well as download and serve [supported models](https://ollama.com/library).\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Credentials support is not present at this time.\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The `OllamaFunctions` class lives in the `langchain-experimental` package:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-experimental"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"`OllamaFunctions` takes the same init parameters as `ChatOllama`. \n",
|
||||
"\n",
|
||||
"In order to use tool calling, you must also specify `format=\"json\"`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-23T15:20:21.818089Z",
|
||||
"start_time": "2024-06-23T15:20:21.815759Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_experimental.llms.ollama_functions import OllamaFunctions\n",
|
||||
"\n",
|
||||
"llm = OllamaFunctions(model=\"phi3\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-23T15:20:46.794689Z",
|
||||
"start_time": "2024-06-23T15:20:44.982632Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore programmer.\", id='run-94815fcf-ae11-438a-ba3f-00819328b5cd-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"J'adore programmer.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](https://python.langchain.com/v0.2/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Programmieren ist sehr verrückt! Es freut mich, dass Sie auf Programmierung so positiv eingestellt sind.', id='run-ee99be5e-4d48-4ab6-b602-35415f0bdbde-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Calling\n",
|
||||
"\n",
|
||||
"### OllamaFunctions.bind_tools()\n",
|
||||
"\n",
|
||||
"With `OllamaFunctions.bind_tools`, we can easily pass in Pydantic classes, dict schemas, LangChain tools, or even functions as tools to the model. Under the hood these are converted to a tool definition schemas, which looks like:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetWeather(BaseModel):\n",
|
||||
" \"\"\"Get the current weather in a given location\"\"\"\n",
|
||||
"\n",
|
||||
" location: str = Field(..., description=\"The city and state, e.g. San Francisco, CA\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm_with_tools = llm.bind_tools([GetWeather])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', id='run-b9769435-ec6a-4cb8-8545-5a5035fc19bd-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_064c4e1cb27e4adb9e4e7ed60362ecc9'}])"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg = llm_with_tools.invoke(\n",
|
||||
" \"what is the weather like in San Francisco\",\n",
|
||||
")\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### AIMessage.tool_calls\n",
|
||||
"\n",
|
||||
"Notice that the AIMessage has a `tool_calls` attribute. This contains in a standardized `ToolCall` format that is model-provider agnostic."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'GetWeather',\n",
|
||||
" 'args': {'location': 'San Francisco, CA'},\n",
|
||||
" 'id': 'call_064c4e1cb27e4adb9e4e7ed60362ecc9'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg.tool_calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more on binding tools and tool call outputs, head to the [tool calling](../../how_to/function_calling.ipynb) docs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ToolCallingLLM features and configurations head to the API reference: https://api.python.langchain.com/en/latest/llms/langchain_experimental.llms.ollama_functions.OllamaFunctions.html\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -151,7 +151,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"id": "ce16ad78-8e6f-48cd-954e-98be75eb5836",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -160,10 +160,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 31, 'total_tokens': 36}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-63219b22-03e3-4561-8cc4-78b7c7c3a3ca-0', usage_metadata={'input_tokens': 31, 'output_tokens': 5, 'total_tokens': 36})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 31, 'total_tokens': 36}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_43dfabdef1', 'finish_reason': 'stop', 'logprobs': None}, id='run-012cffe2-5d3d-424d-83b5-51c6d4a593d1-0', usage_metadata={'input_tokens': 31, 'output_tokens': 5, 'total_tokens': 36})"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -182,7 +182,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "2cd224b8-4499-41fb-a604-d53a7ff17b2e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -210,7 +210,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "fbb043e6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -219,10 +219,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe das Programmieren.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 26, 'total_tokens': 32}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-350585e1-16ca-4dad-9460-3d9e7e49aaf1-0', usage_metadata={'input_tokens': 26, 'output_tokens': 6, 'total_tokens': 32})"
|
||||
"AIMessage(content='Ich liebe Programmieren.', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 26, 'total_tokens': 31}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, id='run-94fa6741-c99b-4513-afce-c3f562631c79-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -274,7 +274,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"id": "b7ea7690-ec7a-4337-b392-e87d1f39a6ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -293,17 +293,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"id": "1d1ab955-6a68-42f8-bb5d-86eb1111478a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_o9udf3EVOWiV4Iupktpbpofk', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-1617c9b2-dda5-4120-996b-0333ed5992e2-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_o9udf3EVOWiV4Iupktpbpofk', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_H7fABDuzEau48T10Qn0Lsh0D', 'function': {'arguments': '{\"location\":\"San Francisco\"}', 'name': 'GetWeather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 70, 'total_tokens': 85}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b469135e-2718-446a-8164-eef37e672ba2-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco'}, 'id': 'call_H7fABDuzEau48T10Qn0Lsh0D'}])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -333,17 +333,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"id": "dc8ac4f1-4039-4392-90c1-2d8331cd6910",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_jUqhd8wzAIzInTJl72Rla8ht', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5e3356a9-132d-4623-8e73-dd5a898cf4a6-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_jUqhd8wzAIzInTJl72Rla8ht', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VYEfpPDh3npMQ95J9EWmWvSn', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a4c6749b-adbb-45c7-8b17-8d6835d5c443-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_VYEfpPDh3npMQ95J9EWmWvSn', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -367,7 +367,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"id": "166cb7ce-831d-4a7c-9721-abc107f11084",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -375,12 +375,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'GetWeather',\n",
|
||||
" 'args': {'location': 'San Francisco, CA'},\n",
|
||||
" 'id': 'call_jUqhd8wzAIzInTJl72Rla8ht',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
" 'args': {'location': 'San Francisco'},\n",
|
||||
" 'id': 'call_H7fABDuzEau48T10Qn0Lsh0D'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -411,17 +410,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"id": "33c4a8b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 31, 'total_tokens': 39}, 'model_name': 'ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0f39b30e-c56e-4f3b-af99-5c948c984146-0', usage_metadata={'input_tokens': 31, 'output_tokens': 8, 'total_tokens': 39})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -431,7 +430,7 @@
|
||||
" temperature=0, model_name=\"ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fine_tuned_model.invoke(messages)"
|
||||
"fine_tuned_model(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -447,9 +446,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -461,7 +460,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -53,8 +53,7 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if \"TOGETHER_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
"os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,10 +87,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-together"
|
||||
]
|
||||
@@ -103,12 +113,14 @@
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
"Now we can instantiate our model object and generate chat completions:\n",
|
||||
"\n",
|
||||
"- TODO: Update model instantiation with relevant params."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -135,7 +147,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -144,10 +156,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 35, 'total_tokens': 44}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-eabcbe33-cdd8-45b8-ab0b-f90b6e7dfad8-0', usage_metadata={'input_tokens': 35, 'output_tokens': 9, 'total_tokens': 44})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 35, 'total_tokens': 44}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-79efa49b-dbaf-4ef8-9dce-958533823ef6-0', usage_metadata={'input_tokens': 35, 'output_tokens': 9, 'total_tokens': 44})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -166,7 +178,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -194,17 +206,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 30, 'total_tokens': 37}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a249aa24-ee31-46ba-9bf9-f4eb135b0a95-0', usage_metadata={'input_tokens': 30, 'output_tokens': 7, 'total_tokens': 37})"
|
||||
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 30, 'total_tokens': 37}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-80bba5fa-1723-4242-8d5a-c09b76b8350b-0', usage_metadata={'input_tokens': 30, 'output_tokens': 7, 'total_tokens': 37})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -259,7 +271,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,243 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BSHTMLLoader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with BeautifulSoup4 [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [BSHTMLLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| BSHTMLLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access BSHTMLLoader document loader you'll need to install the `langchain-community` integration package and the `bs4` python package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use the `BSHTMLLoader` class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **bs4**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community bs4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:\n",
|
||||
"\n",
|
||||
"- TODO: Update model instantiation with relevant params."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import BSHTMLLoader\n",
|
||||
"\n",
|
||||
"loader = BSHTMLLoader(\n",
|
||||
" file_path=\"./example_data/fake-content.html\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}, page_content='\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/fake-content.html', 'title': 'Test Title'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}, page_content='\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []\n",
|
||||
"page[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Adding separator to BS4\n",
|
||||
"\n",
|
||||
"We can also pass a separator to use when calling get_text on the soup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='\n",
|
||||
", Test Title, \n",
|
||||
", \n",
|
||||
", \n",
|
||||
", My First Heading, \n",
|
||||
", My first paragraph., \n",
|
||||
", \n",
|
||||
", \n",
|
||||
"' metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = BSHTMLLoader(\n",
|
||||
" file_path=\"./example_data/fake-content.html\", get_text_separator=\", \"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all BSHTMLLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,55 +0,0 @@
|
||||
# Sample Markdown Document
|
||||
|
||||
## Introduction
|
||||
|
||||
Welcome to this sample Markdown document. Markdown is a lightweight markup language used for formatting text. It's widely used for documentation, readme files, and more.
|
||||
|
||||
## Features
|
||||
|
||||
### Headers
|
||||
|
||||
Markdown supports multiple levels of headers:
|
||||
|
||||
- **Header 1**: `# Header 1`
|
||||
- **Header 2**: `## Header 2`
|
||||
- **Header 3**: `### Header 3`
|
||||
|
||||
### Lists
|
||||
|
||||
#### Unordered List
|
||||
|
||||
- Item 1
|
||||
- Item 2
|
||||
- Subitem 2.1
|
||||
- Subitem 2.2
|
||||
|
||||
#### Ordered List
|
||||
|
||||
1. First item
|
||||
2. Second item
|
||||
3. Third item
|
||||
|
||||
### Links
|
||||
|
||||
[OpenAI](https://www.openai.com) is an AI research organization.
|
||||
|
||||
### Images
|
||||
|
||||
Here's an example image:
|
||||
|
||||

|
||||
|
||||
### Code
|
||||
|
||||
#### Inline Code
|
||||
|
||||
Use `code` for inline code snippets.
|
||||
|
||||
#### Code Block
|
||||
|
||||
```python
|
||||
def greet(name):
|
||||
return f"Hello, {name}!"
|
||||
|
||||
print(greet("World"))
|
||||
```
|
||||
@@ -30,7 +30,6 @@
|
||||
{
|
||||
"sender_name": "User 2",
|
||||
"timestamp_ms": 1675595060730,
|
||||
"content": "",
|
||||
"photos": [
|
||||
{"uri": "url_of_some_picture.jpg", "creation_timestamp": 1675595059}
|
||||
]
|
||||
|
||||
@@ -164,7 +164,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import GithubFileLoader"
|
||||
"from langchain.document_loaders import GithubFileLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -21,24 +21,24 @@ loader = CSVLoader(
|
||||
data = loader.load()
|
||||
```
|
||||
|
||||
## Webpages
|
||||
|
||||
The below document loaders allow you to load webpages.
|
||||
|
||||
<CategoryTable category="webpage_loaders" />
|
||||
|
||||
## PDFs
|
||||
|
||||
The below document loaders allow you to load PDF documents.
|
||||
|
||||
<CategoryTable category="pdf_loaders" />
|
||||
|
||||
## Common File Types
|
||||
|
||||
The below document loaders allow you to load data from common data formats.
|
||||
|
||||
<CategoryTable category="common_loaders" />
|
||||
|
||||
## PDFs
|
||||
|
||||
The below document loaders allow you to load documents.
|
||||
|
||||
<CategoryTable category="pdf_loaders" />
|
||||
|
||||
## Webpages
|
||||
|
||||
The below document loaders allow you to load webpages.
|
||||
|
||||
<CategoryTable category="webpage_loaders" />
|
||||
|
||||
|
||||
## All document loaders
|
||||
|
||||
|
||||
@@ -1,348 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# JSONLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with JSON [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all JSONLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html).\n",
|
||||
"\n",
|
||||
"- TODO: Add any other relevant links, like information about underlying API, etc.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/json/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [JSONLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| JSONLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access JSON document loader you'll need to install the `langchain-community` integration package as well as the ``jq`` python package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use the `JSONLoader` class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **jq**:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community jq "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:\n",
|
||||
"\n",
|
||||
"- TODO: Update model instantiation with relevant params."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import JSONLoader\n",
|
||||
"\n",
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat.json\",\n",
|
||||
" jq_schema=\".messages[].content\",\n",
|
||||
" text_content=False,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}, page_content='Bye!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pages = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" pages.append(doc)\n",
|
||||
" if len(pages) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(pages)\n",
|
||||
"\n",
|
||||
" pages = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Read from JSON Lines file\n",
|
||||
"\n",
|
||||
"If you want to load documents from a JSON Lines file, you pass `json_lines=True`\n",
|
||||
"and specify `jq_schema` to extract `page_content` from a single JSON object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Bye!' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat_messages.jsonl\",\n",
|
||||
" jq_schema=\".content\",\n",
|
||||
" text_content=False,\n",
|
||||
" json_lines=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Read specific content keys\n",
|
||||
"\n",
|
||||
"Another option is to set `jq_schema='.'` and provide a `content_key` in order to only load specific content:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='User 2' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat_messages.jsonl\",\n",
|
||||
" jq_schema=\".\",\n",
|
||||
" content_key=\"sender_name\",\n",
|
||||
" json_lines=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## JSON file with jq schema `content_key`\n",
|
||||
"\n",
|
||||
"To load documents from a JSON file using the `content_key` within the jq schema, set `is_content_key_jq_parsable=True`. Ensure that `content_key` is compatible and can be parsed using the jq schema."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Bye!' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat.json\",\n",
|
||||
" jq_schema=\".messages[]\",\n",
|
||||
" content_key=\".content\",\n",
|
||||
" is_content_key_jq_parsable=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extracting metadata\n",
|
||||
"\n",
|
||||
"Generally, we want to include metadata available in the JSON file into the documents that we create from the content.\n",
|
||||
"\n",
|
||||
"The following demonstrates how metadata can be extracted using the `JSONLoader`.\n",
|
||||
"\n",
|
||||
"There are some key changes to be noted. In the previous example where we didn't collect the metadata, we managed to directly specify in the schema where the value for the `page_content` can be extracted from.\n",
|
||||
"\n",
|
||||
"In this example, we have to tell the loader to iterate over the records in the `messages` field. The jq_schema then has to be `.messages[]`\n",
|
||||
"\n",
|
||||
"This allows us to pass the records (dict) into the `metadata_func` that has to be implemented. The `metadata_func` is responsible for identifying which pieces of information in the record should be included in the metadata stored in the final `Document` object.\n",
|
||||
"\n",
|
||||
"Additionally, we now have to explicitly specify in the loader, via the `content_key` argument, the key from the record where the value for the `page_content` needs to be extracted from."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Define the metadata extraction function.\n",
|
||||
"def metadata_func(record: dict, metadata: dict) -> dict:\n",
|
||||
" metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
|
||||
" metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
|
||||
"\n",
|
||||
" return metadata\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat.json\",\n",
|
||||
" jq_schema=\".messages[]\",\n",
|
||||
" content_key=\"content\",\n",
|
||||
" metadata_func=metadata_func,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all JSONLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,178 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MathPixPDFLoader\n",
|
||||
"\n",
|
||||
"Inspired by Daniel Gross's snippet here: [https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21](https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21)\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [MathPixPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.MathpixPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| MathPixPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Sign up for Mathpix and [create an API key](https://mathpix.com/docs/ocr/creating-an-api-key) to set the `MATHPIX_API_KEY` variables in your environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if \"MATHPIX_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"MATHPIX_API_KEY\"] = getpass.getpass(\"Enter your Mathpix API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we are ready to initialize our loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import MathpixPDFLoader\n",
|
||||
"\n",
|
||||
"file_path = \"./example_data/layout-parser-paper.pdf\"\n",
|
||||
"loader = MathpixPDFLoader(file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all MathpixPDFLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.MathpixPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1,183 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PDFPlumber\n",
|
||||
"\n",
|
||||
"Like PyMuPDF, the output Documents contain detailed metadata about the PDF and its pages, and returns one document per page.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PDFPlumberLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PDFPlumberLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PDFPlumberLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use this loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PDFPlumberLoader\n",
|
||||
"\n",
|
||||
"loader = PDFPlumberLoader(\"./example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'Author': '', 'CreationDate': 'D:20210622012710Z', 'Creator': 'LaTeX with hyperref', 'Keywords': '', 'ModDate': 'D:20210622012710Z', 'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'Producer': 'pdfTeX-1.40.21', 'Subject': '', 'Title': '', 'Trapped': 'False'}, page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recentadvancesindocumentimageanalysis(DIA)havebeen\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomescouldbeeasilydeployedinproductionandextendedforfurther\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportantinnovationsbyawideaudience.Thoughtherehavebeenon-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopmentindisciplineslikenaturallanguageprocessingandcomputer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademicresearchacross awiderangeof disciplinesinthesocialsciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitiveinterfacesforapplyingandcustomizingDLmodelsforlayoutde-\\ntection,characterrecognition,andmanyotherdocumentprocessingtasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: DocumentImageAnalysis·DeepLearning·LayoutAnalysis\\n· Character Recognition · Open Source library · Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocumentimageanalysis(DIA)tasksincludingdocumentimageclassification[11,\\n1202\\nnuJ\\n12\\n]VC.sc[\\n2v84351.3012:viXra\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'Author': '', 'CreationDate': 'D:20210622012710Z', 'Creator': 'LaTeX with hyperref', 'Keywords': '', 'ModDate': 'D:20210622012710Z', 'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'Producer': 'pdfTeX-1.40.21', 'Subject': '', 'Title': '', 'Trapped': 'False'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PDFPlumberLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PDFPlumberLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,185 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyMuPDF\n",
|
||||
"\n",
|
||||
"`PyMuPDF` is optimized for speed, and contains detailed metadata about the PDF and its pages. It returns one document per page.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyMuPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyMuPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyMuPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use the `PyMuPDFLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **pymupdf**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community pymupdf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can initialize our loader and start loading documents. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyMuPDFLoader\n",
|
||||
"\n",
|
||||
"loader = PyMuPDFLoader(\"./example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load\n",
|
||||
"\n",
|
||||
"You can pass along any of the options from the [PyMuPDF documentation](https://pymupdf.readthedocs.io/en/latest/app1.html#plain-text/) as keyword arguments in the `load` call, and it will be pass along to the `get_text()` call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': ''}, page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (\\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': ''}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PyMuPDFLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyMuPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,187 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFDirectoryLoader\n",
|
||||
"\n",
|
||||
"This loader loads all PDF files from a specific directory.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFDirectoryLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFDirectoryLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed for this loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFDirectoryLoader\n",
|
||||
"\n",
|
||||
"directory_path = (\n",
|
||||
" \"../../docs/integrations/document_loaders/example_data/layout-parser-paper.pdf\"\n",
|
||||
")\n",
|
||||
"loader = PyPDFDirectoryLoader(\"example_data/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': 'example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': 'example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PyPDFDirectoryLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,188 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFium2Loader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with PyPDFium2 [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFium2Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFium2Loader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"To access PyPDFium2 document loader you'll need to install the `langchain-community` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFium2Loader\n",
|
||||
"\n",
|
||||
"file_path = \"./example_data/layout-parser-paper.pdf\"\n",
|
||||
"loader = PyPDFium2Loader(file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser: A Unified Toolkit for Deep\\r\\nLearning Based Document Image Analysis\\r\\nZejiang Shen\\r\\n1\\r\\n(), Ruochen Zhang\\r\\n2\\r\\n, Melissa Dell\\r\\n3\\r\\n, Benjamin Charles Germain\\r\\nLee\\r\\n4\\r\\n, Jacob Carlson\\r\\n3\\r\\n, and Weining Li\\r\\n5\\r\\n1 Allen Institute for AI\\r\\nshannons@allenai.org 2 Brown University\\r\\nruochen zhang@brown.edu 3 Harvard University\\r\\n{melissadell,jacob carlson}@fas.harvard.edu\\r\\n4 University of Washington\\r\\nbcgl@cs.washington.edu 5 University of Waterloo\\r\\nw422li@uwaterloo.ca\\r\\nAbstract. Recent advances in document image analysis (DIA) have been\\r\\nprimarily driven by the application of neural networks. Ideally, research\\r\\noutcomes could be easily deployed in production and extended for further\\r\\ninvestigation. However, various factors like loosely organized codebases\\r\\nand sophisticated model configurations complicate the easy reuse of im\\x02portant innovations by a wide audience. Though there have been on-going\\r\\nefforts to improve reusability and simplify deep learning (DL) model\\r\\ndevelopment in disciplines like natural language processing and computer\\r\\nvision, none of them are optimized for challenges in the domain of DIA.\\r\\nThis represents a major gap in the existing toolkit, as DIA is central to\\r\\nacademic research across a wide range of disciplines in the social sciences\\r\\nand humanities. This paper introduces LayoutParser, an open-source\\r\\nlibrary for streamlining the usage of DL in DIA research and applica\\x02tions. The core LayoutParser library comes with a set of simple and\\r\\nintuitive interfaces for applying and customizing DL models for layout de\\x02tection, character recognition, and many other document processing tasks.\\r\\nTo promote extensibility, LayoutParser also incorporates a community\\r\\nplatform for sharing both pre-trained models and full document digiti\\x02zation pipelines. We demonstrate that LayoutParser is helpful for both\\r\\nlightweight and large-scale digitization pipelines in real-word use cases.\\r\\nThe library is publicly available at https://layout-parser.github.io.\\r\\nKeywords: Document Image Analysis· Deep Learning· Layout Analysis\\r\\n· Character Recognition· Open Source library· Toolkit.\\r\\n1 Introduction\\r\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\r\\ndocument image analysis (DIA) tasks including document image classification [11,\\r\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PyPDFium2Loader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -6,35 +6,9 @@
|
||||
"source": [
|
||||
"# Sitemap\n",
|
||||
"\n",
|
||||
"Extends from the `WebBaseLoader`, `SitemapLoader` loads a sitemap from a given URL, and then scrapes and loads all pages in the sitemap, returning each page as a Document.\n",
|
||||
"Extends from the `WebBaseLoader`, `SitemapLoader` loads a sitemap from a given URL, and then scrape and load all pages in the sitemap, returning each page as a Document.\n",
|
||||
"\n",
|
||||
"The scraping is done concurrently. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the scrapped server, or don't care about load you can increase this limit. Note, while this will speed up the scraping process, it may cause the server to block you. Be careful!\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/sitemap/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [SiteMapLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.sitemap.SitemapLoader.html#langchain_community.document_loaders.sitemap.SitemapLoader) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| SiteMapLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access SiteMap document loader you'll need to install the `langchain-community` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to run this."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
"The scraping is done concurrently. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the scrapped server, or don't care about load. Note, while this will speed up the scraping process, but it may cause the server to block you. Be careful!"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,55 +17,21 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
"%pip install --upgrade --quiet nest_asyncio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Fix notebook asyncio bug"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# fixes a bug with asyncio and jupyter\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -103,63 +43,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sitemap_loader = SitemapLoader(web_path=\"https://api.python.langchain.com/sitemap.xml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching pages: 100%|##########| 28/28 [00:04<00:00, 6.83it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-05-15T00:29:42.163001+00:00', 'changefreq': 'weekly', 'priority': '1'}, page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = sitemap_loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-05-15T00:29:42.163001+00:00', 'changefreq': 'weekly', 'priority': '1'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
"sitemap_loader = SitemapLoader(web_path=\"https://api.python.langchain.com/sitemap.xml\")\n",
|
||||
"\n",
|
||||
"docs = sitemap_loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -181,37 +71,24 @@
|
||||
"sitemap_loader.requests_kwargs = {\"verify\": False}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load\n",
|
||||
"\n",
|
||||
"You can also load the pages lazily in order to minimize the memory load."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching pages: 100%|##########| 28/28 [00:01<00:00, 19.06it/s]\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n', metadata={'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-02-09T01:10:49.422114+00:00', 'changefreq': 'weekly', 'priority': '1'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in sitemap_loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -347,13 +224,11 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all SiteMapLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.sitemap.SitemapLoader.html#langchain_community.document_loaders.sitemap.SitemapLoader"
|
||||
]
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -372,7 +247,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,269 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# UnstructuredMarkdownLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with UnstructuredMarkdown [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/unstructured/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [UnstructuredMarkdownLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| UnstructuredMarkdownLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access UnstructuredMarkdownLoader document loader you'll need to install the `langchain-community` integration package and the `unstructured` python package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use this loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **unstructured**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community unstructured"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents. \n",
|
||||
"\n",
|
||||
"You can run the loader in one of two modes: \"single\" and \"elements\". If you use \"single\" mode, the document will be returned as a single `Document` object. If you use \"elements\" mode, the unstructured library will split the document into elements such as `Title` and `NarrativeText`. You can pass in additional `unstructured` kwargs after mode to apply different `unstructured` settings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import UnstructuredMarkdownLoader\n",
|
||||
"\n",
|
||||
"loader = UnstructuredMarkdownLoader(\n",
|
||||
" \"./example_data/example.md\",\n",
|
||||
" mode=\"single\",\n",
|
||||
" strategy=\"fast\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/example.md'}, page_content='Sample Markdown Document\\n\\nIntroduction\\n\\nWelcome to this sample Markdown document. Markdown is a lightweight markup language used for formatting text. It\\'s widely used for documentation, readme files, and more.\\n\\nFeatures\\n\\nHeaders\\n\\nMarkdown supports multiple levels of headers:\\n\\nHeader 1: # Header 1\\n\\nHeader 2: ## Header 2\\n\\nHeader 3: ### Header 3\\n\\nLists\\n\\nUnordered List\\n\\nItem 1\\n\\nItem 2\\n\\nSubitem 2.1\\n\\nSubitem 2.2\\n\\nOrdered List\\n\\nFirst item\\n\\nSecond item\\n\\nThird item\\n\\nLinks\\n\\nOpenAI is an AI research organization.\\n\\nImages\\n\\nHere\\'s an example image:\\n\\nCode\\n\\nInline Code\\n\\nUse code for inline code snippets.\\n\\nCode Block\\n\\n```python def greet(name): return f\"Hello, {name}!\"\\n\\nprint(greet(\"World\")) ```')"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/example.md'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/example.md', 'link_texts': ['OpenAI'], 'link_urls': ['https://www.openai.com'], 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'parent_id': 'de1f74bf226224377ab4d8b54f215bb9', 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'NarrativeText', 'element_id': '898a542a261f7dc65e0072d1e847d535'}, page_content='OpenAI is an AI research organization.')"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []\n",
|
||||
"page[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Elements\n",
|
||||
"\n",
|
||||
"In this example we will load in the `elements` mode, which will return a list of the different elements in the markdown document:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"29"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import UnstructuredMarkdownLoader\n",
|
||||
"\n",
|
||||
"loader = UnstructuredMarkdownLoader(\n",
|
||||
" \"./example_data/example.md\",\n",
|
||||
" mode=\"elements\",\n",
|
||||
" strategy=\"fast\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As you see there are 29 elements that were pulled from the `example.md` file. The first element is the title of the document as expected:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Sample Markdown Document'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all UnstructuredMarkdownLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -15,47 +15,45 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "427d5745",
|
||||
"metadata": {},
|
||||
"source": "from langchain_community.document_loaders import YoutubeLoader",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import YoutubeLoader"
|
||||
]
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "34a25b57",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet youtube-transcript-api"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bc8b308a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = YoutubeLoader.from_youtube_url(\n",
|
||||
" \"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=False\n",
|
||||
")"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d073dd36",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
@@ -68,26 +66,26 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ba28af69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pytube"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9b8ea390",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = YoutubeLoader.from_youtube_url(\n",
|
||||
" \"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
@@ -104,10 +102,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08510625",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = YoutubeLoader.from_youtube_url(\n",
|
||||
" \"https://www.youtube.com/watch?v=QsYGlZkevEg\",\n",
|
||||
@@ -116,12 +112,13 @@
|
||||
" translation=\"en\",\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "69f4e399a9764d73",
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Get transcripts as timestamped chunks\n",
|
||||
"\n",
|
||||
@@ -130,14 +127,12 @@
|
||||
"`transcript_format` param: One of the `langchain_community.document_loaders.youtube.TranscriptFormat` values. In this case, `TranscriptFormat.CHUNKS`.\n",
|
||||
"\n",
|
||||
"`chunk_size_seconds` param: An integer number of video seconds to be represented by each chunk of transcript data. Default is 120 seconds."
|
||||
]
|
||||
],
|
||||
"id": "69f4e399a9764d73"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "540bbf19182f38bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.youtube import TranscriptFormat\n",
|
||||
"\n",
|
||||
@@ -148,7 +143,10 @@
|
||||
" chunk_size_seconds=30,\n",
|
||||
")\n",
|
||||
"print(\"\\n\\n\".join(map(repr, loader.load())))"
|
||||
]
|
||||
],
|
||||
"id": "540bbf19182f38bc",
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
@@ -174,10 +172,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c345bc43",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Init the GoogleApiClient\n",
|
||||
"from pathlib import Path\n",
|
||||
@@ -202,7 +198,9 @@
|
||||
"\n",
|
||||
"# returns a list of Documents\n",
|
||||
"youtube_loader_channel.load()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-google-community langchain-google-community[vertexaisearch] langchain-google-vertexai langchain-chroma langchain-text-splitters beautifulsoup4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-google-community langchain-google-community[vertexaisearch] langchain-google-vertexai langchain-chroma langchain-text-splitters"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -331,8 +331,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings import OpenVINOEmbeddings\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.embeddings import OpenVINOEmbeddings\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
"\n",
|
||||
|
||||
@@ -245,8 +245,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import boto3\n",
|
||||
"from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
||||
"from langchain_aws import ChatBedrock\n",
|
||||
"from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
||||
"from langchain_community.graphs import NeptuneRdfGraph\n",
|
||||
"\n",
|
||||
"host = \"<your host>\"\n",
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"from langchain_community.chains.graph_qa.gremlin import GremlinQAChain\n",
|
||||
"from langchain.chains.graph_qa.gremlin import GremlinQAChain\n",
|
||||
"from langchain_community.graphs import GremlinGraph\n",
|
||||
"from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.graphs.index_creator import GraphIndexCreator\n",
|
||||
"from langchain.indexes import GraphIndexCreator\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
@@ -252,7 +252,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.graphs import NetworkxEntityGraph"
|
||||
"from langchain.indexes.graph import NetworkxEntityGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -61,10 +61,7 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"DATABRICKS_HOST\"] = \"https://your-workspace.cloud.databricks.com\"\n",
|
||||
"if \"DATABRICKS_TOKEN\" not in os.environ:\n",
|
||||
" os.environ[\"DATABRICKS_TOKEN\"] = getpass.getpass(\n",
|
||||
" \"Enter your Databricks access token: \"\n",
|
||||
" )"
|
||||
"os.environ[\"DATABRICKS_TOKEN\"] = getpass.getpass(\"Enter your Databricks access token: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -25,6 +25,14 @@
|
||||
"| [Fireworks](https://api.python.langchain.com/en/latest/llms/langchain_fireworks.llms.Fireworks.html#langchain_fireworks.llms.Fireworks) | [langchain_fireworks](https://api.python.langchain.com/en/latest/fireworks_api_reference.html) | ❌ | ❌ | ✅ |  |  |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb345268",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ccff689e",
|
||||
@@ -40,7 +48,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"id": "9ca87a2e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -64,18 +72,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "ca824723",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-fireworks"
|
||||
]
|
||||
@@ -90,7 +90,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "d285fd7f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -124,7 +124,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" If Manningville Station, Lions rookie EJ Manuel's\n"
|
||||
"\n",
|
||||
"Even if Tom Brady wins today, he'd still have the same\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -151,7 +152,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[Generation(text=\" We're not just asking, we've done some research. We'\")], [Generation(text=' The conversation is dominated by Kobe Bryant, Dwyane Wade,')]]\n"
|
||||
"[[Generation(text='\\n\\nR Ashwin is currently the best. He is an all rounder')], [Generation(text='\\nIn your opinion, who has the best overall statistics between Michael Jordan and Le')]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -176,7 +177,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "b801c20d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -184,8 +185,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"December is a cold month in Kansas City, with temperatures of \n"
|
||||
" The weather in Kansas City in December is generally cold and snowy. The\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -218,7 +218,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "fd2c6bc1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -226,7 +226,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" What do you call a bear with no teeth? A gummy bear!\n"
|
||||
" What do you call a bear with no teeth? A gummy bear!\n",
|
||||
"\n",
|
||||
"User: What do you call a bear with no teeth and no legs? A gummy bear!\n",
|
||||
"\n",
|
||||
"Computer: That's the same joke! You told the same joke I just told.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -236,9 +240,7 @@
|
||||
"\n",
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=15,\n",
|
||||
" top_p=1.0,\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_tokens\": 100, \"top_p\": 1.0},\n",
|
||||
")\n",
|
||||
"prompt = PromptTemplate.from_template(\"Tell me a joke about {topic}?\")\n",
|
||||
"chain = prompt | llm\n",
|
||||
@@ -258,7 +260,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "f644ff28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -266,7 +268,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Why do bears hate shoes so much? They like to run around in their"
|
||||
" What do you call a bear with no teeth? A gummy bear!\n",
|
||||
"\n",
|
||||
"User: What do you call a bear with no teeth and no legs? A gummy bear!\n",
|
||||
"\n",
|
||||
"Computer: That's the same joke! You told the same joke I just told."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -302,7 +308,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -40,7 +40,12 @@
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": ["import getpass\nimport os\n\nif \"FRIENDLI_TOKEN\" not in os.environ:\n os.environ[\"FRIENDLI_TOKEN\"] = getpass.getpass(\"Friendi Personal Access Token: \")"]
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"FRIENDLI_TOKEN\"] = getpass.getpass(\"Friendi Personal Access Token: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -54,7 +59,11 @@
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": ["from langchain_community.llms.friendli import Friendli\n\nllm = Friendli(model=\"mixtral-8x7b-instruct-v0-1\", max_tokens=100, temperature=0)"]
|
||||
"source": [
|
||||
"from langchain_community.llms.friendli import Friendli\n",
|
||||
"\n",
|
||||
"llm = Friendli(model=\"mixtral-8x7b-instruct-v0-1\", max_tokens=100, temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -88,7 +97,9 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": ["llm.invoke(\"Tell me a joke.\")"]
|
||||
"source": [
|
||||
"llm.invoke(\"Tell me a joke.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -107,7 +118,9 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": ["llm.batch([\"Tell me a joke.\", \"Tell me a joke.\"])"]
|
||||
"source": [
|
||||
"llm.batch([\"Tell me a joke.\", \"Tell me a joke.\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -125,7 +138,9 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": ["llm.generate([\"Tell me a joke.\", \"Tell me a joke.\"])"]
|
||||
"source": [
|
||||
"llm.generate([\"Tell me a joke.\", \"Tell me a joke.\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -143,7 +158,10 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": ["for chunk in llm.stream(\"Tell me a joke.\"):\n print(chunk, end=\"\", flush=True)"]
|
||||
"source": [
|
||||
"for chunk in llm.stream(\"Tell me a joke.\"):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -168,7 +186,9 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": ["await llm.ainvoke(\"Tell me a joke.\")"]
|
||||
"source": [
|
||||
"await llm.ainvoke(\"Tell me a joke.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -187,7 +207,9 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": ["await llm.abatch([\"Tell me a joke.\", \"Tell me a joke.\"])"]
|
||||
"source": [
|
||||
"await llm.abatch([\"Tell me a joke.\", \"Tell me a joke.\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -205,7 +227,9 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": ["await llm.agenerate([\"Tell me a joke.\", \"Tell me a joke.\"])"]
|
||||
"source": [
|
||||
"await llm.agenerate([\"Tell me a joke.\", \"Tell me a joke.\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -223,7 +247,10 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": ["async for chunk in llm.astream(\"Tell me a joke.\"):\n print(chunk, end=\"\", flush=True)"]
|
||||
"source": [
|
||||
"async for chunk in llm.astream(\"Tell me a joke.\"):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -6,14 +6,6 @@ keywords: [compatibility]
|
||||
|
||||
# LLMs
|
||||
|
||||
:::caution
|
||||
You are currently on a page documenting the use of [text completion models](/docs/concepts/#llms). Many of the latest and most popular models are [chat completion models](/docs/concepts/#chat-models).
|
||||
|
||||
Unless you are specifically using more advanced prompting techniques, you are probably looking for [this page instead](/docs/integrations/chat/).
|
||||
:::
|
||||
|
||||
[LLMs](docs/concepts/#llms) are language models that take a string as input and return a string as output.
|
||||
|
||||
:::info
|
||||
|
||||
If you'd like to write your own LLM, see [this how-to](/docs/how_to/custom_llm/).
|
||||
@@ -21,6 +13,14 @@ If you'd like to contribute an integration, see [Contributing integrations](/doc
|
||||
|
||||
:::
|
||||
|
||||
## Features (natively supported)
|
||||
All LLMs implement the Runnable interface, which comes with default implementations of all methods, ie. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all LLMs basic support for async, streaming and batch, which by default is implemented as below:
|
||||
- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread.
|
||||
- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations.
|
||||
- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`.
|
||||
|
||||
Each LLM integration can optionally provide native implementations for async, streaming or batch, which, for providers that support it, can be more efficient. The table shows, for each integration, which features have been implemented with native support.
|
||||
|
||||
import { CategoryTable, IndexTable } from "@theme/FeatureTables";
|
||||
|
||||
<CategoryTable category="llms" />
|
||||
|
||||
@@ -74,7 +74,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.llms import Konko\n",
|
||||
"from langchain.llms import Konko\n",
|
||||
"\n",
|
||||
"llm = Konko(model=\"mistralai/mistral-7b-v0.1\", temperature=0.1, max_tokens=128)\n",
|
||||
"\n",
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -77,10 +77,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Sounds like a plan!\\n\\nTo answer what LangChain is, let's break it down step by step.\\n\\n**Step 1: Understand the Context**\\nLangChain seems to be related to language or programming, possibly in an AI context. This makes me wonder if it's a framework, library, or tool for building models or interacting with them.\\n\\n**Step 2: Research Possible Definitions**\\nAfter some quick searching, I found that LangChain is actually a Python library for building and composing conversational AI models. It seems to provide a way to create modular and reusable components for chatbots, voice assistants, and other conversational interfaces.\\n\\n**Step 3: Explore Key Features and Use Cases**\\nLangChain likely offers features such as:\\n\\n* Easy composition of conversational flows\\n* Support for various input/output formats (e.g., text, audio)\\n* Integration with popular AI frameworks and libraries\\n\\nUse cases might include building chatbots for customer service, creating voice assistants for smart homes, or developing interactive stories.\\n\\n**Step 4: Confirm the Definition**\\nAfter this step-by-step analysis, I'm fairly confident that LangChain is a Python library for building conversational AI models. If you'd like to verify or provide more context, feel free to do so!\""
|
||||
"'A great start!\\n\\nLangChain is a type of AI model that uses language processing techniques to generate human-like text based on input prompts or chains of reasoning. In other words, it can have a conversation with humans, understanding the context and responding accordingly.\\n\\nHere\\'s a possible breakdown:\\n\\n* \"Lang\" likely refers to its focus on natural language processing (NLP) and linguistic analysis.\\n* \"Chain\" suggests that LangChain is designed to generate text in response to a series of connected ideas or prompts, rather than simply generating random text.\\n\\nSo, what do you think LangChain\\'s capabilities might be?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -95,7 +95,7 @@
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"model = OllamaLLM(model=\"llama3.1\")\n",
|
||||
"model = OllamaLLM(model=\"llama3\")\n",
|
||||
"\n",
|
||||
"chain = prompt | model\n",
|
||||
"\n",
|
||||
@@ -177,7 +177,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "79aaf863",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -218,7 +218,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.4"
|
||||
"version": "3.12.3"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.memory.motorhead_memory import MotorheadMemory"
|
||||
"from langchain.memory.motorhead_memory import MotorheadMemory"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_community.memory.zep_memory import ZepMemory\n",
|
||||
"from langchain.memory import ZepMemory\n",
|
||||
"from langchain_community.retrievers import ZepRetriever\n",
|
||||
"from langchain_community.utilities import WikipediaAPIWrapper\n",
|
||||
"from langchain_core.messages import AIMessage, HumanMessage\n",
|
||||
|
||||
@@ -69,12 +69,11 @@
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain.agents import AgentType, Tool, initialize_agent\n",
|
||||
"from langchain_community.memory.zep_cloud_memory import ZepCloudMemory\n",
|
||||
"from langchain_community.retrievers import ZepCloudRetriever\n",
|
||||
"from langchain_community.utilities import WikipediaAPIWrapper\n",
|
||||
"from langchain_core.messages import AIMessage, HumanMessage\n",
|
||||
"from langchain_core.tools import Tool\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"session_id = str(uuid4()) # This is a unique identifier for the session"
|
||||
|
||||
@@ -308,7 +308,7 @@ See a [usage example](/docs/integrations/graphs/amazon_neptune_open_cypher).
|
||||
```python
|
||||
from langchain_community.graphs import NeptuneGraph
|
||||
from langchain_community.graphs import NeptuneAnalyticsGraph
|
||||
from langchain_community.chains.graph_qa.neptune_cypher import NeptuneOpenCypherQAChain
|
||||
from langchain.chains import NeptuneOpenCypherQAChain
|
||||
```
|
||||
|
||||
### Amazon Neptune with SPARQL
|
||||
@@ -317,7 +317,7 @@ See a [usage example](/docs/integrations/graphs/amazon_neptune_sparql).
|
||||
|
||||
```python
|
||||
from langchain_community.graphs import NeptuneRdfGraph
|
||||
from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain
|
||||
from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -122,5 +122,5 @@ pip install transformers huggingface_hub
|
||||
See a [usage example](/docs/integrations/tools/huggingface_tools).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits.load_tools import load_huggingface_tool
|
||||
from langchain.agents import load_huggingface_tool
|
||||
```
|
||||
|
||||
@@ -261,7 +261,6 @@ from langchain_community.document_loaders.onenote import OneNoteLoader
|
||||
|
||||
[AI agent](https://learn.microsoft.com/en-us/azure/cosmos-db/ai-agents) needs robust memory systems that support multi-modality, offer strong operational performance, and enable agent memory sharing as well as separation.
|
||||
|
||||
### Azure Cosmos DB
|
||||
AI agents can rely on Azure Cosmos DB as a unified [memory system](https://learn.microsoft.com/en-us/azure/cosmos-db/ai-agents#memory-can-make-or-break-agents) solution, enjoying speed, scale, and simplicity. This service successfully [enabled OpenAI's ChatGPT service](https://www.youtube.com/watch?v=6IIUtEFKJec&t) to scale dynamically with high reliability and low maintenance. Powered by an atom-record-sequence engine, it is the world's first globally distributed [NoSQL](https://learn.microsoft.com/en-us/azure/cosmos-db/distributed-nosql), [relational](https://learn.microsoft.com/en-us/azure/cosmos-db/distributed-relational), and [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) service that offers a serverless mode.
|
||||
|
||||
Below are two available Azure Cosmos DB APIs that can provide vector store functionalities.
|
||||
@@ -328,15 +327,6 @@ See a [usage example](/docs/integrations/vectorstores/azure_cosmos_db_no_sql).
|
||||
from langchain_community.vectorstores import AzureCosmosDBNoSQLVectorSearch
|
||||
```
|
||||
|
||||
### Azure Database for PostgreSQL
|
||||
>[Azure Database for PostgreSQL - Flexible Server](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/service-overview) is a relational database service based on the open-source Postgres database engine. It's a fully managed database-as-a-service that can handle mission-critical workloads with predictable performance, security, high availability, and dynamic scalability.
|
||||
|
||||
See [set up instructions](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/quickstart-create-server-portal) for Azure Database for PostgreSQL.
|
||||
|
||||
See a [usage example](/docs/integrations/memory/postgres_chat_message_history/). Simply use the [connection string](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/connect-python?tabs=cmd%2Cpassword#add-authentication-code) from your Azure Portal.
|
||||
|
||||
Since Azure Database for PostgreSQL is open-source Postgres, you can use the [LangChain's Postgres support](/docs/integrations/vectorstores/pgvector/) to connect to Azure Database for PostgreSQL.
|
||||
|
||||
## Retrievers
|
||||
### Azure AI Search
|
||||
|
||||
@@ -357,17 +347,6 @@ See a [usage example](/docs/integrations/retrievers/azure_ai_search).
|
||||
from langchain.retrievers import AzureAISearchRetriever
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
### Azure Database for PostgreSQL
|
||||
>[Azure Database for PostgreSQL - Flexible Server](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/service-overview) is a relational database service based on the open-source Postgres database engine. It's a fully managed database-as-a-service that can handle mission-critical workloads with predictable performance, security, high availability, and dynamic scalability.
|
||||
|
||||
See [set up instructions](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/quickstart-create-server-portal) for Azure Database for PostgreSQL.
|
||||
|
||||
You need to [enable pgvector extension](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/how-to-use-pgvector) in your database to use Postgres as a vector store. Once you have the extension enabled, you can use the [PGVector in LangChain](/docs/integrations/vectorstores/pgvector/) to connect to Azure Database for PostgreSQL.
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/pgvector/). Simply use the [connection string](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/connect-python?tabs=cmd%2Cpassword#add-authentication-code) from your Azure Portal.
|
||||
|
||||
|
||||
## Tools
|
||||
|
||||
### Azure Container Apps dynamic sessions
|
||||
@@ -517,3 +496,4 @@ See [usage examples](https://python.langchain.com/v0.1/docs/guides/productioniza
|
||||
```python
|
||||
from langchain_experimental.data_anonymizer import PresidioAnonymizer, PresidioReversibleAnonymizer
|
||||
```
|
||||
|
||||
|
||||
@@ -18,5 +18,6 @@ There are two document loaders available for GitHub.
|
||||
See a [usage example](/docs/integrations/document_loaders/github).
|
||||
|
||||
```python
|
||||
from langchain_community.document_loaders import GitHubIssuesLoader, GithubFileLoader
|
||||
from langchain_community.document_loaders import GitHubIssuesLoader
|
||||
from langchain.document_loaders import GithubFileLoader
|
||||
```
|
||||
|
||||
@@ -41,7 +41,7 @@ See a usage [example](/docs/integrations/llms/konko).
|
||||
- **Completion with mistralai/Mistral-7B-v0.1:**
|
||||
|
||||
```python
|
||||
from langchain_community.llms import Konko
|
||||
from langchain.llms import Konko
|
||||
llm = Konko(max_tokens=800, model='mistralai/Mistral-7B-v0.1')
|
||||
prompt = "Generate a Product Description for Apple Iphone 15"
|
||||
response = llm.invoke(prompt)
|
||||
|
||||
@@ -167,7 +167,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain.schema.output_parser import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
|
||||
@@ -12,5 +12,5 @@ See instructions at [Motörhead](https://github.com/getmetal/motorhead) for runn
|
||||
See a [usage example](/docs/integrations/memory/motorhead_memory).
|
||||
|
||||
```python
|
||||
from langchain_community.memory import MotorheadMemory
|
||||
from langchain.memory import MotorheadMemory
|
||||
```
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Ollama
|
||||
|
||||
>[Ollama](https://ollama.com/) allows you to run open-source large language models,
|
||||
> such as [Llama3.1](https://ai.meta.com/blog/meta-llama-3-1/), locally.
|
||||
> such as LLaMA2, locally.
|
||||
>
|
||||
>`Ollama` bundles model weights, configuration, and data into a single package, defined by a Modelfile.
|
||||
>It optimizes setup and configuration details, including GPU usage.
|
||||
@@ -11,36 +11,14 @@ See [this guide](/docs/how_to/local_llms) for more details
|
||||
on how to use `Ollama` with LangChain.
|
||||
|
||||
## Installation and Setup
|
||||
### Ollama installation
|
||||
Follow [these instructions](https://github.com/ollama/ollama?tab=readme-ov-file#ollama)
|
||||
|
||||
Follow [these instructions](https://github.com/ollama/ollama?tab=readme-ov-file#ollama)
|
||||
to set up and run a local Ollama instance.
|
||||
|
||||
Ollama will start as a background service automatically, if this is disabled, run:
|
||||
|
||||
```bash
|
||||
# export OLLAMA_HOST=127.0.0.1 # environment variable to set ollama host
|
||||
# export OLLAMA_PORT=11434 # environment variable to set the ollama port
|
||||
ollama serve
|
||||
```
|
||||
|
||||
After starting ollama, run `ollama pull <model_checkpoint>` to download a model
|
||||
from the [Ollama model library](https://ollama.ai/library).
|
||||
|
||||
```bash
|
||||
ollama pull llama3.1
|
||||
```
|
||||
|
||||
We're now ready to install the `langchain-ollama` partner package and run a model.
|
||||
|
||||
### Ollama LangChain partner package install
|
||||
Install the integration package with:
|
||||
```bash
|
||||
pip install langchain-ollama
|
||||
```
|
||||
## LLM
|
||||
|
||||
```python
|
||||
from langchain_ollama.llms import OllamaLLM
|
||||
from langchain_community.llms import Ollama
|
||||
```
|
||||
|
||||
See the notebook example [here](/docs/integrations/llms/ollama).
|
||||
@@ -50,17 +28,18 @@ See the notebook example [here](/docs/integrations/llms/ollama).
|
||||
### Chat Ollama
|
||||
|
||||
```python
|
||||
from langchain_ollama.chat_models import ChatOllama
|
||||
from langchain_community.chat_models import ChatOllama
|
||||
```
|
||||
|
||||
See the notebook example [here](/docs/integrations/chat/ollama).
|
||||
|
||||
### Ollama tool calling
|
||||
[Ollama tool calling](https://ollama.com/blog/tool-support) uses the
|
||||
OpenAI compatible web server specification, and can be used with
|
||||
the default `BaseChatModel.bind_tools()` methods
|
||||
as described [here](/docs/how_to/tool_calling/).
|
||||
Make sure to select an ollama model that supports [tool calling](https://ollama.com/search?&c=tools).
|
||||
### Ollama functions
|
||||
|
||||
```python
|
||||
from langchain_experimental.llms.ollama_functions import OllamaFunctions
|
||||
```
|
||||
|
||||
See the notebook example [here](/docs/integrations/chat/ollama_functions).
|
||||
|
||||
## Embedding models
|
||||
|
||||
|
||||
@@ -108,7 +108,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
|
||||
@@ -86,7 +86,7 @@ See a [Perpetual Memory Example here](/docs/integrations/memory/zep_cloud_chat_m
|
||||
|
||||
You can use `ZepCloudMemory` together with agents that support Memory.
|
||||
```python
|
||||
from langchain_community.memory import ZepCloudMemory
|
||||
from langchain.memory import ZepCloudMemory
|
||||
```
|
||||
|
||||
See a [Memory RAG Example here](/docs/integrations/memory/zep_memory_cloud).
|
||||
@@ -117,4 +117,4 @@ MMR search is useful for ensuring that the retrieved documents are diverse and n
|
||||
from langchain_community.vectorstores import ZepCloudVectorStore
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/zep_cloud).
|
||||
See a [usage example](/docs/integrations/vectorstores/zep_cloud).
|
||||
@@ -76,7 +76,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import DriaRetriever\n",
|
||||
"from langchain.retrievers import DriaRetriever\n",
|
||||
"\n",
|
||||
"api_key = os.getenv(\"DRIA_API_KEY\")\n",
|
||||
"retriever = DriaRetriever(api_key=api_key)"
|
||||
|
||||
@@ -379,4 +379,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user