mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-04 00:00:34 +00:00
Compare commits
82 Commits
langchain-
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0ecdd6a174 | ||
|
|
940ad63c63 | ||
|
|
12c0e9b7d8 | ||
|
|
ed682ae62d | ||
|
|
caf1919217 | ||
|
|
904066f1ec | ||
|
|
96cbd90cba | ||
|
|
a8a2cff129 | ||
|
|
f4ff4514ef | ||
|
|
d1679cec91 | ||
|
|
5295f2add0 | ||
|
|
5f5b87e9a3 | ||
|
|
e0ef98dac0 | ||
|
|
62212c7ee2 | ||
|
|
9d38f170ce | ||
|
|
c6cb1fae61 | ||
|
|
e42b1d23dc | ||
|
|
53d0bfe9cd | ||
|
|
eafab52483 | ||
|
|
efdfa00d10 | ||
|
|
a2ad5aca41 | ||
|
|
5ecbb5f277 | ||
|
|
c1028171af | ||
|
|
f6236d9f12 | ||
|
|
df20f111a8 | ||
|
|
db22311094 | ||
|
|
f624ad489a | ||
|
|
549ecd3e78 | ||
|
|
a0671676ae | ||
|
|
12ae42c5e9 | ||
|
|
e1238b8085 | ||
|
|
8f5ec20ccf | ||
|
|
0d6f915442 | ||
|
|
d53ebf367e | ||
|
|
54542b9385 | ||
|
|
7d2a13f519 | ||
|
|
0b34be4ce5 | ||
|
|
6f3169eb49 | ||
|
|
7995c719c5 | ||
|
|
bdf1cd383c | ||
|
|
77c981999e | ||
|
|
7f015b6f14 | ||
|
|
71ad451e1f | ||
|
|
2c42893703 | ||
|
|
0e139fb9a6 | ||
|
|
622bb05751 | ||
|
|
56dde3ade3 | ||
|
|
bd3d6496f3 | ||
|
|
fb5da8384e | ||
|
|
a7d0e42f3f | ||
|
|
3496e1739e | ||
|
|
0f39155f62 | ||
|
|
6aeda24a07 | ||
|
|
3ed804a5f3 | ||
|
|
ca137bfe62 | ||
|
|
fa487fb62d | ||
|
|
053fb16a05 | ||
|
|
3672bbc71e | ||
|
|
a02ad3d192 | ||
|
|
0c4054a7fc | ||
|
|
75517c3ea9 | ||
|
|
ebf2e11bcb | ||
|
|
e41e6ec6aa | ||
|
|
09769373b3 | ||
|
|
3fc27e7a95 | ||
|
|
8acfd677bc | ||
|
|
af3789b9ed | ||
|
|
a6896794ca | ||
|
|
d40fd5a3ce | ||
|
|
116b758498 | ||
|
|
10996a2821 | ||
|
|
2aed07efb6 | ||
|
|
64dac1faf7 | ||
|
|
58768d8aef | ||
|
|
d65da13299 | ||
|
|
c14bd1fcfe | ||
|
|
a1ccabf85d | ||
|
|
2104cf0d9a | ||
|
|
18c64aed6d | ||
|
|
fc802d8f9f | ||
|
|
b4d87c709c | ||
|
|
383bc8f2ef |
@@ -5,26 +5,31 @@ This project includes a [dev container](https://containers.dev/), which lets you
|
||||
You can use the dev container configuration in this folder to build and run the app without needing to install any of its tools locally! You can use it in [GitHub Codespaces](https://github.com/features/codespaces) or the [VS Code Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers).
|
||||
|
||||
## GitHub Codespaces
|
||||
|
||||
[](https://codespaces.new/langchain-ai/langchain)
|
||||
|
||||
You may use the button above, or follow these steps to open this repo in a Codespace:
|
||||
1. Click the **Code** drop-down menu at the top of https://github.com/langchain-ai/langchain.
|
||||
|
||||
1. Click the **Code** drop-down menu at the top of <https://github.com/langchain-ai/langchain>.
|
||||
1. Click on the **Codespaces** tab.
|
||||
1. Click **Create codespace on master**.
|
||||
|
||||
For more info, check out the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/creating-a-codespace#creating-a-codespace).
|
||||
|
||||
## VS Code Dev Containers
|
||||
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)
|
||||
|
||||
Note: If you click the link above you will open the main repo (langchain-ai/langchain) and not your local cloned repo. This is fine if you only want to run and test the library, but if you want to contribute you can use the link below and replace with your username and cloned repo name:
|
||||
```
|
||||
https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/<yourusername>/<yourclonedreponame>
|
||||
> [!NOTE]
|
||||
> If you click the link above you will open the main repo (`langchain-ai/langchain`) and *not* your local cloned repo. This is fine if you only want to run and test the library, but if you want to contribute you can use the link below and replace with your username and cloned repo name:
|
||||
|
||||
```txt
|
||||
https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/<YOUR_USERNAME>/<YOUR_CLONED_REPO_NAME>
|
||||
```
|
||||
|
||||
Then you will have a local cloned repo where you can contribute and then create pull requests.
|
||||
|
||||
If you already have VS Code and Docker installed, you can use the button above to get started. This will cause VS Code to automatically install the Dev Containers extension if needed, clone the source code into a container volume, and spin up a dev container for use.
|
||||
If you already have VS Code and Docker installed, you can use the button above to get started. This will use VSCode to automatically install the Dev Containers extension if needed, clone the source code into a container volume, and spin up a dev container for use.
|
||||
|
||||
Alternatively you can also follow these steps to open this repo in a container using the VS Code Dev Containers extension:
|
||||
|
||||
@@ -40,5 +45,5 @@ You can learn more in the [Dev Containers documentation](https://code.visualstud
|
||||
|
||||
## Tips and tricks
|
||||
|
||||
* If you are working with the same repository folder in a container and Windows, you'll want consistent line endings (otherwise you may see hundreds of changes in the SCM view). The `.gitattributes` file in the root of this repo will disable line ending conversion and should prevent this. See [tips and tricks](https://code.visualstudio.com/docs/devcontainers/tips-and-tricks#_resolving-git-line-ending-issues-in-containers-resulting-in-many-modified-files) for more info.
|
||||
* If you'd like to review the contents of the image used in this dev container, you can check it out in the [devcontainers/images](https://github.com/devcontainers/images/tree/main/src/python) repo.
|
||||
- If you are working with the same repository folder in a container and Windows, you'll want consistent line endings (otherwise you may see hundreds of changes in the SCM view). The `.gitattributes` file in the root of this repo will disable line ending conversion and should prevent this. See [tips and tricks](https://code.visualstudio.com/docs/devcontainers/tips-and-tricks#_resolving-git-line-ending-issues-in-containers-resulting-in-many-modified-files) for more info.
|
||||
- If you'd like to review the contents of the image used in this dev container, you can check it out in the [devcontainers/images](https://github.com/devcontainers/images/tree/main/src/python) repo.
|
||||
|
||||
@@ -1,36 +1,58 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
|
||||
{
|
||||
// Name for the dev container
|
||||
"name": "langchain",
|
||||
|
||||
// Point to a Docker Compose file
|
||||
"dockerComposeFile": "./docker-compose.yaml",
|
||||
|
||||
// Required when using Docker Compose. The name of the service to connect to once running
|
||||
"service": "langchain",
|
||||
|
||||
// The optional 'workspaceFolder' property is the path VS Code should open by default when
|
||||
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
|
||||
"workspaceFolder": "/workspaces/langchain",
|
||||
|
||||
// Prevent the container from shutting down
|
||||
"overrideCommand": true
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features
|
||||
// "features": {
|
||||
// "ghcr.io/devcontainers-contrib/features/poetry:2": {}
|
||||
// }
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Uncomment the next line to run commands after the container is created.
|
||||
// "postCreateCommand": "cat /etc/os-release",
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
// Name for the dev container
|
||||
"name": "langchain",
|
||||
// Point to a Docker Compose file
|
||||
"dockerComposeFile": "./docker-compose.yaml",
|
||||
// Required when using Docker Compose. The name of the service to connect to once running
|
||||
"service": "langchain",
|
||||
// The optional 'workspaceFolder' property is the path VS Code should open by default when
|
||||
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
|
||||
"workspaceFolder": "/workspaces/langchain",
|
||||
"mounts": [
|
||||
"source=langchain-workspaces,target=/workspaces/langchain,type=volume"
|
||||
],
|
||||
// Prevent the container from shutting down
|
||||
"overrideCommand": true,
|
||||
// Features to add to the dev container. More info: https://containers.dev/features
|
||||
"features": {
|
||||
"ghcr.io/devcontainers/features/git:1": {},
|
||||
"ghcr.io/devcontainers/features/github-cli:1": {}
|
||||
},
|
||||
"containerEnv": {
|
||||
"UV_LINK_MODE": "copy"
|
||||
},
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
// Run commands after the container is created
|
||||
"postCreateCommand": "uv sync && echo 'LangChain (Python) dev environment ready!'",
|
||||
// Configure tool-specific properties.
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-python.python",
|
||||
"ms-python.debugpy",
|
||||
"ms-python.mypy-type-checker",
|
||||
"ms-python.isort",
|
||||
"unifiedjs.vscode-mdx",
|
||||
"davidanson.vscode-markdownlint",
|
||||
"ms-toolsai.jupyter",
|
||||
"GitHub.copilot",
|
||||
"GitHub.copilot-chat"
|
||||
],
|
||||
"settings": {
|
||||
"python.defaultInterpreterPath": ".venv/bin/python",
|
||||
"python.formatting.provider": "none",
|
||||
"[python]": {
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
||||
|
||||
@@ -4,26 +4,9 @@ services:
|
||||
build:
|
||||
dockerfile: libs/langchain/dev.Dockerfile
|
||||
context: ..
|
||||
volumes:
|
||||
# Update this to wherever you want VS Code to mount the folder of your project
|
||||
- ..:/workspaces/langchain:cached
|
||||
|
||||
networks:
|
||||
- langchain-network
|
||||
# environment:
|
||||
# MONGO_ROOT_USERNAME: root
|
||||
# MONGO_ROOT_PASSWORD: example123
|
||||
# depends_on:
|
||||
# - mongo
|
||||
# mongo:
|
||||
# image: mongo
|
||||
# restart: unless-stopped
|
||||
# environment:
|
||||
# MONGO_INITDB_ROOT_USERNAME: root
|
||||
# MONGO_INITDB_ROOT_PASSWORD: example123
|
||||
# ports:
|
||||
# - "27017:27017"
|
||||
# networks:
|
||||
# - langchain-network
|
||||
|
||||
networks:
|
||||
langchain-network:
|
||||
|
||||
52
.editorconfig
Normal file
52
.editorconfig
Normal file
@@ -0,0 +1,52 @@
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# All files
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
# Python files
|
||||
[*.py]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
max_line_length = 88
|
||||
|
||||
# JSON files
|
||||
[*.json]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# YAML files
|
||||
[*.{yml,yaml}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# Markdown files
|
||||
[*.md]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
# Configuration files
|
||||
[*.{toml,ini,cfg}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
# Shell scripts
|
||||
[*.sh]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# Makefile
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
|
||||
# Jupyter notebooks
|
||||
[*.ipynb]
|
||||
# Jupyter may include trailing whitespace in cell
|
||||
# outputs that's semantically meaningful
|
||||
trim_trailing_whitespace = false
|
||||
328
.github/copilot-instructions.md
vendored
328
.github/copilot-instructions.md
vendored
@@ -1,72 +1,116 @@
|
||||
### 1. Avoid Breaking Changes (Stable Public Interfaces)
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
|
||||
* Carefully preserve **function signatures**, argument positions, and names for any exported/public methods.
|
||||
* Be cautious when **renaming**, **removing**, or **reordering** arguments — even small changes can break downstream consumers.
|
||||
* Use keyword-only arguments or clearly mark experimental features to isolate unstable APIs.
|
||||
## Core Development Principles
|
||||
|
||||
Bad:
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```
|
||||
|
||||
Good:
|
||||
✅ **Good - Stable Interface:**
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False): # Maintains stable interface
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
|
||||
🧠 *Ask yourself:* “Would this change break someone's code if they used it last week?”
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
---
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using reStructuredText, like `.. warning::`)
|
||||
|
||||
### 2. Simplify Code and Use Clear Variable Names
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
* Prefer descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
* Break up overly long or deeply nested functions for **readability and maintainability**.
|
||||
* Avoid unnecessary abstraction or premature optimization.
|
||||
* All generated Python code must include type hints.
|
||||
### 2. Code Quality Standards
|
||||
|
||||
Bad:
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
Good:
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: List[str], known_users: Set[str]) -> List[str]:
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
---
|
||||
**Style Requirements:**
|
||||
|
||||
### 3. Ensure Unit Tests Cover New and Updated Functionality
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
* Every new feature or bugfix should be **covered by a unit test**.
|
||||
* Test edge cases and failure conditions.
|
||||
* Use `pytest`, `unittest`, or the project’s existing framework consistently.
|
||||
### 3. Testing Requirements
|
||||
|
||||
Checklist:
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
* [ ] Does the test suite fail if your new logic is broken?
|
||||
* [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
* [ ] Do tests use fixtures or mocks where needed?
|
||||
**Test Organization:**
|
||||
|
||||
---
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
### 4. Look for Suspicious or Risky Code
|
||||
**Test Quality Checklist:**
|
||||
|
||||
* Watch out for:
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
* Use of `eval()`, `exec()`, or `pickle` on user-controlled input.
|
||||
* Silent failure modes (`except: pass`).
|
||||
* Unreachable code or commented-out blocks.
|
||||
* Race conditions or resource leaks (file handles, sockets, threads).
|
||||
Checklist questions:
|
||||
|
||||
Bad:
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
@@ -74,7 +118,7 @@ def load_config(path):
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
Good:
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
@@ -84,68 +128,198 @@ def load_config(path: str) -> dict:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
---
|
||||
### 5. Documentation Standards
|
||||
|
||||
### 5. Use Google-Style Docstrings (with Args section)
|
||||
**Use Google-style docstrings with Args section for all public functions.**
|
||||
|
||||
* All public functions should include a **Google-style docstring**.
|
||||
* Include an `Args:` section where relevant.
|
||||
* Types should NOT be written in the docstring — use type hints instead.
|
||||
|
||||
Bad:
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
Good:
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str) -> None:
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Sends an email to a recipient.
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level (``'low'``, ``'normal'``, ``'high'``).
|
||||
|
||||
Returns:
|
||||
True if email was sent successfully, False otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Use reStructuredText for docstrings to enable rich formatting
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications."""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
|
||||
# Format code
|
||||
make format
|
||||
|
||||
# Type checking
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
|
||||
**Local Development Dependencies:**
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli): resolve flag parsing error`
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
|
||||
- Follow the existing patterns in `langchain-core` for base abstractions
|
||||
- Use `langchain_core.callbacks` for execution tracking
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components like legacy `LLMChain`
|
||||
|
||||
### Partner Integrations
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
|
||||
---
|
||||
|
||||
### 6. Propose Better Designs When Applicable
|
||||
## Quick Reference Checklist
|
||||
|
||||
* If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it.
|
||||
* Suggest improvements, even if they require some refactoring — especially if the new code would:
|
||||
Before submitting code changes:
|
||||
|
||||
* Reduce duplication
|
||||
* Make unit testing easier
|
||||
* Improve separation of concerns
|
||||
* Add clarity without adding complexity
|
||||
|
||||
Instead of:
|
||||
|
||||
```python
|
||||
def save(data, db_conn):
|
||||
# manually serializes fields
|
||||
```
|
||||
|
||||
You might suggest:
|
||||
|
||||
```python
|
||||
# Suggest using dataclasses or Pydantic for automatic serialization and validation
|
||||
```
|
||||
|
||||
### 7. Misc
|
||||
|
||||
* When suggesting package installation commands, use `uv pip install` as this project uses `uv`.
|
||||
* When creating tools for agents, use the @tool decorator from langchain_core.tools. The tool's docstring serves as its functional description for the agent.
|
||||
* Avoid suggesting deprecated components, such as the legacy LLMChain.
|
||||
* We use Conventional Commits format for pull request titles. Example PR titles:
|
||||
* feat(core): add multi‐tenant support
|
||||
* fix(cli): resolve flag parsing error
|
||||
* docs: update API usage examples
|
||||
* docs(openai): update API usage examples
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
|
||||
1
.github/scripts/check_diff.py
vendored
1
.github/scripts/check_diff.py
vendored
@@ -16,6 +16,7 @@ LANGCHAIN_DIRS = [
|
||||
"libs/core",
|
||||
"libs/text-splitters",
|
||||
"libs/langchain",
|
||||
"libs/langchain_v1",
|
||||
]
|
||||
|
||||
# when set to True, we are ignoring core dependents
|
||||
|
||||
2
.github/scripts/prep_api_docs_build.py
vendored
2
.github/scripts/prep_api_docs_build.py
vendored
@@ -73,6 +73,7 @@ def main():
|
||||
for p in package_yaml["packages"]
|
||||
if (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
|
||||
and p["repo"] != "langchain-ai/langchain"
|
||||
and p["name"] != "langchain-ai21" # Skip AI21 due to dependency conflicts
|
||||
])
|
||||
|
||||
# Move libraries to their new locations
|
||||
@@ -82,6 +83,7 @@ def main():
|
||||
if not p.get("disabled", False)
|
||||
and (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
|
||||
and p["repo"] != "langchain-ai/langchain"
|
||||
and p["name"] != "langchain-ai21" # Skip AI21 due to dependency conflicts
|
||||
])
|
||||
|
||||
# Delete ones without a pyproject.toml
|
||||
|
||||
12
.github/workflows/_compile_integration_test.yml
vendored
12
.github/workflows/_compile_integration_test.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: compile-integration-test
|
||||
name: '🔗 Compile Integration Tests'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -25,24 +25,24 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "uv run pytest -m compile tests/integration_tests #${{ inputs.python-version }}"
|
||||
name: 'Python ${{ inputs.python-version }}'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ inputs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ inputs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- name: Install integration dependencies
|
||||
- name: '📦 Install Integration Dependencies'
|
||||
shell: bash
|
||||
run: uv sync --group test --group test_integration
|
||||
|
||||
- name: Check integration tests compile
|
||||
- name: '🔗 Check Integration Tests Compile'
|
||||
shell: bash
|
||||
run: uv run pytest -m compile tests/integration_tests
|
||||
|
||||
- name: Ensure the tests did not create any additional files
|
||||
- name: '🧹 Verify Clean Working Directory'
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
10
.github/workflows/_integration_test.yml
vendored
10
.github/workflows/_integration_test.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Integration Tests
|
||||
name: '🚀 Integration Tests'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -24,20 +24,20 @@ jobs:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
name: Python ${{ inputs.python-version }}
|
||||
name: '🚀 Integration Tests (Python ${{ inputs.python-version }})'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ inputs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ inputs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Integration Dependencies'
|
||||
shell: bash
|
||||
run: uv sync --group test --group test_integration
|
||||
|
||||
- name: Run integration tests
|
||||
- name: '🚀 Run Integration Tests'
|
||||
shell: bash
|
||||
env:
|
||||
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
|
||||
|
||||
22
.github/workflows/_lint.yml
vendored
22
.github/workflows/_lint.yml
vendored
@@ -1,4 +1,6 @@
|
||||
name: lint
|
||||
name: '🧹 Code Linting'
|
||||
# Runs code quality checks using ruff, mypy, and other linting tools
|
||||
# Checks both package code and test code for consistency
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -24,19 +26,21 @@ env:
|
||||
UV_FROZEN: "true"
|
||||
|
||||
jobs:
|
||||
# Linting job - runs quality checks on package and test code
|
||||
build:
|
||||
name: "make lint #${{ inputs.python-version }}"
|
||||
name: 'Python ${{ inputs.python-version }}'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: '📋 Checkout Code'
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ inputs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ inputs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Lint & Typing Dependencies'
|
||||
# Also installs dev/lint/test/typing dependencies, to ensure we have
|
||||
# type hints for as many of our libraries as possible.
|
||||
# This helps catch errors that require dependencies to be spotted, for example:
|
||||
@@ -49,12 +53,12 @@ jobs:
|
||||
run: |
|
||||
uv sync --group lint --group typing
|
||||
|
||||
- name: Analysing the code with our lint
|
||||
- name: '🔍 Analyze Package Code with Linters'
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
make lint_package
|
||||
|
||||
- name: Install unit test dependencies
|
||||
- name: '📦 Install Unit Test Dependencies'
|
||||
# Also installs dev/lint/test/typing dependencies, to ensure we have
|
||||
# type hints for as many of our libraries as possible.
|
||||
# This helps catch errors that require dependencies to be spotted, for example:
|
||||
@@ -67,13 +71,13 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
uv sync --inexact --group test
|
||||
- name: Install unit+integration test dependencies
|
||||
- name: '📦 Install Unit + Integration Test Dependencies'
|
||||
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
uv sync --inexact --group test --group test_integration
|
||||
|
||||
- name: Analysing the code with our lint
|
||||
- name: '🔍 Analyze Test Code with Linters'
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
make lint_tests
|
||||
|
||||
10
.github/workflows/_release.yml
vendored
10
.github/workflows/_release.yml
vendored
@@ -1,5 +1,5 @@
|
||||
name: Release
|
||||
run-name: Release ${{ inputs.working-directory }} by @${{ github.actor }}
|
||||
name: '🚀 Package Release'
|
||||
run-name: '🚀 Release ${{ inputs.working-directory }} by @${{ github.actor }}'
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
@@ -18,7 +18,7 @@ on:
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
description: "Release from a non-master branch (danger!)"
|
||||
description: "Release from a non-master branch (danger!) - Only use for hotfixes"
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: "3.11"
|
||||
@@ -26,6 +26,8 @@ env:
|
||||
UV_NO_SYNC: "true"
|
||||
|
||||
jobs:
|
||||
# Build the distribution package and extract version info
|
||||
# Runs in isolated environment with minimal permissions for security
|
||||
build:
|
||||
if: github.ref == 'refs/heads/master' || inputs.dangerous-nonmaster-release
|
||||
environment: Scheduled testing
|
||||
@@ -340,7 +342,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
partner: [openai]
|
||||
partner: [openai, anthropic]
|
||||
fail-fast: false # Continue testing other partners if one fails
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
||||
22
.github/workflows/_test.yml
vendored
22
.github/workflows/_test.yml
vendored
@@ -1,4 +1,6 @@
|
||||
name: test
|
||||
name: '🧪 Unit Testing'
|
||||
# Runs unit tests with both current and minimum supported dependency versions
|
||||
# to ensure compatibility across the supported range
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -20,31 +22,33 @@ env:
|
||||
UV_NO_SYNC: "true"
|
||||
|
||||
jobs:
|
||||
# Main test job - runs unit tests with current deps, then retests with minimum versions
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "make test #${{ inputs.python-version }}"
|
||||
name: 'Python ${{ inputs.python-version }}'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: '📋 Checkout Code'
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ inputs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ inputs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
id: setup-python
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Test Dependencies'
|
||||
shell: bash
|
||||
run: uv sync --group test --dev
|
||||
|
||||
- name: Run core tests
|
||||
- name: '🧪 Run Core Unit Tests'
|
||||
shell: bash
|
||||
run: |
|
||||
make test
|
||||
|
||||
- name: Get minimum versions
|
||||
- name: '🔍 Calculate Minimum Dependency Versions'
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
id: min-version
|
||||
shell: bash
|
||||
@@ -55,7 +59,7 @@ jobs:
|
||||
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
|
||||
echo "min-versions=$min_versions"
|
||||
|
||||
- name: Run unit tests with minimum dependency versions
|
||||
- name: '🧪 Run Tests with Minimum Dependencies'
|
||||
if: ${{ steps.min-version.outputs.min-versions != '' }}
|
||||
env:
|
||||
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
|
||||
@@ -64,7 +68,7 @@ jobs:
|
||||
make tests
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Ensure the tests did not create any additional files
|
||||
- name: '🧹 Verify Clean Working Directory'
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
17
.github/workflows/_test_doc_imports.yml
vendored
17
.github/workflows/_test_doc_imports.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: test_doc_imports
|
||||
name: '📑 Documentation Import Testing'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -18,29 +18,30 @@ jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "check doc imports #${{ inputs.python-version }}"
|
||||
name: '🔍 Check Doc Imports (Python ${{ inputs.python-version }})'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: '📋 Checkout Code'
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ inputs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ inputs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Test Dependencies'
|
||||
shell: bash
|
||||
run: uv sync --group test
|
||||
|
||||
- name: Install langchain editable
|
||||
- name: '📦 Install LangChain in Editable Mode'
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install langchain-experimental langchain-community -e libs/core libs/langchain
|
||||
|
||||
- name: Check doc imports
|
||||
- name: '🔍 Validate Documentation Import Statements'
|
||||
shell: bash
|
||||
run: |
|
||||
uv run python docs/scripts/check_imports.py
|
||||
|
||||
- name: Ensure the test did not create any additional files
|
||||
- name: '🧹 Verify Clean Working Directory'
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
17
.github/workflows/_test_pydantic.yml
vendored
17
.github/workflows/_test_pydantic.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: test pydantic intermediate versions
|
||||
name: '🐍 Pydantic Version Testing'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -31,29 +31,30 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "make test # pydantic: ~=${{ inputs.pydantic-version }}, python: ${{ inputs.python-version }}, "
|
||||
name: 'Pydantic ~=${{ inputs.pydantic-version }}'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: '📋 Checkout Code'
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ inputs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ inputs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Test Dependencies'
|
||||
shell: bash
|
||||
run: uv sync --group test
|
||||
|
||||
- name: Overwrite pydantic version
|
||||
- name: '🔄 Install Specific Pydantic Version'
|
||||
shell: bash
|
||||
run: VIRTUAL_ENV=.venv uv pip install pydantic~=${{ inputs.pydantic-version }}
|
||||
|
||||
- name: Run core tests
|
||||
- name: '🧪 Run Core Tests'
|
||||
shell: bash
|
||||
run: |
|
||||
make test
|
||||
|
||||
- name: Ensure the tests did not create any additional files
|
||||
- name: '🧹 Verify Clean Working Directory'
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
10
.github/workflows/_test_release.yml
vendored
10
.github/workflows/_test_release.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: test-release
|
||||
name: '🧪 Test Release Package'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python + uv
|
||||
- name: '🐍 Set up Python + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
@@ -45,17 +45,17 @@ jobs:
|
||||
# > It is strongly advised to separate jobs for building [...]
|
||||
# > from the publish job.
|
||||
# https://github.com/pypa/gh-action-pypi-publish#non-goals
|
||||
- name: Build project for distribution
|
||||
- name: '📦 Build Project for Distribution'
|
||||
run: uv build
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
- name: '⬆️ Upload Build Artifacts'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Check Version
|
||||
- name: '🔍 Extract Version Information'
|
||||
id: check-version
|
||||
shell: python
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
44
.github/workflows/api_doc_build.yml
vendored
44
.github/workflows/api_doc_build.yml
vendored
@@ -1,17 +1,20 @@
|
||||
name: API Docs Build
|
||||
name: '📚 API Documentation Build'
|
||||
# Runs daily or can be triggered manually for immediate updates
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 13 * * *'
|
||||
- cron: '0 13 * * *' # Daily at 1PM UTC
|
||||
env:
|
||||
PYTHON_VERSION: "3.11"
|
||||
|
||||
jobs:
|
||||
# Only runs on main repository to prevent unnecessary builds on forks
|
||||
build:
|
||||
if: github.repository == 'langchain-ai/langchain' || github.event_name != 'schedule'
|
||||
runs-on: ubuntu-latest
|
||||
permissions: write-all
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
@@ -22,7 +25,7 @@ jobs:
|
||||
path: langchain-api-docs-html
|
||||
token: ${{ secrets.TOKEN_GITHUB_API_DOCS_HTML }}
|
||||
|
||||
- name: Get repos with yq
|
||||
- name: '📋 Extract Repository List with yq'
|
||||
id: get-unsorted-repos
|
||||
uses: mikefarah/yq@master
|
||||
with:
|
||||
@@ -41,7 +44,7 @@ jobs:
|
||||
| .repo
|
||||
' langchain/libs/packages.yml
|
||||
|
||||
- name: Parse YAML and checkout repos
|
||||
- name: '📋 Parse YAML & Checkout Repositories'
|
||||
env:
|
||||
REPOS_UNSORTED: ${{ steps.get-unsorted-repos.outputs.result }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -49,48 +52,59 @@ jobs:
|
||||
# Get unique repositories
|
||||
REPOS=$(echo "$REPOS_UNSORTED" | sort -u)
|
||||
|
||||
# Checkout each unique repository that is in langchain-ai org
|
||||
# Checkout each unique repository
|
||||
for repo in $REPOS; do
|
||||
# Validate repository format (allow any org with proper format)
|
||||
if [[ ! "$repo" =~ ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ ]]; then
|
||||
echo "Error: Invalid repository format: $repo"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
REPO_NAME=$(echo $repo | cut -d'/' -f2)
|
||||
|
||||
# Additional validation for repo name
|
||||
if [[ ! "$REPO_NAME" =~ ^[a-zA-Z0-9_.-]+$ ]]; then
|
||||
echo "Error: Invalid repository name: $REPO_NAME"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Checking out $repo to $REPO_NAME"
|
||||
git clone --depth 1 https://github.com/$repo.git $REPO_NAME
|
||||
done
|
||||
|
||||
- name: Setup Python ${{ env.PYTHON_VERSION }}
|
||||
- name: '🐍 Setup Python ${{ env.PYTHON_VERSION }}'
|
||||
uses: actions/setup-python@v5
|
||||
id: setup-python
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install initial py deps
|
||||
- name: '📦 Install Initial Python Dependencies'
|
||||
working-directory: langchain
|
||||
run: |
|
||||
python -m pip install -U uv
|
||||
python -m uv pip install --upgrade --no-cache-dir pip setuptools pyyaml
|
||||
|
||||
- name: Move libs
|
||||
- name: '📦 Organize Library Directories'
|
||||
run: python langchain/.github/scripts/prep_api_docs_build.py
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Rm old html
|
||||
- name: '🧹 Remove Old HTML Files'
|
||||
run:
|
||||
rm -rf langchain-api-docs-html/api_reference_build/html
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Documentation Dependencies'
|
||||
working-directory: langchain
|
||||
run: |
|
||||
python -m uv pip install $(ls ./libs/partners | xargs -I {} echo "./libs/partners/{}") --overrides ./docs/vercel_overrides.txt
|
||||
python -m uv pip install libs/core libs/langchain libs/text-splitters libs/community libs/experimental libs/standard-tests
|
||||
python -m uv pip install -r docs/api_reference/requirements.txt
|
||||
|
||||
- name: Set Git config
|
||||
- name: '🔧 Configure Git Settings'
|
||||
working-directory: langchain
|
||||
run: |
|
||||
git config --local user.email "actions@github.com"
|
||||
git config --local user.name "Github Actions"
|
||||
|
||||
- name: Build docs
|
||||
- name: '📚 Build API Documentation'
|
||||
working-directory: langchain
|
||||
run: |
|
||||
python docs/api_reference/create_api_rst.py
|
||||
|
||||
8
.github/workflows/check-broken-links.yml
vendored
8
.github/workflows/check-broken-links.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Check Broken Links
|
||||
name: '🔗 Check Broken Links'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -14,15 +14,15 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Use Node.js 18.x
|
||||
- name: '🟢 Setup Node.js 18.x'
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 18.x
|
||||
cache: "yarn"
|
||||
cache-dependency-path: ./docs/yarn.lock
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Node Dependencies'
|
||||
run: yarn install --immutable --mode=skip-build
|
||||
working-directory: ./docs
|
||||
- name: Check broken links
|
||||
- name: '🔍 Scan Documentation for Broken Links'
|
||||
run: yarn check-broken-links
|
||||
working-directory: ./docs
|
||||
|
||||
6
.github/workflows/check_core_versions.yml
vendored
6
.github/workflows/check_core_versions.yml
vendored
@@ -1,4 +1,6 @@
|
||||
name: Check `core` Version Equality
|
||||
name: '🔍 Check `core` Version Equality'
|
||||
# Ensures version numbers in pyproject.toml and version.py stay in sync
|
||||
# Prevents releases with mismatched version numbers
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -16,7 +18,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Check version equality
|
||||
- name: '✅ Verify pyproject.toml & version.py Match'
|
||||
run: |
|
||||
PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' libs/core/pyproject.toml)
|
||||
VERSION_PY_VERSION=$(grep -Po '(?<=^VERSION = ")[^"]*' libs/core/langchain_core/version.py)
|
||||
|
||||
42
.github/workflows/check_diffs.yml
vendored
42
.github/workflows/check_diffs.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: CI
|
||||
name: '🔧 CI'
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -6,6 +6,7 @@ on:
|
||||
pull_request:
|
||||
merge_group:
|
||||
|
||||
# Optimizes CI performance by canceling redundant workflow runs
|
||||
# If another push to the same PR or branch happens while this workflow is still running,
|
||||
# cancel the earlier run in favor of the next run.
|
||||
#
|
||||
@@ -24,16 +25,23 @@ env:
|
||||
UV_NO_SYNC: "true"
|
||||
|
||||
jobs:
|
||||
# This job analyzes which files changed and creates a dynamic test matrix
|
||||
# to only run tests/lints for the affected packages, improving CI efficiency
|
||||
build:
|
||||
name: 'Detect Changes & Set Matrix'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- name: '📋 Checkout Code'
|
||||
uses: actions/checkout@v4
|
||||
- name: '🐍 Setup Python 3.11'
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- id: files
|
||||
- name: '📂 Get Changed Files'
|
||||
id: files
|
||||
uses: Ana06/get-changed-files@v2.3.0
|
||||
- id: set-matrix
|
||||
- name: '🔍 Analyze Changed Files & Generate Build Matrix'
|
||||
id: set-matrix
|
||||
run: |
|
||||
python -m pip install packaging requests
|
||||
python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
|
||||
@@ -45,8 +53,8 @@ jobs:
|
||||
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
|
||||
test-doc-imports: ${{ steps.set-matrix.outputs.test-doc-imports }}
|
||||
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
|
||||
# Run linting only on packages that have changed files
|
||||
lint:
|
||||
name: cd ${{ matrix.job-configs.working-directory }}
|
||||
needs: [ build ]
|
||||
if: ${{ needs.build.outputs.lint != '[]' }}
|
||||
strategy:
|
||||
@@ -59,8 +67,8 @@ jobs:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run unit tests only on packages that have changed files
|
||||
test:
|
||||
name: cd ${{ matrix.job-configs.working-directory }}
|
||||
needs: [ build ]
|
||||
if: ${{ needs.build.outputs.test != '[]' }}
|
||||
strategy:
|
||||
@@ -73,8 +81,8 @@ jobs:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Test compatibility with different Pydantic versions for affected packages
|
||||
test-pydantic:
|
||||
name: cd ${{ matrix.job-configs.working-directory }}
|
||||
needs: [ build ]
|
||||
if: ${{ needs.build.outputs.test-pydantic != '[]' }}
|
||||
strategy:
|
||||
@@ -95,12 +103,12 @@ jobs:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.test-doc-imports) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_test_doc_imports.yml
|
||||
secrets: inherit
|
||||
with:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Verify integration tests compile without actually running them (faster feedback)
|
||||
compile-integration-tests:
|
||||
name: cd ${{ matrix.job-configs.working-directory }}
|
||||
needs: [ build ]
|
||||
if: ${{ needs.build.outputs.compile-integration-tests != '[]' }}
|
||||
strategy:
|
||||
@@ -113,8 +121,9 @@ jobs:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run extended test suites that require additional dependencies
|
||||
extended-tests:
|
||||
name: "cd ${{ matrix.job-configs.working-directory }} / make extended_tests #${{ matrix.job-configs.python-version }}"
|
||||
name: 'Extended Tests'
|
||||
needs: [ build ]
|
||||
if: ${{ needs.build.outputs.extended-tests != '[]' }}
|
||||
strategy:
|
||||
@@ -130,12 +139,12 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.job-configs.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
|
||||
- name: Install dependencies and run extended tests
|
||||
- name: '📦 Install Dependencies & Run Extended Tests'
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running extended tests, installing dependencies with uv..."
|
||||
@@ -144,7 +153,7 @@ jobs:
|
||||
VIRTUAL_ENV=.venv uv pip install -r extended_testing_deps.txt
|
||||
VIRTUAL_ENV=.venv make extended_tests
|
||||
|
||||
- name: Ensure the tests did not create any additional files
|
||||
- name: '🧹 Verify Clean Working Directory'
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
@@ -156,8 +165,9 @@ jobs:
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
|
||||
# Final status check - ensures all required jobs passed before allowing merge
|
||||
ci_success:
|
||||
name: "CI Success"
|
||||
name: '✅ CI Success'
|
||||
needs: [build, lint, test, compile-integration-tests, extended-tests, test-doc-imports, test-pydantic]
|
||||
if: |
|
||||
always()
|
||||
@@ -167,7 +177,7 @@ jobs:
|
||||
RESULTS_JSON: ${{ toJSON(needs.*.result) }}
|
||||
EXIT_CODE: ${{!contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && '0' || '1'}}
|
||||
steps:
|
||||
- name: "CI Success"
|
||||
- name: '🎉 All Checks Passed'
|
||||
run: |
|
||||
echo $JOBS_JSON
|
||||
echo $RESULTS_JSON
|
||||
|
||||
4
.github/workflows/check_new_docs.yml
vendored
4
.github/workflows/check_new_docs.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Integration Docs Lint
|
||||
name: '📑 Integration Docs Lint'
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -33,6 +33,6 @@ jobs:
|
||||
*.ipynb
|
||||
*.md
|
||||
*.mdx
|
||||
- name: Check new docs
|
||||
- name: '🔍 Check New Documentation Templates'
|
||||
run: |
|
||||
python docs/scripts/check_templates.py ${{ steps.files.outputs.added }}
|
||||
|
||||
35
.github/workflows/codespell.yml
vendored
35
.github/workflows/codespell.yml
vendored
@@ -1,35 +0,0 @@
|
||||
name: CI / cd . / make spell_check
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, v0.1, v0.2]
|
||||
pull_request:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
codespell:
|
||||
name: (Check for spelling errors)
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
pip install toml
|
||||
|
||||
- name: Extract Ignore Words List
|
||||
run: |
|
||||
# Use a Python script to extract the ignore words list from pyproject.toml
|
||||
python .github/workflows/extract_ignored_words_list.py
|
||||
id: extract_ignore_words
|
||||
|
||||
# - name: Codespell
|
||||
# uses: codespell-project/actions-codespell@v2
|
||||
# with:
|
||||
# skip: guide_imports.json,*.ambr,./cookbook/data/imdb_top_1000.csv,*.lock
|
||||
# ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}
|
||||
# exclude_file: ./.github/workflows/codespell-exclude
|
||||
10
.github/workflows/codspeed.yml
vendored
10
.github/workflows/codspeed.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: CodSpeed
|
||||
name: '⚡ CodSpeed'
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -18,7 +18,7 @@ env:
|
||||
|
||||
jobs:
|
||||
codspeed:
|
||||
name: Run benchmarks
|
||||
name: 'Benchmark'
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# We have to use 3.12 as 3.13 is not yet supported
|
||||
- name: Install uv
|
||||
- name: '📦 Install UV Package Manager'
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: "3.12"
|
||||
@@ -47,11 +47,11 @@ jobs:
|
||||
with:
|
||||
python-version: "3.12"
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Test Dependencies'
|
||||
run: uv sync --group test
|
||||
working-directory: ${{ matrix.working-directory }}
|
||||
|
||||
- name: Run benchmarks ${{ matrix.working-directory }}
|
||||
- name: '⚡ Run Benchmarks: ${{ matrix.working-directory }}'
|
||||
uses: CodSpeedHQ/action@v3
|
||||
with:
|
||||
token: ${{ secrets.CODSPEED_TOKEN }}
|
||||
|
||||
6
.github/workflows/people.yml
vendored
6
.github/workflows/people.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: LangChain People
|
||||
name: '👥 LangChain People'
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -14,13 +14,13 @@ jobs:
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Dump GitHub context
|
||||
- name: '📋 Dump GitHub Context'
|
||||
env:
|
||||
GITHUB_CONTEXT: ${{ toJson(github) }}
|
||||
run: echo "$GITHUB_CONTEXT"
|
||||
- uses: actions/checkout@v4
|
||||
# Ref: https://github.com/actions/runner/issues/2033
|
||||
- name: Fix git safe.directory in container
|
||||
- name: '🔧 Fix Git Safe Directory in Container'
|
||||
run: mkdir -p /home/runner/work/_temp/_github_home && printf "[safe]\n\tdirectory = /github/workspace" > /home/runner/work/_temp/_github_home/.gitconfig
|
||||
- uses: ./.github/actions/people
|
||||
with:
|
||||
|
||||
9
.github/workflows/pr_lint.yml
vendored
9
.github/workflows/pr_lint.yml
vendored
@@ -4,6 +4,7 @@
|
||||
# Purpose:
|
||||
# Enforces Conventional Commits format for pull request titles to maintain a
|
||||
# clear, consistent, and machine-readable change history across our repository.
|
||||
# This helps with automated changelog generation and semantic versioning.
|
||||
#
|
||||
# Enforced Commit Message Format (Conventional Commits 1.0.0):
|
||||
# <type>[optional scope]: <description>
|
||||
@@ -45,7 +46,7 @@
|
||||
# • Conventional Commits spec: https://www.conventionalcommits.org/en/v1.0.0/
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
name: PR Title Lint
|
||||
name: '🏷️ PR Title Lint'
|
||||
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -55,11 +56,12 @@ on:
|
||||
types: [opened, edited, synchronize]
|
||||
|
||||
jobs:
|
||||
# Validates that PR title follows Conventional Commits specification
|
||||
lint-pr-title:
|
||||
name: Validate PR Title
|
||||
name: 'Validate PR Title Format'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Validate PR Title
|
||||
- name: '✅ Validate Conventional Commits Format'
|
||||
uses: amannn/action-semantic-pull-request@v5
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -81,6 +83,7 @@ jobs:
|
||||
core
|
||||
cli
|
||||
langchain
|
||||
langchain_v1
|
||||
standard-tests
|
||||
text-splitters
|
||||
docs
|
||||
|
||||
18
.github/workflows/run_notebooks.yml
vendored
18
.github/workflows/run_notebooks.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Run Notebooks
|
||||
name: '📝 Run Documentation Notebooks'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -24,43 +24,43 @@ jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'langchain-ai/langchain' || github.event_name != 'schedule'
|
||||
name: "Test docs"
|
||||
name: '📑 Test Documentation Notebooks'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python + uv
|
||||
- name: '🐍 Set up Python + UV'
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ github.event.inputs.python_version || '3.11' }}
|
||||
|
||||
- name: 'Authenticate to Google Cloud'
|
||||
- name: '🔐 Authenticate to Google Cloud'
|
||||
id: 'auth'
|
||||
uses: google-github-actions/auth@v2
|
||||
with:
|
||||
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'
|
||||
|
||||
- name: Configure AWS Credentials
|
||||
- name: '🔐 Configure AWS Credentials'
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
|
||||
- name: Install dependencies
|
||||
- name: '📦 Install Dependencies'
|
||||
run: |
|
||||
uv sync --group dev --group test
|
||||
|
||||
- name: Pre-download files
|
||||
- name: '📦 Pre-download Test Files'
|
||||
run: |
|
||||
uv run python docs/scripts/cache_data.py
|
||||
curl -s https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql | sqlite3 docs/docs/how_to/Chinook.db
|
||||
cp docs/docs/how_to/Chinook.db docs/docs/tutorials/Chinook.db
|
||||
|
||||
- name: Prepare notebooks
|
||||
- name: '🔧 Prepare Notebooks for CI'
|
||||
run: |
|
||||
uv run python docs/scripts/prepare_notebooks_for_ci.py --comment-install-cells --working-directory ${{ github.event.inputs.working-directory || 'all' }}
|
||||
|
||||
- name: Run notebooks
|
||||
- name: '🚀 Execute Notebooks'
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
|
||||
37
.github/workflows/scheduled_test.yml
vendored
37
.github/workflows/scheduled_test.yml
vendored
@@ -1,7 +1,7 @@
|
||||
name: Scheduled Tests
|
||||
name: '⏰ Scheduled Integration Tests'
|
||||
|
||||
on:
|
||||
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
|
||||
workflow_dispatch: # Allows maintainers to trigger the workflow manually in GitHub UI
|
||||
inputs:
|
||||
working-directory-force:
|
||||
type: string
|
||||
@@ -10,7 +10,7 @@ on:
|
||||
type: string
|
||||
description: "Python version to use - defaults to 3.9 and 3.11 in matrix - example value: 3.9"
|
||||
schedule:
|
||||
- cron: '0 13 * * *'
|
||||
- cron: '0 13 * * *' # Runs daily at 1PM UTC (9AM EDT/6AM PDT)
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -22,14 +22,16 @@ env:
|
||||
POETRY_LIBS: ("libs/partners/google-vertexai" "libs/partners/google-genai" "libs/partners/aws")
|
||||
|
||||
jobs:
|
||||
# Generate dynamic test matrix based on input parameters or defaults
|
||||
# Only runs on the main repo (for scheduled runs) or when manually triggered
|
||||
compute-matrix:
|
||||
if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
|
||||
runs-on: ubuntu-latest
|
||||
name: Compute matrix
|
||||
name: '📋 Compute Test Matrix'
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Set matrix
|
||||
- name: '🔢 Generate Python & Library Matrix'
|
||||
id: set-matrix
|
||||
env:
|
||||
DEFAULT_LIBS: ${{ env.DEFAULT_LIBS }}
|
||||
@@ -50,9 +52,11 @@ jobs:
|
||||
matrix="{\"python-version\": $python_version, \"working-directory\": $working_directory}"
|
||||
echo $matrix
|
||||
echo "matrix=$matrix" >> $GITHUB_OUTPUT
|
||||
# Run integration tests against partner libraries with live API credentials
|
||||
# Tests are run with both Poetry and UV depending on the library's setup
|
||||
build:
|
||||
if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
|
||||
name: Python ${{ matrix.python-version }} - ${{ matrix.working-directory }}
|
||||
name: '🐍 Python ${{ matrix.python-version }}: ${{ matrix.working-directory }}'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [compute-matrix]
|
||||
timeout-minutes: 20
|
||||
@@ -75,7 +79,7 @@ jobs:
|
||||
repository: langchain-ai/langchain-aws
|
||||
path: langchain-aws
|
||||
|
||||
- name: Move libs
|
||||
- name: '📦 Organize External Libraries'
|
||||
run: |
|
||||
rm -rf \
|
||||
langchain/libs/partners/google-genai \
|
||||
@@ -84,7 +88,7 @@ jobs:
|
||||
mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai
|
||||
mv langchain-aws/libs/aws langchain/libs/partners/aws
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} with poetry
|
||||
- name: '🐍 Set up Python ${{ matrix.python-version }} + Poetry'
|
||||
if: contains(env.POETRY_LIBS, matrix.working-directory)
|
||||
uses: "./langchain/.github/actions/poetry_setup"
|
||||
with:
|
||||
@@ -93,40 +97,40 @@ jobs:
|
||||
working-directory: langchain/${{ matrix.working-directory }}
|
||||
cache-key: scheduled
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + uv
|
||||
- name: '🐍 Set up Python ${{ matrix.python-version }} + UV'
|
||||
if: "!contains(env.POETRY_LIBS, matrix.working-directory)"
|
||||
uses: "./langchain/.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: 'Authenticate to Google Cloud'
|
||||
- name: '🔐 Authenticate to Google Cloud'
|
||||
id: 'auth'
|
||||
uses: google-github-actions/auth@v2
|
||||
with:
|
||||
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'
|
||||
|
||||
- name: Configure AWS Credentials
|
||||
- name: '🔐 Configure AWS Credentials'
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
|
||||
- name: Install dependencies (poetry)
|
||||
- name: '📦 Install Dependencies (Poetry)'
|
||||
if: contains(env.POETRY_LIBS, matrix.working-directory)
|
||||
run: |
|
||||
echo "Running scheduled tests, installing dependencies with poetry..."
|
||||
cd langchain/${{ matrix.working-directory }}
|
||||
poetry install --with=test_integration,test
|
||||
|
||||
- name: Install dependencies (uv)
|
||||
- name: '📦 Install Dependencies (UV)'
|
||||
if: "!contains(env.POETRY_LIBS, matrix.working-directory)"
|
||||
run: |
|
||||
echo "Running scheduled tests, installing dependencies with uv..."
|
||||
cd langchain/${{ matrix.working-directory }}
|
||||
uv sync --group test --group test_integration
|
||||
|
||||
- name: Run integration tests
|
||||
- name: '🚀 Run Integration Tests'
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
@@ -155,14 +159,15 @@ jobs:
|
||||
cd langchain/${{ matrix.working-directory }}
|
||||
make integration_tests
|
||||
|
||||
- name: Remove external libraries
|
||||
- name: '🧹 Clean up External Libraries'
|
||||
# Clean up external libraries to avoid affecting git status check
|
||||
run: |
|
||||
rm -rf \
|
||||
langchain/libs/partners/google-genai \
|
||||
langchain/libs/partners/google-vertexai \
|
||||
langchain/libs/partners/aws
|
||||
|
||||
- name: Ensure tests did not create additional files
|
||||
- name: '🧹 Verify Clean Working Directory'
|
||||
working-directory: langchain
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,4 @@
|
||||
.vs/
|
||||
.vscode/
|
||||
.idea/
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
14
.markdownlint.json
Normal file
14
.markdownlint.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"MD013": false,
|
||||
"MD024": {
|
||||
"siblings_only": true
|
||||
},
|
||||
"MD025": false,
|
||||
"MD033": false,
|
||||
"MD034": false,
|
||||
"MD036": false,
|
||||
"MD041": false,
|
||||
"MD046": {
|
||||
"style": "fenced"
|
||||
}
|
||||
}
|
||||
@@ -1,111 +1,111 @@
|
||||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: core
|
||||
name: format core
|
||||
language: system
|
||||
entry: make -C libs/core format
|
||||
files: ^libs/core/
|
||||
pass_filenames: false
|
||||
- id: langchain
|
||||
name: format langchain
|
||||
language: system
|
||||
entry: make -C libs/langchain format
|
||||
files: ^libs/langchain/
|
||||
pass_filenames: false
|
||||
- id: standard-tests
|
||||
name: format standard-tests
|
||||
language: system
|
||||
entry: make -C libs/standard-tests format
|
||||
files: ^libs/standard-tests/
|
||||
pass_filenames: false
|
||||
- id: text-splitters
|
||||
name: format text-splitters
|
||||
language: system
|
||||
entry: make -C libs/text-splitters format
|
||||
files: ^libs/text-splitters/
|
||||
pass_filenames: false
|
||||
- id: anthropic
|
||||
name: format partners/anthropic
|
||||
language: system
|
||||
entry: make -C libs/partners/anthropic format
|
||||
files: ^libs/partners/anthropic/
|
||||
pass_filenames: false
|
||||
- id: chroma
|
||||
name: format partners/chroma
|
||||
language: system
|
||||
entry: make -C libs/partners/chroma format
|
||||
files: ^libs/partners/chroma/
|
||||
pass_filenames: false
|
||||
- id: couchbase
|
||||
name: format partners/couchbase
|
||||
language: system
|
||||
entry: make -C libs/partners/couchbase format
|
||||
files: ^libs/partners/couchbase/
|
||||
pass_filenames: false
|
||||
- id: exa
|
||||
name: format partners/exa
|
||||
language: system
|
||||
entry: make -C libs/partners/exa format
|
||||
files: ^libs/partners/exa/
|
||||
pass_filenames: false
|
||||
- id: fireworks
|
||||
name: format partners/fireworks
|
||||
language: system
|
||||
entry: make -C libs/partners/fireworks format
|
||||
files: ^libs/partners/fireworks/
|
||||
pass_filenames: false
|
||||
- id: groq
|
||||
name: format partners/groq
|
||||
language: system
|
||||
entry: make -C libs/partners/groq format
|
||||
files: ^libs/partners/groq/
|
||||
pass_filenames: false
|
||||
- id: huggingface
|
||||
name: format partners/huggingface
|
||||
language: system
|
||||
entry: make -C libs/partners/huggingface format
|
||||
files: ^libs/partners/huggingface/
|
||||
pass_filenames: false
|
||||
- id: mistralai
|
||||
name: format partners/mistralai
|
||||
language: system
|
||||
entry: make -C libs/partners/mistralai format
|
||||
files: ^libs/partners/mistralai/
|
||||
pass_filenames: false
|
||||
- id: nomic
|
||||
name: format partners/nomic
|
||||
language: system
|
||||
entry: make -C libs/partners/nomic format
|
||||
files: ^libs/partners/nomic/
|
||||
pass_filenames: false
|
||||
- id: ollama
|
||||
name: format partners/ollama
|
||||
language: system
|
||||
entry: make -C libs/partners/ollama format
|
||||
files: ^libs/partners/ollama/
|
||||
pass_filenames: false
|
||||
- id: openai
|
||||
name: format partners/openai
|
||||
language: system
|
||||
entry: make -C libs/partners/openai format
|
||||
files: ^libs/partners/openai/
|
||||
pass_filenames: false
|
||||
- id: prompty
|
||||
name: format partners/prompty
|
||||
language: system
|
||||
entry: make -C libs/partners/prompty format
|
||||
files: ^libs/partners/prompty/
|
||||
pass_filenames: false
|
||||
- id: qdrant
|
||||
name: format partners/qdrant
|
||||
language: system
|
||||
entry: make -C libs/partners/qdrant format
|
||||
files: ^libs/partners/qdrant/
|
||||
pass_filenames: false
|
||||
- id: root
|
||||
name: format docs, cookbook
|
||||
language: system
|
||||
entry: make format
|
||||
files: ^(docs|cookbook)/
|
||||
pass_filenames: false
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: core
|
||||
name: format core
|
||||
language: system
|
||||
entry: make -C libs/core format
|
||||
files: ^libs/core/
|
||||
pass_filenames: false
|
||||
- id: langchain
|
||||
name: format langchain
|
||||
language: system
|
||||
entry: make -C libs/langchain format
|
||||
files: ^libs/langchain/
|
||||
pass_filenames: false
|
||||
- id: standard-tests
|
||||
name: format standard-tests
|
||||
language: system
|
||||
entry: make -C libs/standard-tests format
|
||||
files: ^libs/standard-tests/
|
||||
pass_filenames: false
|
||||
- id: text-splitters
|
||||
name: format text-splitters
|
||||
language: system
|
||||
entry: make -C libs/text-splitters format
|
||||
files: ^libs/text-splitters/
|
||||
pass_filenames: false
|
||||
- id: anthropic
|
||||
name: format partners/anthropic
|
||||
language: system
|
||||
entry: make -C libs/partners/anthropic format
|
||||
files: ^libs/partners/anthropic/
|
||||
pass_filenames: false
|
||||
- id: chroma
|
||||
name: format partners/chroma
|
||||
language: system
|
||||
entry: make -C libs/partners/chroma format
|
||||
files: ^libs/partners/chroma/
|
||||
pass_filenames: false
|
||||
- id: couchbase
|
||||
name: format partners/couchbase
|
||||
language: system
|
||||
entry: make -C libs/partners/couchbase format
|
||||
files: ^libs/partners/couchbase/
|
||||
pass_filenames: false
|
||||
- id: exa
|
||||
name: format partners/exa
|
||||
language: system
|
||||
entry: make -C libs/partners/exa format
|
||||
files: ^libs/partners/exa/
|
||||
pass_filenames: false
|
||||
- id: fireworks
|
||||
name: format partners/fireworks
|
||||
language: system
|
||||
entry: make -C libs/partners/fireworks format
|
||||
files: ^libs/partners/fireworks/
|
||||
pass_filenames: false
|
||||
- id: groq
|
||||
name: format partners/groq
|
||||
language: system
|
||||
entry: make -C libs/partners/groq format
|
||||
files: ^libs/partners/groq/
|
||||
pass_filenames: false
|
||||
- id: huggingface
|
||||
name: format partners/huggingface
|
||||
language: system
|
||||
entry: make -C libs/partners/huggingface format
|
||||
files: ^libs/partners/huggingface/
|
||||
pass_filenames: false
|
||||
- id: mistralai
|
||||
name: format partners/mistralai
|
||||
language: system
|
||||
entry: make -C libs/partners/mistralai format
|
||||
files: ^libs/partners/mistralai/
|
||||
pass_filenames: false
|
||||
- id: nomic
|
||||
name: format partners/nomic
|
||||
language: system
|
||||
entry: make -C libs/partners/nomic format
|
||||
files: ^libs/partners/nomic/
|
||||
pass_filenames: false
|
||||
- id: ollama
|
||||
name: format partners/ollama
|
||||
language: system
|
||||
entry: make -C libs/partners/ollama format
|
||||
files: ^libs/partners/ollama/
|
||||
pass_filenames: false
|
||||
- id: openai
|
||||
name: format partners/openai
|
||||
language: system
|
||||
entry: make -C libs/partners/openai format
|
||||
files: ^libs/partners/openai/
|
||||
pass_filenames: false
|
||||
- id: prompty
|
||||
name: format partners/prompty
|
||||
language: system
|
||||
entry: make -C libs/partners/prompty format
|
||||
files: ^libs/partners/prompty/
|
||||
pass_filenames: false
|
||||
- id: qdrant
|
||||
name: format partners/qdrant
|
||||
language: system
|
||||
entry: make -C libs/partners/qdrant format
|
||||
files: ^libs/partners/qdrant/
|
||||
pass_filenames: false
|
||||
- id: root
|
||||
name: format docs, cookbook
|
||||
language: system
|
||||
entry: make format
|
||||
files: ^(docs|cookbook)/
|
||||
pass_filenames: false
|
||||
|
||||
@@ -13,7 +13,7 @@ build:
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/api_reference/conf.py
|
||||
configuration: docs/api_reference/conf.py
|
||||
|
||||
# If using Sphinx, optionally build your docs in additional formats such as PDF
|
||||
formats:
|
||||
@@ -21,5 +21,5 @@ formats:
|
||||
|
||||
# Optionally declare the Python requirements required to build your docs
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/api_reference/requirements.txt
|
||||
install:
|
||||
- requirements: docs/api_reference/requirements.txt
|
||||
|
||||
21
.vscode/extensions.json
vendored
Normal file
21
.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"ms-python.python",
|
||||
"charliermarsh.ruff",
|
||||
"ms-python.mypy-type-checker",
|
||||
"ms-toolsai.jupyter",
|
||||
"ms-toolsai.jupyter-keymap",
|
||||
"ms-toolsai.jupyter-renderers",
|
||||
"ms-toolsai.vscode-jupyter-cell-tags",
|
||||
"ms-toolsai.vscode-jupyter-slideshow",
|
||||
"yzhang.markdown-all-in-one",
|
||||
"davidanson.vscode-markdownlint",
|
||||
"bierner.markdown-mermaid",
|
||||
"bierner.markdown-preview-github-styles",
|
||||
"eamodio.gitlens",
|
||||
"github.vscode-pull-request-github",
|
||||
"github.vscode-github-actions",
|
||||
"redhat.vscode-yaml",
|
||||
"editorconfig.editorconfig",
|
||||
],
|
||||
}
|
||||
80
.vscode/settings.json
vendored
Normal file
80
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
{
|
||||
"python.analysis.include": [
|
||||
"libs/**",
|
||||
"docs/**",
|
||||
"cookbook/**"
|
||||
],
|
||||
"python.analysis.exclude": [
|
||||
"**/node_modules",
|
||||
"**/__pycache__",
|
||||
"**/.pytest_cache",
|
||||
"**/.*",
|
||||
"_dist/**",
|
||||
"docs/_build/**",
|
||||
"docs/api_reference/_build/**"
|
||||
],
|
||||
"python.analysis.autoImportCompletions": true,
|
||||
"python.analysis.typeCheckingMode": "basic",
|
||||
"python.testing.cwd": "${workspaceFolder}",
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.ruffEnabled": true,
|
||||
"[python]": {
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "explicit",
|
||||
"source.fixAll": "explicit"
|
||||
},
|
||||
"editor.defaultFormatter": "charliermarsh.ruff"
|
||||
},
|
||||
"editor.rulers": [
|
||||
88
|
||||
],
|
||||
"editor.tabSize": 4,
|
||||
"editor.insertSpaces": true,
|
||||
"editor.trimAutoWhitespace": true,
|
||||
"files.trimTrailingWhitespace": true,
|
||||
"files.insertFinalNewline": true,
|
||||
"files.exclude": {
|
||||
"**/__pycache__": true,
|
||||
"**/.pytest_cache": true,
|
||||
"**/*.pyc": true,
|
||||
"**/.mypy_cache": true,
|
||||
"**/.ruff_cache": true,
|
||||
"_dist/**": true,
|
||||
"docs/_build/**": true,
|
||||
"docs/api_reference/_build/**": true,
|
||||
"**/node_modules": true,
|
||||
"**/.git": false
|
||||
},
|
||||
"search.exclude": {
|
||||
"**/__pycache__": true,
|
||||
"**/*.pyc": true,
|
||||
"_dist/**": true,
|
||||
"docs/_build/**": true,
|
||||
"docs/api_reference/_build/**": true,
|
||||
"**/node_modules": true,
|
||||
"**/.git": true,
|
||||
"uv.lock": true,
|
||||
"yarn.lock": true
|
||||
},
|
||||
"git.autofetch": true,
|
||||
"git.enableSmartCommit": true,
|
||||
"jupyter.askForKernelRestart": false,
|
||||
"jupyter.interactiveWindow.textEditor.executeSelection": true,
|
||||
"[markdown]": {
|
||||
"editor.wordWrap": "on",
|
||||
"editor.quickSuggestions": {
|
||||
"comments": "off",
|
||||
"strings": "off",
|
||||
"other": "off"
|
||||
}
|
||||
},
|
||||
"[yaml]": {
|
||||
"editor.tabSize": 2,
|
||||
"editor.insertSpaces": true
|
||||
},
|
||||
"[json]": {
|
||||
"editor.tabSize": 2,
|
||||
"editor.insertSpaces": true
|
||||
},
|
||||
}
|
||||
@@ -7,5 +7,5 @@ Please see the following guides for migrating LangChain code:
|
||||
* Migrating from [LangChain 0.0.x Chains](https://python.langchain.com/docs/versions/migrating_chains/)
|
||||
* Upgrade to [LangGraph Memory](https://python.langchain.com/docs/versions/migrating_memory/)
|
||||
|
||||
The [LangChain CLI](https://python.langchain.com/docs/versions/v0_3/#migrate-using-langchain-cli) can help you automatically upgrade your code to use non-deprecated imports.
|
||||
The [LangChain CLI](https://python.langchain.com/docs/versions/v0_3/#migrate-using-langchain-cli) can help you automatically upgrade your code to use non-deprecated imports.
|
||||
This will be especially helpful if you're still on either version 0.0.x or 0.1.x of LangChain.
|
||||
|
||||
67
Makefile
67
Makefile
@@ -8,9 +8,6 @@ help: Makefile
|
||||
@printf "\n\033[1mUsage: make <TARGETS> ...\033[0m\n\n\033[1mTargets:\033[0m\n\n"
|
||||
@sed -n 's/^## //p' $< | awk -F':' '{printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' | sort | sed -e 's/^/ /'
|
||||
|
||||
## all: Default target, shows help.
|
||||
all: help
|
||||
|
||||
## clean: Clean documentation and API documentation artifacts.
|
||||
clean: docs_clean api_docs_clean
|
||||
|
||||
@@ -19,49 +16,79 @@ clean: docs_clean api_docs_clean
|
||||
######################
|
||||
|
||||
## docs_build: Build the documentation.
|
||||
docs_build:
|
||||
docs_build: docs_clean
|
||||
@echo "📚 Building LangChain documentation..."
|
||||
cd docs && make build
|
||||
@echo "✅ Documentation build complete!"
|
||||
|
||||
## docs_clean: Clean the documentation build artifacts.
|
||||
docs_clean:
|
||||
@echo "🧹 Cleaning documentation artifacts..."
|
||||
cd docs && make clean
|
||||
@echo "✅ LangChain documentation cleaned"
|
||||
|
||||
## docs_linkcheck: Run linkchecker on the documentation.
|
||||
docs_linkcheck:
|
||||
uv run --no-group test linkchecker _dist/docs/ --ignore-url node_modules
|
||||
@echo "🔗 Checking documentation links..."
|
||||
@if [ -d _dist/docs ]; then \
|
||||
uv run --group test linkchecker _dist/docs/ --ignore-url node_modules; \
|
||||
else \
|
||||
echo "⚠️ Documentation not built. Run 'make docs_build' first."; \
|
||||
exit 1; \
|
||||
fi
|
||||
@echo "✅ Link check complete"
|
||||
|
||||
## api_docs_build: Build the API Reference documentation.
|
||||
api_docs_build:
|
||||
uv run --no-group test python docs/api_reference/create_api_rst.py
|
||||
cd docs/api_reference && uv run --no-group test make html
|
||||
uv run --no-group test python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
api_docs_build: clean
|
||||
@echo "📖 Building API Reference documentation..."
|
||||
uv pip install -e libs/cli
|
||||
uv run --group docs python docs/api_reference/create_api_rst.py
|
||||
cd docs/api_reference && uv run --group docs make html
|
||||
uv run --group docs python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
@echo "✅ API documentation built"
|
||||
@echo "🌐 Opening documentation in browser..."
|
||||
open docs/api_reference/_build/html/reference.html
|
||||
|
||||
API_PKG ?= text-splitters
|
||||
|
||||
api_docs_quick_preview:
|
||||
uv run --no-group test python docs/api_reference/create_api_rst.py $(API_PKG)
|
||||
cd docs/api_reference && uv run make html
|
||||
uv run --no-group test python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
api_docs_quick_preview: clean
|
||||
@echo "⚡ Building quick API preview for $(API_PKG)..."
|
||||
uv run --group docs python docs/api_reference/create_api_rst.py $(API_PKG)
|
||||
cd docs/api_reference && uv run --group docs make html
|
||||
uv run --group docs python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
@echo "🌐 Opening preview in browser..."
|
||||
open docs/api_reference/_build/html/reference.html
|
||||
|
||||
## api_docs_clean: Clean the API Reference documentation build artifacts.
|
||||
api_docs_clean:
|
||||
@echo "🧹 Cleaning API documentation artifacts..."
|
||||
find ./docs/api_reference -name '*_api_reference.rst' -delete
|
||||
git clean -fdX ./docs/api_reference
|
||||
rm -f docs/api_reference/index.md
|
||||
|
||||
@echo "✅ API documentation cleaned"
|
||||
|
||||
## api_docs_linkcheck: Run linkchecker on the API Reference documentation.
|
||||
api_docs_linkcheck:
|
||||
uv run --no-group test linkchecker docs/api_reference/_build/html/index.html
|
||||
@echo "🔗 Checking API documentation links..."
|
||||
@if [ -f docs/api_reference/_build/html/index.html ]; then \
|
||||
uv run --group test linkchecker docs/api_reference/_build/html/index.html; \
|
||||
else \
|
||||
echo "⚠️ API documentation not built. Run 'make api_docs_build' first."; \
|
||||
exit 1; \
|
||||
fi
|
||||
@echo "✅ API link check complete"
|
||||
|
||||
## spell_check: Run codespell on the project.
|
||||
spell_check:
|
||||
uv run --no-group test codespell --toml pyproject.toml
|
||||
@echo "✏️ Checking spelling across project..."
|
||||
uv run --group codespell codespell --toml pyproject.toml
|
||||
@echo "✅ Spell check complete"
|
||||
|
||||
## spell_fix: Run codespell on the project and fix the errors.
|
||||
spell_fix:
|
||||
uv run --no-group test codespell --toml pyproject.toml -w
|
||||
@echo "✏️ Fixing spelling errors across project..."
|
||||
uv run --group codespell codespell --toml pyproject.toml -w
|
||||
@echo "✅ Spelling errors fixed"
|
||||
|
||||
######################
|
||||
# LINTING AND FORMATTING
|
||||
@@ -69,6 +96,7 @@ spell_fix:
|
||||
|
||||
## lint: Run linting on the project.
|
||||
lint lint_package lint_tests:
|
||||
@echo "🔍 Running code linting and checks..."
|
||||
uv run --group lint ruff check docs cookbook
|
||||
uv run --group lint ruff format docs cookbook cookbook --diff
|
||||
git --no-pager grep 'from langchain import' docs cookbook | grep -vE 'from langchain import (hub)' && echo "Error: no importing langchain from root in docs, except for hub" && exit 1 || exit 0
|
||||
@@ -76,11 +104,16 @@ lint lint_package lint_tests:
|
||||
git --no-pager grep 'api.python.langchain.com' -- docs/docs ':!docs/docs/additional_resources/arxiv_references.mdx' ':!docs/docs/integrations/document_loaders/sitemap.ipynb' || exit 0 && \
|
||||
echo "Error: you should link python.langchain.com/api_reference, not api.python.langchain.com in the docs" && \
|
||||
exit 1
|
||||
@echo "✅ Linting complete"
|
||||
|
||||
## format: Format the project files.
|
||||
format format_diff:
|
||||
@echo "🎨 Formatting project files..."
|
||||
uv run --group lint ruff format docs cookbook
|
||||
uv run --group lint ruff check --fix docs cookbook
|
||||
@echo "✅ Formatting complete"
|
||||
|
||||
update-package-downloads:
|
||||
@echo "📊 Updating package download statistics..."
|
||||
uv run python docs/scripts/packages_yml_get_downloads.py
|
||||
@echo "✅ Package downloads updated"
|
||||
|
||||
@@ -40,9 +40,10 @@ controllable agent workflows.
|
||||
## Why use LangChain?
|
||||
|
||||
LangChain helps developers build applications powered by LLMs through a standard
|
||||
interface for models, embeddings, vector stores, and more.
|
||||
interface for models, embeddings, vector stores, and more.
|
||||
|
||||
Use LangChain for:
|
||||
|
||||
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and
|
||||
external / internal systems, drawing from LangChain’s vast library of integrations with
|
||||
model providers, tools, vector stores, retrievers, and more.
|
||||
@@ -52,9 +53,10 @@ frontier evolves, adapt quickly — LangChain’s abstractions keep you moving w
|
||||
losing momentum.
|
||||
|
||||
## LangChain’s ecosystem
|
||||
|
||||
While the LangChain framework can be used standalone, it also integrates seamlessly
|
||||
with any LangChain product, giving developers a full suite of tools when building LLM
|
||||
applications.
|
||||
applications.
|
||||
|
||||
To improve your LLM application development, pair LangChain with:
|
||||
|
||||
@@ -73,6 +75,7 @@ teams — and iterate quickly with visual prototyping in
|
||||
[LangGraph Studio](https://langchain-ai.github.io/langgraph/concepts/langgraph_studio/).
|
||||
|
||||
## Additional resources
|
||||
|
||||
- [Tutorials](https://python.langchain.com/docs/tutorials/): Simple walkthroughs with
|
||||
guided examples on getting started with LangChain.
|
||||
- [How-to Guides](https://python.langchain.com/docs/how_to/): Quick, actionable code
|
||||
|
||||
35
SECURITY.md
35
SECURITY.md
@@ -11,6 +11,7 @@ When building such applications developers should remember to follow good securi
|
||||
* [**Defense in Depth**](https://en.wikipedia.org/wiki/Defense_in_depth_(computing)): No security technique is perfect. Fine-tuning and good chain design can reduce, but not eliminate, the odds that a Large Language Model (LLM) may make a mistake. It's best to combine multiple layered security approaches rather than relying on any single layer of defense to ensure security. For example: use both read-only permissions and sandboxing to ensure that LLMs are only able to access data that is explicitly meant for them to use.
|
||||
|
||||
Risks of not doing so include, but are not limited to:
|
||||
|
||||
* Data corruption or loss.
|
||||
* Unauthorized access to confidential information.
|
||||
* Compromised performance or availability of critical resources.
|
||||
@@ -27,10 +28,10 @@ design and secure your applications.
|
||||
|
||||
## Reporting OSS Vulnerabilities
|
||||
|
||||
LangChain is partnered with [huntr by Protect AI](https://huntr.com/) to provide
|
||||
a bounty program for our open source projects.
|
||||
LangChain is partnered with [huntr by Protect AI](https://huntr.com/) to provide
|
||||
a bounty program for our open source projects.
|
||||
|
||||
Please report security vulnerabilities associated with the LangChain
|
||||
Please report security vulnerabilities associated with the LangChain
|
||||
open source projects [here](https://huntr.com/bounties/disclose/?target=https%3A%2F%2Fgithub.com%2Flangchain-ai%2Flangchain&validSearch=true).
|
||||
|
||||
Before reporting a vulnerability, please review:
|
||||
@@ -45,39 +46,39 @@ Before reporting a vulnerability, please review:
|
||||
|
||||
The following packages and repositories are eligible for bug bounties:
|
||||
|
||||
- langchain-core
|
||||
- langchain (see exceptions)
|
||||
- langchain-community (see exceptions)
|
||||
- langgraph
|
||||
- langserve
|
||||
* langchain-core
|
||||
* langchain (see exceptions)
|
||||
* langchain-community (see exceptions)
|
||||
* langgraph
|
||||
* langserve
|
||||
|
||||
### Out of Scope Targets
|
||||
|
||||
All out of scope targets defined by huntr as well as:
|
||||
|
||||
- **langchain-experimental**: This repository is for experimental code and is not
|
||||
* **langchain-experimental**: This repository is for experimental code and is not
|
||||
eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)), bug reports to it will be marked as interesting or waste of
|
||||
time and published with no bounty attached.
|
||||
- **tools**: Tools in either langchain or langchain-community are not eligible for bug
|
||||
* **tools**: Tools in either langchain or langchain-community are not eligible for bug
|
||||
bounties. This includes the following directories
|
||||
- libs/langchain/langchain/tools
|
||||
- libs/community/langchain_community/tools
|
||||
- Please review the [Best Practices](#best-practices)
|
||||
* libs/langchain/langchain/tools
|
||||
* libs/community/langchain_community/tools
|
||||
* Please review the [Best Practices](#best-practices)
|
||||
for more details, but generally tools interact with the real world. Developers are
|
||||
expected to understand the security implications of their code and are responsible
|
||||
for the security of their tools.
|
||||
- Code documented with security notices. This will be decided done on a case by
|
||||
* Code documented with security notices. This will be decided on a case by
|
||||
case basis, but likely will not be eligible for a bounty as the code is already
|
||||
documented with guidelines for developers that should be followed for making their
|
||||
application secure.
|
||||
- Any LangSmith related repositories or APIs (see [Reporting LangSmith Vulnerabilities](#reporting-langsmith-vulnerabilities)).
|
||||
* Any LangSmith related repositories or APIs (see [Reporting LangSmith Vulnerabilities](#reporting-langsmith-vulnerabilities)).
|
||||
|
||||
## Reporting LangSmith Vulnerabilities
|
||||
|
||||
Please report security vulnerabilities associated with LangSmith by email to `security@langchain.dev`.
|
||||
|
||||
- LangSmith site: https://smith.langchain.com
|
||||
- SDK client: https://github.com/langchain-ai/langsmith-sdk
|
||||
* LangSmith site: [https://smith.langchain.com](https://smith.langchain.com)
|
||||
* SDK client: [https://github.com/langchain-ai/langsmith-sdk](https://github.com/langchain-ai/langsmith-sdk)
|
||||
|
||||
### Other Security Concerns
|
||||
|
||||
|
||||
@@ -20,11 +20,7 @@
|
||||
"cell_type": "markdown",
|
||||
"id": "5939a54c-3198-4ba4-8346-1cc088c473c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### You can embed text in the same VectorDB space as images, and retreive text and images as well based on input text or image.\n",
|
||||
"##### Following link demonstrates that.\n",
|
||||
"<a> https://python.langchain.com/v0.2/docs/integrations/text_embedding/open_clip/ </a>"
|
||||
]
|
||||
"source": "##### You can embed text in the same VectorDB space as images, and retrieve text and images as well based on input text or image.\n##### Following link demonstrates that.\n<a> https://python.langchain.com/v0.2/docs/integrations/text_embedding/open_clip/ </a>"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -600,4 +596,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
@@ -552,9 +552,7 @@
|
||||
"cell_type": "markdown",
|
||||
"id": "77deb6a0-0950-450a-916a-f2a029676c20",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Appending all retreived documents in a single document**"
|
||||
]
|
||||
"source": "**Appending all retrieved documents in a single document**"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -758,4 +756,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
# we build the docs in these stages:
|
||||
# 1. install vercel and python dependencies
|
||||
# 2. copy files from "source dir" to "intermediate dir"
|
||||
# 2. generate files like model feat table, etc in "intermediate dir"
|
||||
# 3. copy files to their right spots (e.g. langserve readme) in "intermediate dir"
|
||||
# 4. build the docs from "intermediate dir" to "output dir"
|
||||
# We build the docs in these stages:
|
||||
# 1. Install vercel and python dependencies
|
||||
# 2. Copy files from "source dir" to "intermediate dir"
|
||||
# 2. Generate files like model feat table, etc in "intermediate dir"
|
||||
# 3. Copy files to their right spots (e.g. langserve readme) in "intermediate dir"
|
||||
# 4. Build the docs from "intermediate dir" to "output dir"
|
||||
|
||||
SOURCE_DIR = docs/
|
||||
INTERMEDIATE_DIR = build/intermediate/docs
|
||||
@@ -18,32 +18,45 @@ PORT ?= 3001
|
||||
clean:
|
||||
rm -rf build
|
||||
|
||||
clean-cache:
|
||||
rm -rf build .venv/deps_installed
|
||||
|
||||
install-vercel-deps:
|
||||
yum -y -q update
|
||||
yum -y -q install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y
|
||||
|
||||
install-py-deps:
|
||||
python3 -m venv .venv
|
||||
$(PYTHON) -m pip install -q --upgrade pip
|
||||
$(PYTHON) -m pip install -q --upgrade uv
|
||||
$(PYTHON) -m uv pip install -q --pre -r vercel_requirements.txt
|
||||
$(PYTHON) -m uv pip install -q --pre $$($(PYTHON) scripts/partner_deps_list.py) --overrides vercel_overrides.txt
|
||||
@echo "📦 Installing Python dependencies..."
|
||||
@if [ ! -d .venv ]; then python3 -m venv .venv; fi
|
||||
@if [ ! -f .venv/deps_installed ]; then \
|
||||
$(PYTHON) -m pip install -q --upgrade pip --disable-pip-version-check; \
|
||||
$(PYTHON) -m pip install -q --upgrade uv; \
|
||||
$(PYTHON) -m uv pip install -q --pre -r vercel_requirements.txt; \
|
||||
$(PYTHON) -m uv pip install -q --pre $$($(PYTHON) scripts/partner_deps_list.py) --overrides vercel_overrides.txt; \
|
||||
touch .venv/deps_installed; \
|
||||
fi
|
||||
@echo "✅ Dependencies installed"
|
||||
|
||||
generate-files:
|
||||
@echo "📄 Generating documentation files..."
|
||||
mkdir -p $(INTERMEDIATE_DIR)
|
||||
cp -rp $(SOURCE_DIR)/* $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/tool_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/kv_store_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
curl https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md | sed 's/<=/\<=/g' > $(INTERMEDIATE_DIR)/langserve.md
|
||||
@if [ ! -f build/langserve_readme_cache.md ] || [ $$(find build/langserve_readme_cache.md -mtime +1 -print) ]; then \
|
||||
echo "🌐 Downloading LangServe README..."; \
|
||||
curl -s https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md | sed 's/<=/\<=/g' > build/langserve_readme_cache.md; \
|
||||
fi
|
||||
cp build/langserve_readme_cache.md $(INTERMEDIATE_DIR)/langserve.md
|
||||
cp ../SECURITY.md $(INTERMEDIATE_DIR)/security.md
|
||||
$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/
|
||||
@echo "🔧 Generating feature tables and processing links..."
|
||||
$(PYTHON) scripts/tool_feat_table.py $(INTERMEDIATE_DIR) & \
|
||||
$(PYTHON) scripts/kv_store_feat_table.py $(INTERMEDIATE_DIR) & \
|
||||
$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR) & \
|
||||
$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/ & \
|
||||
wait
|
||||
@echo "✅ Files generated"
|
||||
|
||||
copy-infra:
|
||||
@echo "📂 Copying infrastructure files..."
|
||||
mkdir -p $(OUTPUT_NEW_DIR)
|
||||
cp -r src $(OUTPUT_NEW_DIR)
|
||||
cp vercel.json $(OUTPUT_NEW_DIR)
|
||||
@@ -55,15 +68,22 @@ copy-infra:
|
||||
cp -r static $(OUTPUT_NEW_DIR)
|
||||
cp -r ../libs/cli/langchain_cli/integration_template $(OUTPUT_NEW_DIR)/src/theme
|
||||
cp yarn.lock $(OUTPUT_NEW_DIR)
|
||||
@echo "✅ Infrastructure files copied"
|
||||
|
||||
render:
|
||||
@echo "📓 Converting notebooks (this may take a while)..."
|
||||
$(PYTHON) scripts/notebook_convert.py $(INTERMEDIATE_DIR) $(OUTPUT_NEW_DOCS_DIR)
|
||||
@echo "✅ Notebooks converted"
|
||||
|
||||
md-sync:
|
||||
@echo "📝 Syncing markdown files..."
|
||||
rsync -avmq --include="*/" --include="*.mdx" --include="*.md" --include="*.png" --include="*/_category_.yml" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_NEW_DOCS_DIR)
|
||||
@echo "✅ Markdown files synced"
|
||||
|
||||
append-related:
|
||||
@echo "🔗 Appending related links..."
|
||||
$(PYTHON) scripts/append_related_links.py $(OUTPUT_NEW_DOCS_DIR)
|
||||
@echo "✅ Related links appended"
|
||||
|
||||
generate-references:
|
||||
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)
|
||||
@@ -71,6 +91,10 @@ generate-references:
|
||||
update-md: generate-files md-sync
|
||||
|
||||
build: install-py-deps generate-files copy-infra render md-sync append-related
|
||||
@echo ""
|
||||
@echo "🎉 Documentation build complete!"
|
||||
@echo "📖 To view locally, run: cd docs && make start"
|
||||
@echo ""
|
||||
|
||||
vercel-build: install-vercel-deps build generate-references
|
||||
rm -rf docs
|
||||
@@ -84,4 +108,9 @@ vercel-build: install-vercel-deps build generate-references
|
||||
NODE_OPTIONS="--max-old-space-size=5000" yarn run docusaurus build
|
||||
|
||||
start:
|
||||
cd $(OUTPUT_NEW_DIR) && yarn && yarn start --port=$(PORT)
|
||||
@echo "🚀 Starting documentation server on port $(PORT)..."
|
||||
@echo "📖 Installing Node.js dependencies..."
|
||||
cd $(OUTPUT_NEW_DIR) && yarn install --silent
|
||||
@echo "🌐 Starting server at http://localhost:$(PORT)"
|
||||
@echo "Press Ctrl+C to stop the server"
|
||||
cd $(OUTPUT_NEW_DIR) && yarn start --port=$(PORT)
|
||||
|
||||
@@ -262,6 +262,8 @@ myst_enable_extensions = ["colon_fence"]
|
||||
|
||||
# generate autosummary even if no references
|
||||
autosummary_generate = True
|
||||
# Don't fail on autosummary import warnings
|
||||
autosummary_ignore_module_all = False
|
||||
|
||||
html_copy_source = False
|
||||
html_show_sourcelink = False
|
||||
|
||||
@@ -202,6 +202,12 @@ def _load_package_modules(
|
||||
if file_path.name.startswith("_"):
|
||||
continue
|
||||
|
||||
if "integration_template" in file_path.parts:
|
||||
continue
|
||||
|
||||
if "project_template" in file_path.parts:
|
||||
continue
|
||||
|
||||
relative_module_name = file_path.relative_to(package_path)
|
||||
|
||||
# Skip if any module part starts with an underscore
|
||||
@@ -495,15 +501,7 @@ def _package_namespace(package_name: str) -> str:
|
||||
|
||||
def _package_dir(package_name: str = "langchain") -> Path:
|
||||
"""Return the path to the directory containing the documentation."""
|
||||
if package_name in (
|
||||
"langchain",
|
||||
"experimental",
|
||||
"community",
|
||||
"core",
|
||||
"cli",
|
||||
"text-splitters",
|
||||
"standard-tests",
|
||||
):
|
||||
if (ROOT_DIR / "libs" / package_name).exists():
|
||||
return ROOT_DIR / "libs" / package_name / _package_namespace(package_name)
|
||||
else:
|
||||
return (
|
||||
@@ -592,7 +590,12 @@ For the legacy API reference hosted on ReadTheDocs see [https://api.python.langc
|
||||
if integrations:
|
||||
integration_headers = [
|
||||
" ".join(
|
||||
custom_names.get(x, x.title().replace("ai", "AI").replace("db", "DB"))
|
||||
custom_names.get(
|
||||
x,
|
||||
x.title().replace("db", "DB")
|
||||
if dir_ == "langchain_v1"
|
||||
else x.title().replace("ai", "AI").replace("db", "DB"),
|
||||
)
|
||||
for x in dir_.split("-")
|
||||
)
|
||||
for dir_ in integrations
|
||||
@@ -660,18 +663,12 @@ def main(dirs: Optional[list] = None) -> None:
|
||||
print("Starting to build API reference files.")
|
||||
if not dirs:
|
||||
dirs = [
|
||||
dir_
|
||||
for dir_ in os.listdir(ROOT_DIR / "libs")
|
||||
if dir_ not in ("cli", "partners", "packages.yml")
|
||||
and "pyproject.toml" in os.listdir(ROOT_DIR / "libs" / dir_)
|
||||
p.parent.name
|
||||
for p in (ROOT_DIR / "libs").rglob("pyproject.toml")
|
||||
# Exclude packages that are not directly under libs/ or libs/partners/
|
||||
if p.parent.parent.name in ("libs", "partners")
|
||||
]
|
||||
dirs += [
|
||||
dir_
|
||||
for dir_ in os.listdir(ROOT_DIR / "libs" / "partners")
|
||||
if os.path.isdir(ROOT_DIR / "libs" / "partners" / dir_)
|
||||
and "pyproject.toml" in os.listdir(ROOT_DIR / "libs" / "partners" / dir_)
|
||||
]
|
||||
for dir_ in dirs:
|
||||
for dir_ in sorted(dirs):
|
||||
# Skip any hidden directories
|
||||
# Some of these could be present by mistake in the code base
|
||||
# e.g., .pytest_cache from running tests from the wrong location.
|
||||
@@ -682,7 +679,7 @@ def main(dirs: Optional[list] = None) -> None:
|
||||
print("Building package:", dir_)
|
||||
_build_rst_file(package_name=dir_)
|
||||
|
||||
_build_index(dirs)
|
||||
_build_index(sorted(dirs))
|
||||
print("API reference files built.")
|
||||
|
||||
|
||||
|
||||
@@ -20,8 +20,7 @@ LangChain is a framework that consists of a number of packages.
|
||||
|
||||
This package contains base abstractions for different components and ways to compose them together.
|
||||
The interfaces for core components like chat models, vector stores, tools and more are defined here.
|
||||
No third-party integrations are defined here.
|
||||
The dependencies are very lightweight.
|
||||
**No third-party integrations are defined here.** The dependencies are kept purposefully very lightweight.
|
||||
|
||||
## langchain
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Contribute Code
|
||||
# Contribute code
|
||||
|
||||
If you would like to add a new feature or update an existing one, please read the resources below before getting started:
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
This guide walks through how to run the repository locally and check in your first code.
|
||||
For a [development container](https://containers.dev/), see the [.devcontainer folder](https://github.com/langchain-ai/langchain/tree/master/.devcontainer).
|
||||
|
||||
## Dependency Management: `uv` and other env/dependency managers
|
||||
## Dependency management: `uv` and other env/dependency managers
|
||||
|
||||
This project utilizes [uv](https://docs.astral.sh/uv/) v0.5+ as a dependency manager.
|
||||
|
||||
@@ -37,7 +37,7 @@ For this quickstart, start with `langchain`:
|
||||
cd libs/langchain
|
||||
```
|
||||
|
||||
## Local Development Dependencies
|
||||
## Local development dependencies
|
||||
|
||||
Install development requirements (for running langchain, running examples, linting, formatting, tests, and coverage):
|
||||
|
||||
@@ -64,12 +64,6 @@ To run unit tests:
|
||||
make test
|
||||
```
|
||||
|
||||
To run unit tests in Docker:
|
||||
|
||||
```bash
|
||||
make docker_tests
|
||||
```
|
||||
|
||||
There are also [integration tests and code-coverage](../testing.mdx) available.
|
||||
|
||||
### Developing langchain_core
|
||||
@@ -81,11 +75,11 @@ cd libs/core
|
||||
make test
|
||||
```
|
||||
|
||||
## Formatting and Linting
|
||||
## Formatting and linting
|
||||
|
||||
Run these locally before submitting a PR; the CI system will check also.
|
||||
|
||||
### Code Formatting
|
||||
### Code formatting
|
||||
|
||||
Formatting for this project is done via [ruff](https://docs.astral.sh/ruff/rules/).
|
||||
|
||||
@@ -163,7 +157,7 @@ If codespell is incorrectly flagging a word, you can skip spellcheck for that wo
|
||||
ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure'
|
||||
```
|
||||
|
||||
## Working with Optional Dependencies
|
||||
## Working with optional dependencies
|
||||
|
||||
`langchain`, `langchain-community`, and `langchain-experimental` rely on optional dependencies to keep these packages lightweight.
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Contribute Documentation
|
||||
# Contribute documentation
|
||||
|
||||
Documentation is a vital part of LangChain. We welcome both new documentation for new features and
|
||||
community improvements to our current documentation. Please read the resources below before getting started:
|
||||
|
||||
@@ -12,12 +12,11 @@ It covers a wide array of topics, including tutorials, use cases, integrations,
|
||||
and more, offering extensive guidance on building with LangChain.
|
||||
The content for this documentation lives in the `/docs` directory of the monorepo.
|
||||
2. In-code Documentation: This is documentation of the codebase itself, which is also
|
||||
used to generate the externally facing [API Reference](https://python.langchain.com/api_reference/langchain/index.html).
|
||||
used to generate the externally facing [API Reference](https://python.langchain.com/api_reference/).
|
||||
The content for the API reference is autogenerated by scanning the docstrings in the codebase. For this reason we ask that
|
||||
developers document their code well.
|
||||
|
||||
The `API Reference` is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/)
|
||||
from the code and is hosted by [Read the Docs](https://readthedocs.org/).
|
||||
The API Reference is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
||||
|
||||
We appreciate all contributions to the documentation, whether it be fixing a typo,
|
||||
adding a new tutorial or example and whether it be in the main documentation or the API Reference.
|
||||
@@ -25,7 +24,7 @@ adding a new tutorial or example and whether it be in the main documentation or
|
||||
Similar to linting, we recognize documentation can be annoying. If you do not want
|
||||
to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
## 📜 Main Documentation
|
||||
## 📜 Main documentation
|
||||
|
||||
The content for the main documentation is located in the `/docs` directory of the monorepo.
|
||||
|
||||
@@ -42,7 +41,7 @@ After modifying the documentation:
|
||||
3. Make a pull request with the changes.
|
||||
4. You can preview and verify that the changes are what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page. This will take you to a preview of the documentation changes.
|
||||
|
||||
## ⚒️ Linting and Building Documentation Locally
|
||||
## ⚒️ Linting and building documentation locally
|
||||
|
||||
After writing up the documentation, you may want to lint and build the documentation
|
||||
locally to ensure that it looks good and is free of errors.
|
||||
@@ -57,20 +56,44 @@ The code that builds the documentation is located in the `/docs` directory of th
|
||||
|
||||
In the following commands, the prefix `api_` indicates that those are operations for the API Reference.
|
||||
|
||||
Before building the documentation, it is always a good idea to clean the build directory:
|
||||
|
||||
```bash
|
||||
make docs_clean
|
||||
make api_docs_clean
|
||||
```
|
||||
|
||||
Next, you can build the documentation as outlined below:
|
||||
You can build the documentation as outlined below:
|
||||
|
||||
```bash
|
||||
make docs_build
|
||||
make api_docs_build
|
||||
```
|
||||
|
||||
### Viewing documentation locally
|
||||
|
||||
After building the main documentation, you can view it locally by starting a development server:
|
||||
|
||||
```bash
|
||||
# For main documentation (after running `make docs_build`)
|
||||
cd docs && make start
|
||||
```
|
||||
|
||||
This will start a development server where you can view the documentation in your browser. The exact url will be shown to you during the start process. The server will automatically reload when you make changes to the documentation files under the `build/` directory (e.g. for temporary tests - changes you wish to persist should be put under `docs/docs/`).
|
||||
|
||||
:::tip
|
||||
|
||||
You can specify a different port by setting the `PORT` environment variable:
|
||||
|
||||
```bash
|
||||
cd docs && PORT=3000 make start
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
The API Reference documentation is built as static HTML files and will be automatically opened directly in your browser.
|
||||
|
||||
You can also view the API Reference for a specific package by specifying the package name and installing the package if necessary dependencies:
|
||||
|
||||
```bash
|
||||
# Opens the API Reference for the `ollama` package in your default browser
|
||||
uv pip install -e libs/partners/ollama
|
||||
make api_docs_quick_preview API_PKG=ollama
|
||||
```
|
||||
|
||||
:::tip
|
||||
|
||||
The `make api_docs_build` command takes a long time. If you're making cosmetic changes to the API docs and want to see how they look, use:
|
||||
@@ -79,18 +102,28 @@ The `make api_docs_build` command takes a long time. If you're making cosmetic c
|
||||
make api_docs_quick_preview
|
||||
```
|
||||
|
||||
which will just build a small subset of the API reference.
|
||||
which will just build a small subset of the API reference (the `text-splitters` package).
|
||||
|
||||
:::
|
||||
|
||||
Finally, run the link checker to ensure all links are valid:
|
||||
Finally, run the link checker from the project root to ensure all links are valid:
|
||||
|
||||
```bash
|
||||
make docs_linkcheck
|
||||
make api_docs_linkcheck
|
||||
```
|
||||
|
||||
### Linting and Formatting
|
||||
To clean up the documentation build artifacts, you can run:
|
||||
|
||||
```bash
|
||||
make clean
|
||||
|
||||
# Or to clean specific documentation artifacts
|
||||
make docs_clean
|
||||
make api_docs_clean
|
||||
```
|
||||
|
||||
### Formatting and linting
|
||||
|
||||
The Main Documentation is linted from the **monorepo root**. To lint the main documentation, run the following from there:
|
||||
|
||||
@@ -104,9 +137,9 @@ If you have formatting-related errors, you can fix them automatically with:
|
||||
make format
|
||||
```
|
||||
|
||||
## ⌨️ In-code Documentation
|
||||
## ⌨️ In-code documentation
|
||||
|
||||
The in-code documentation is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code and is hosted by [Read the Docs](https://readthedocs.org/).
|
||||
The in-code documentation is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code following [reStructuredText](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html).
|
||||
|
||||
For the API reference to be useful, the codebase must be well-documented. This means that all functions, classes, and methods should have a docstring that explains what they do, what the arguments are, and what the return value is. This is a good practice in general, but it is especially important for LangChain because the API reference is the primary resource for developers to understand how to use the codebase.
|
||||
|
||||
@@ -141,16 +174,16 @@ def my_function(arg1: int, arg2: str) -> float:
|
||||
return 3.14
|
||||
```
|
||||
|
||||
### Linting and Formatting
|
||||
### Formatting and linting
|
||||
|
||||
The in-code documentation is linted from the directories belonging to the packages
|
||||
being documented.
|
||||
|
||||
For example, if you're working on the `langchain-community` package, you would change
|
||||
the working directory to the `langchain-community` directory:
|
||||
For example, if you're working on the `langchain-ollama` package, you would change
|
||||
the working directory to the the package directory:
|
||||
|
||||
```bash
|
||||
cd [root]/libs/langchain-community
|
||||
cd [root]/libs/partners/ollama
|
||||
```
|
||||
|
||||
Then you can run the following commands to lint and format the in-code documentation:
|
||||
@@ -160,9 +193,9 @@ make format
|
||||
make lint
|
||||
```
|
||||
|
||||
## Verify Documentation Changes
|
||||
## Verify documentation changes
|
||||
|
||||
After pushing documentation changes to the repository, you can preview and verify that the changes are
|
||||
what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page.
|
||||
This will take you to a preview of the documentation changes.
|
||||
This preview is created by [Vercel](https://vercel.com/docs/getting-started-with-vercel).
|
||||
This preview is created by [Vercel](https://vercel.com/docs/getting-started-with-vercel).
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
sidebar_class_name: "hidden"
|
||||
---
|
||||
|
||||
# Documentation Style Guide
|
||||
# Documentation style guide
|
||||
|
||||
As LangChain continues to grow, the amount of documentation required to cover the various concepts and integrations continues to grow too.
|
||||
This page provides guidelines for anyone writing documentation for LangChain and outlines some of our philosophies around
|
||||
@@ -158,3 +158,5 @@ Be concise, including in code samples.
|
||||
- Use bullet points and numbered lists to break down information into easily digestible chunks
|
||||
- Use tables (especially for **Reference** sections) and diagrams often to present information visually
|
||||
- Include the table of contents for longer documentation pages to help readers navigate the content, but hide it for shorter pages
|
||||
|
||||
Next, see the [documentation setup guide](setup.mdx) to get started with writing documentation for LangChain.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# How-to Guides
|
||||
# How-to guides
|
||||
|
||||
- [**Documentation**](documentation/index.mdx): Help improve our docs, including this one!
|
||||
- [**Code**](code/index.mdx): Help us write code, fix bugs, or improve our infrastructure.
|
||||
|
||||
@@ -3,7 +3,7 @@ pagination_prev: null
|
||||
pagination_next: contributing/how_to/integrations/package
|
||||
---
|
||||
|
||||
# Contribute Integrations
|
||||
# Contribute integrations
|
||||
|
||||
Integrations are a core component of LangChain.
|
||||
LangChain provides standard interfaces for several different components (language models, vector stores, etc) that are crucial when building LLM applications.
|
||||
@@ -16,7 +16,7 @@ LangChain provides standard interfaces for several different components (languag
|
||||
- **Best Practices:** Through their standard interface, LangChain components encourage and facilitate best practices (streaming, async, etc)
|
||||
|
||||
|
||||
## Components to Integrate
|
||||
## Components to integrate
|
||||
|
||||
:::info
|
||||
|
||||
@@ -71,7 +71,7 @@ In order to contribute an integration, you should follow these steps:
|
||||
5. [Optional] Open and merge a PR to add documentation for your integration to the official LangChain docs.
|
||||
6. [Optional] Engage with the LangChain team for joint co-marketing ([see below](#co-marketing)).
|
||||
|
||||
## Co-Marketing
|
||||
## Co-marketing
|
||||
|
||||
With over 20 million monthly downloads, LangChain has a large audience of developers
|
||||
building LLM applications. Beyond just listing integrations, we aim to highlight
|
||||
@@ -87,5 +87,5 @@ Here are some heuristics for types of content we are excited to promote:
|
||||
- **End-to-end applications:** End-to-end applications are great resources for developers looking to build. We prefer to highlight applications that are more complex/agentic in nature, and that use [LangGraph](https://github.com/langchain-ai/langgraph) as the orchestration framework. We get particularly excited about anything involving long-term memory, human-in-the-loop interaction patterns, or multi-agent architectures.
|
||||
- **Research:** We love highlighting novel research! Whether it is research built on top of LangChain or that integrates with it.
|
||||
|
||||
## Further Reading
|
||||
## Further reading
|
||||
To get started, let's learn [how to implement an integration package](/docs/contributing/how_to/integrations/package/) for LangChain.
|
||||
|
||||
@@ -358,7 +358,7 @@ a schema for the LLM to fill out when calling the tool. Similar to the `name` an
|
||||
description (part of `Field(..., description="description")`) are passed to the LLM,
|
||||
and the values in these fields should be concise and LLM-usable.
|
||||
|
||||
### Run Methods
|
||||
### Run methods
|
||||
|
||||
`_run` is the main method that should be implemented in the subclass. This method
|
||||
takes in the arguments from `args_schema` and runs the tool, returning a string
|
||||
@@ -469,6 +469,6 @@ import RetrieverSource from '/src/theme/integration_template/integration_templat
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
## Next steps
|
||||
|
||||
Now that you've implemented your package, you can move on to [testing your integration](../standard_tests) for your integration and successfully run them.
|
||||
|
||||
@@ -10,7 +10,7 @@ Unit tests run on every pull request, so they should be fast and reliable.
|
||||
|
||||
Integration tests run once a day, and they require more setup, so they should be reserved for confirming interface points with external services.
|
||||
|
||||
## Unit Tests
|
||||
## Unit tests
|
||||
|
||||
Unit tests cover modular logic that does not require calls to outside APIs.
|
||||
If you add new logic, please add a unit test.
|
||||
@@ -27,19 +27,13 @@ To run unit tests:
|
||||
make test
|
||||
```
|
||||
|
||||
To run unit tests in Docker:
|
||||
|
||||
```bash
|
||||
make docker_tests
|
||||
```
|
||||
|
||||
To run a specific test:
|
||||
|
||||
```bash
|
||||
TEST_FILE=tests/unit_tests/test_imports.py make test
|
||||
```
|
||||
|
||||
## Integration Tests
|
||||
## Integration tests
|
||||
|
||||
Integration tests cover logic that requires making calls to outside APIs (often integration with other services).
|
||||
If you add support for a new external API, please add a new integration test.
|
||||
|
||||
@@ -12,7 +12,7 @@ More coming soon! We are working on tutorials to help you make your first contri
|
||||
|
||||
- [**Make your first docs PR**](tutorials/docs.mdx)
|
||||
|
||||
## How-to Guides
|
||||
## How-to guides
|
||||
|
||||
- [**Documentation**](how_to/documentation/index.mdx): Help improve our docs, including this one!
|
||||
- [**Code**](how_to/code/index.mdx): Help us write code, fix bugs, or improve our infrastructure.
|
||||
|
||||
@@ -50,7 +50,7 @@ There are other files in the root directory level, but their presence should be
|
||||
## Documentation
|
||||
|
||||
The `/docs` directory contains the content for the documentation that is shown
|
||||
at https://python.langchain.com/ and the associated API Reference https://python.langchain.com/api_reference/langchain/index.html.
|
||||
at [python.langchain.com](https://python.langchain.com/) and the associated [API Reference](https://python.langchain.com/api_reference/).
|
||||
|
||||
See the [documentation](../how_to/documentation/index.mdx) guidelines to learn how to contribute to the documentation.
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ This tutorial will guide you through making a simple documentation edit, like co
|
||||
|
||||
---
|
||||
|
||||
## Editing a Documentation Page on GitHub
|
||||
## Editing a documentation page on GitHub
|
||||
|
||||
Sometimes you want to make a small change, like fixing a typo, and the easiest way to do this is to use GitHub's editor directly.
|
||||
|
||||
@@ -42,10 +42,14 @@ Sometimes you want to make a small change, like fixing a typo, and the easiest w
|
||||
- Give your PR a title like `docs: Fix typo in X section`.
|
||||
- Follow the checklist in the PR description template.
|
||||
|
||||
## Getting a Review
|
||||
## Getting a review
|
||||
|
||||
Once you've submitted the pull request, it will be reviewed by the maintainers. You may receive feedback or requests for changes. Keep an eye on the PR to address any comments.
|
||||
|
||||
Docs PRs are typically reviewed within a few days, but it may take longer depending on the complexity of the change and the availability of maintainers.
|
||||
|
||||
For more information on reviews, see the [Review Process](../reference/review_process.mdx).
|
||||
|
||||
## More information
|
||||
|
||||
See our [how-to guides](../how_to/documentation/index.mdx) for more information on contributing to documentation:
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain>=0.2.8 langchain-openai langchain-anthropic langchain-google-vertexai"
|
||||
"%pip install -qU langchain langchain-openai langchain-anthropic langchain-google-genai"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -38,7 +38,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "79e14913-803c-4382-9009-5c6af3d75d35",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
@@ -49,38 +49,15 @@
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_95293/571506279.py:4: LangChainBetaWarning: The function `init_chat_model` is in beta. It is actively being worked on, so the API may change.\n",
|
||||
" gpt_4o = init_chat_model(\"gpt-4o\", model_provider=\"openai\", temperature=0)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"GPT-4o: I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"GPT-4o: I’m called ChatGPT. How can I assist you today?\n",
|
||||
"\n",
|
||||
"Claude Opus: My name is Claude. It's nice to meet you!\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Gemini 1.5: I am a large language model, trained by Google. \n",
|
||||
"\n",
|
||||
"I don't have a name like a person does. You can call me Bard if you like! 😊 \n",
|
||||
"\n",
|
||||
"Gemini 2.5: I do not have a name. I am a large language model, trained by Google.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
@@ -88,6 +65,10 @@
|
||||
"source": [
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"# Don't forget to set your environment variables for the API keys of the respective providers!\n",
|
||||
"# For example, you can set them in your terminal or in a .env file:\n",
|
||||
"# export OPENAI_API_KEY=\"your_openai_api_key\"\n",
|
||||
"\n",
|
||||
"# Returns a langchain_openai.ChatOpenAI instance.\n",
|
||||
"gpt_4o = init_chat_model(\"gpt-4o\", model_provider=\"openai\", temperature=0)\n",
|
||||
"# Returns a langchain_anthropic.ChatAnthropic instance.\n",
|
||||
@@ -96,13 +77,13 @@
|
||||
")\n",
|
||||
"# Returns a langchain_google_vertexai.ChatVertexAI instance.\n",
|
||||
"gemini_15 = init_chat_model(\n",
|
||||
" \"gemini-1.5-pro\", model_provider=\"google_vertexai\", temperature=0\n",
|
||||
" \"gemini-2.5-pro\", model_provider=\"google_genai\", temperature=0\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Since all model integrations implement the ChatModel interface, you can use them in the same way.\n",
|
||||
"print(\"GPT-4o: \" + gpt_4o.invoke(\"what's your name\").content + \"\\n\")\n",
|
||||
"print(\"Claude Opus: \" + claude_opus.invoke(\"what's your name\").content + \"\\n\")\n",
|
||||
"print(\"Gemini 1.5: \" + gemini_15.invoke(\"what's your name\").content + \"\\n\")"
|
||||
"print(\"Gemini 2.5: \" + gemini_15.invoke(\"what's your name\").content + \"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -117,7 +98,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "0378ccc6-95bc-4d50-be50-fccc193f0a71",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
@@ -131,7 +112,7 @@
|
||||
"source": [
|
||||
"gpt_4o = init_chat_model(\"gpt-4o\", temperature=0)\n",
|
||||
"claude_opus = init_chat_model(\"claude-3-opus-20240229\", temperature=0)\n",
|
||||
"gemini_15 = init_chat_model(\"gemini-1.5-pro\", temperature=0)"
|
||||
"gemini_15 = init_chat_model(\"gemini-2.5-pro\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +127,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "6c037f27-12d7-4e83-811e-4245c0e3ba58",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
@@ -160,10 +141,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 11, 'total_tokens': 34}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_25624ae3a5', 'finish_reason': 'stop', 'logprobs': None}, id='run-b41df187-4627-490d-af3c-1c96282d3eb0-0', usage_metadata={'input_tokens': 11, 'output_tokens': 23, 'total_tokens': 34})"
|
||||
"AIMessage(content='I’m called ChatGPT. How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 11, 'total_tokens': 24, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-BwCyyBpMqn96KED6zPhLm4k9SQMiQ', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--fada10c3-4128-406c-b83d-a850d16b365f-0', usage_metadata={'input_tokens': 11, 'output_tokens': 13, 'total_tokens': 24, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -178,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "321e3036-abd2-4e1f-bcc6-606efd036954",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
@@ -192,10 +173,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"My name is Claude. It's nice to meet you!\", additional_kwargs={}, response_metadata={'id': 'msg_01Fx9P74A7syoFkwE73CdMMY', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 11, 'output_tokens': 15}}, id='run-a0fd2bbd-3b7e-46bf-8d69-a48c7e60b03c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26})"
|
||||
"AIMessage(content=\"My name is Claude. It's nice to meet you!\", additional_kwargs={}, response_metadata={'id': 'msg_01VDGrG9D6yefanbBG9zPJrc', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 11, 'output_tokens': 15, 'server_tool_use': None, 'service_tier': 'standard'}, 'model_name': 'claude-3-5-sonnet-20240620'}, id='run--f0156087-debf-4b4b-9aaa-f3328a81ef92-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -394,9 +375,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-2",
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-2"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -408,7 +389,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -34,6 +34,8 @@ These are the core building blocks you can use when building applications.
|
||||
[Chat Models](/docs/concepts/chat_models) are newer forms of language models that take messages in and output a message.
|
||||
See [supported integrations](/docs/integrations/chat/) for details on getting started with chat models from a specific provider.
|
||||
|
||||
- [How to: init any model in one line](/docs/how_to/chat_models_universal_init/)
|
||||
- [How to: work with local models](/docs/how_to/local_llms)
|
||||
- [How to: do function/tool calling](/docs/how_to/tool_calling)
|
||||
- [How to: get models to return structured output](/docs/how_to/structured_output)
|
||||
- [How to: cache model responses](/docs/how_to/chat_model_caching)
|
||||
@@ -48,8 +50,6 @@ See [supported integrations](/docs/integrations/chat/) for details on getting st
|
||||
- [How to: few shot prompt tool behavior](/docs/how_to/tools_few_shot)
|
||||
- [How to: bind model-specific formatted tools](/docs/how_to/tools_model_specific)
|
||||
- [How to: force a specific tool call](/docs/how_to/tool_choice)
|
||||
- [How to: work with local models](/docs/how_to/local_llms)
|
||||
- [How to: init any model in one line](/docs/how_to/chat_models_universal_init/)
|
||||
- [How to: pass multimodal data directly to models](/docs/how_to/multimodal_inputs/)
|
||||
|
||||
### Messages
|
||||
|
||||
@@ -13,15 +13,15 @@
|
||||
"\n",
|
||||
"This has at least two important benefits:\n",
|
||||
"\n",
|
||||
"1. `Privacy`: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
|
||||
"2. `Cost`: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
|
||||
"1. **Privacy**: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
|
||||
"2. **Cost**: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"Running an LLM locally requires a few things:\n",
|
||||
"\n",
|
||||
"1. `Open-source LLM`: An open-source LLM that can be freely modified and shared \n",
|
||||
"2. `Inference`: Ability to run this LLM on your device w/ acceptable latency\n",
|
||||
"1. **Open-source LLM**: An open-source LLM that can be freely modified and shared \n",
|
||||
"2. **Inference**: Ability to run this LLM on your device w/ acceptable latency\n",
|
||||
"\n",
|
||||
"### Open-source LLMs\n",
|
||||
"\n",
|
||||
@@ -29,8 +29,8 @@
|
||||
"\n",
|
||||
"These LLMs can be assessed across at least two dimensions (see figure):\n",
|
||||
" \n",
|
||||
"1. `Base model`: What is the base-model and how was it trained?\n",
|
||||
"2. `Fine-tuning approach`: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
|
||||
"1. **Base model**: What is the base-model and how was it trained?\n",
|
||||
"2. **Fine-tuning approach**: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -51,8 +51,8 @@
|
||||
"\n",
|
||||
"In general, these frameworks will do a few things:\n",
|
||||
"\n",
|
||||
"1. `Quantization`: Reduce the memory footprint of the raw model weights\n",
|
||||
"2. `Efficient implementation for inference`: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
|
||||
"1. **Quantization**: Reduce the memory footprint of the raw model weights\n",
|
||||
"2. **Efficient implementation for inference**: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
|
||||
"\n",
|
||||
"In particular, see [this excellent post](https://finbarr.ca/how-is-llama-cpp-possible/) on the importance of quantization.\n",
|
||||
"\n",
|
||||
@@ -679,11 +679,17 @@
|
||||
"\n",
|
||||
"In general, use cases for local LLMs can be driven by at least two factors:\n",
|
||||
"\n",
|
||||
"* `Privacy`: private data (e.g., journals, etc) that a user does not want to share \n",
|
||||
"* `Cost`: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
|
||||
"* **Privacy**: private data (e.g., journals, etc) that a user does not want to share \n",
|
||||
"* **Cost**: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
|
||||
"\n",
|
||||
"In addition, [here](https://blog.langchain.dev/using-langsmith-to-support-fine-tuning-of-open-source-llms/) is an overview on fine-tuning, which can utilize open-source LLMs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14c2c170",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -51,7 +51,31 @@
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to the [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) to sign up to AWS and setup your credentials. You'll also need to turn on model access for your account, which you can do by following [these instructions](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)."
|
||||
"Head to the [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) to sign up to AWS and setup your credentials.\n",
|
||||
"\n",
|
||||
"Alternatively, `ChatBedrockConverse` will read from the following environment variables by default:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0f65be92",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"...\"\n",
|
||||
"# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"...\"\n",
|
||||
"\n",
|
||||
"# Not required unless using temporary credentials.\n",
|
||||
"# os.environ[\"AWS_SESSION_TOKEN\"] = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3baad5a9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You'll also need to turn on model access for your account, which you can do by following [these instructions](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -114,6 +138,10 @@
|
||||
"\n",
|
||||
"llm = ChatBedrockConverse(\n",
|
||||
" model_id=\"anthropic.claude-3-5-sonnet-20240620-v1:0\",\n",
|
||||
" # region_name=...,\n",
|
||||
" # aws_access_key_id=...,\n",
|
||||
" # aws_secret_access_key=...,\n",
|
||||
" # aws_session_token=...,\n",
|
||||
" # temperature=...,\n",
|
||||
" # max_tokens=...,\n",
|
||||
" # other params...\n",
|
||||
@@ -237,6 +265,157 @@
|
||||
" print(chunk.text(), end=\"|\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a009400a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extended Thinking \n",
|
||||
"\n",
|
||||
"This guide focuses on implementing Extended Thinking using AWS Bedrock with LangChain's `ChatBedrockConverse` integration.\n",
|
||||
"\n",
|
||||
"### Supported Models\n",
|
||||
"\n",
|
||||
"Extended Thinking is available for the following Claude models on AWS Bedrock:\n",
|
||||
"\n",
|
||||
"| Model | Model ID |\n",
|
||||
"|-------|----------|\n",
|
||||
"| **Claude Opus 4** | `anthropic.claude-opus-4-20250514-v1:0` |\n",
|
||||
"| **Claude Sonnet 4** | `anthropic.claude-sonnet-4-20250514-v1:0` |\n",
|
||||
"| **Claude 3.7 Sonnet** | `us.anthropic.claude-3-7-sonnet-20250219-v1:0` |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abc790ca",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love programming\" from English to French.\\n\\n\"I love\" translates to \"J\\'aime\" or \"J\\'adore\" in French\\n\"Programming\" translates to \"la programmation\" in French\\n\\nSo the translation would be \"J\\'aime la programmation\" or \"J\\'adore la programmation\"\\n\\nBoth are correct, but \"J\\'aime\" is more commonly used for expressing love/liking something.', 'signature': 'EpgECkgIBRABGAIqQDub6nRpiusjbxZONXVlGXg5ZjUY1Eka1Yp4oBBHmRqGjId+StTBPuwD3CXLyb2rUDRhSc3hTpTM4krVqlFZrIsSDI/WLa1mu38DDqt1HRoMUjm+jF+03MZFD+WQIjBZtHaYiqgY0JQgU0NdXDwwBSZX44gXwuX9EDekh12VM1ysq+WxVtkp0WMU0dKCJo4q/QKpguFFlZtEZjF9PftzOgTIyy+1H5pY+Dsb2pnrGtfAgwTR7PuZ/d8ibY0A8ywjVEZtGm+PtcnCJiK53BWxhGYOtxnfN/RRKtuZhvPQj+QQOWeRWqH+GcbeISCgyTYn5WG75fmVL707byjQZ3IuhMfyZWmiTFE2fc4Jn/bxX7OsU+DbTWv2K1a+g7eW+dvQwYzCBO1hfEn4699/CHII8UAcHh1L3bnxOWGKkeVQ0KMfgfwVb0vuGG4QBYKIDs87QL414i69D68DxqCTZAHK4lMA6Xs7zW+m0MMCct4iHRnJI8kat1mlBEpMz6NRo9KacZJXpLJxofIU4ho7R5/QHccdni0IidNkUtrLBSB3toNJoQEcStts2UR67NHTxn47zk1/hi4v4Ahtw9OEQFONaH6XaG1wjpqEdjQ8/Tmg9eB6ZLoQ4sQfhcMF8Uo3hHbBY8jA3jZ+9pa9VbuVbO6Eup8NX3XXZm2nk50OMWX7hBwgBmlZbEew6pWFu7+13EkYAQ=='}}, {'type': 'text', 'text': \"J'aime la programmation.\"}], additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': '169ca92f-19c9-480c-9fc3-4e5284507e67', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 22 Jul 2025 04:40:22 GMT', 'content-type': 'application/json', 'content-length': '1498', 'connection': 'keep-alive', 'x-amzn-requestid': '169ca92f-19c9-480c-9fc3-4e5284507e67'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': [2839]}, 'model_name': 'us.anthropic.claude-sonnet-4-20250514-v1:0'}, id='run--42e05e5d-ba86-4dce-9e29-2a4ba32c5804-0', usage_metadata={'input_tokens': 58, 'output_tokens': 122, 'total_tokens': 180, 'input_token_details': {'cache_creation': 0, 'cache_read': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_aws import ChatBedrockConverse\n",
|
||||
"\n",
|
||||
"llm = ChatBedrockConverse(\n",
|
||||
" model_id=\"us.anthropic.claude-sonnet-4-20250514-v1:0\",\n",
|
||||
" region_name=\"us-west-2\",\n",
|
||||
" max_tokens=4096,\n",
|
||||
" additional_model_request_fields={\n",
|
||||
" \"thinking\": {\"type\": \"enabled\", \"budget_tokens\": 1024},\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7fb27b941602401d91542211134fc71a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love programming\" from English to French.\\n\\n\"I love\" translates to \"J\\'aime\" or \"J\\'adore\" in French\\n\"Programming\" translates to \"la programmation\" in French\\n\\nSo the translation would be \"J\\'aime la programmation\" or \"J\\'adore la programmation\"\\n\\nBoth are correct, but \"J\\'aime\" is more commonly used for expressing love/liking something.', 'signature': 'EpgECkgIBRABGAIqQDub6nRpiusjbxZONXVlGXg5ZjUY1Eka1Yp4oBBHmRqGjId+StTBPuwD3CXLyb2rUDRhSc3hTpTM4krVqlFZrIsSDI/WLa1mu38DDqt1HRoMUjm+jF+03MZFD+WQIjBZtHaYiqgY0JQgU0NdXDwwBSZX44gXwuX9EDekh12VM1ysq+WxVtkp0WMU0dKCJo4q/QKpguFFlZtEZjF9PftzOgTIyy+1H5pY+Dsb2pnrGtfAgwTR7PuZ/d8ibY0A8ywjVEZtGm+PtcnCJiK53BWxhGYOtxnfN/RRKtuZhvPQj+QQOWeRWqH+GcbeISCgyTYn5WG75fmVL707byjQZ3IuhMfyZWmiTFE2fc4Jn/bxX7OsU+DbTWv2K1a+g7eW+dvQwYzCBO1hfEn4699/CHII8UAcHh1L3bnxOWGKkeVQ0KMfgfwVb0vuGG4QBYKIDs87QL414i69D68DxqCTZAHK4lMA6Xs7zW+m0MMCct4iHRnJI8kat1mlBEpMz6NRo9KacZJXpLJxofIU4ho7R5/QHccdni0IidNkUtrLBSB3toNJoQEcStts2UR67NHTxn47zk1/hi4v4Ahtw9OEQFONaH6XaG1wjpqEdjQ8/Tmg9eB6ZLoQ4sQfhcMF8Uo3hHbBY8jA3jZ+9pa9VbuVbO6Eup8NX3XXZm2nk50OMWX7hBwgBmlZbEew6pWFu7+13EkYAQ=='}}, {'type': 'text', 'text': \"J'aime la programmation.\"}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f1eb1ce1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### How extended thinking works\n",
|
||||
"\n",
|
||||
"When extended thinking is turned on, Claude creates thinking content blocks where it outputs its internal reasoning. Claude incorporates insights from this reasoning before crafting a final response. The API response will include thinking content blocks, followed by text content blocks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "951d8206",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('system',\n",
|
||||
" 'You are a helpful assistant that translates English to French. Translate the user sentence.'),\n",
|
||||
" ('human', 'I love programming.'),\n",
|
||||
" ('ai',\n",
|
||||
" [{'type': 'reasoning_content',\n",
|
||||
" 'reasoning_content': {'text': 'The user wants me to translate \"I love programming\" from English to French.\\n\\n\"I love\" translates to \"J\\'aime\" or \"J\\'adore\" in French\\n\"Programming\" translates to \"la programmation\" in French\\n\\nSo the translation would be \"J\\'aime la programmation\" or \"J\\'adore la programmation\"\\n\\nBoth are correct, but \"J\\'aime\" is more commonly used for expressing love/liking something.',\n",
|
||||
" 'signature': 'EpgECkgIBRABGAIqQDub6nRpiusjbxZONXVlGXg5ZjUY1Eka1Yp4oBBHmRqGjId+StTBPuwD3CXLyb2rUDRhSc3hTpTM4krVqlFZrIsSDI/WLa1mu38DDqt1HRoMUjm+jF+03MZFD+WQIjBZtHaYiqgY0JQgU0NdXDwwBSZX44gXwuX9EDekh12VM1ysq+WxVtkp0WMU0dKCJo4q/QKpguFFlZtEZjF9PftzOgTIyy+1H5pY+Dsb2pnrGtfAgwTR7PuZ/d8ibY0A8ywjVEZtGm+PtcnCJiK53BWxhGYOtxnfN/RRKtuZhvPQj+QQOWeRWqH+GcbeISCgyTYn5WG75fmVL707byjQZ3IuhMfyZWmiTFE2fc4Jn/bxX7OsU+DbTWv2K1a+g7eW+dvQwYzCBO1hfEn4699/CHII8UAcHh1L3bnxOWGKkeVQ0KMfgfwVb0vuGG4QBYKIDs87QL414i69D68DxqCTZAHK4lMA6Xs7zW+m0MMCct4iHRnJI8kat1mlBEpMz6NRo9KacZJXpLJxofIU4ho7R5/QHccdni0IidNkUtrLBSB3toNJoQEcStts2UR67NHTxn47zk1/hi4v4Ahtw9OEQFONaH6XaG1wjpqEdjQ8/Tmg9eB6ZLoQ4sQfhcMF8Uo3hHbBY8jA3jZ+9pa9VbuVbO6Eup8NX3XXZm2nk50OMWX7hBwgBmlZbEew6pWFu7+13EkYAQ=='}},\n",
|
||||
" {'type': 'text', 'text': \"J'aime la programmation.\"}]),\n",
|
||||
" ('human', 'I love AI')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"next_messages = messages + [(\"ai\", ai_msg.content), (\"human\", \"I love AI\")]\n",
|
||||
"next_messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "9d8c506c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love AI\" from English to French. \\n\\n\"I love\" translates to \"J\\'aime\" in French.\\n\"AI\" stands for \"Artificial Intelligence\" which in French is \"Intelligence Artificielle\" or abbreviated as \"IA\".\\n\\nSo the translation would be \"J\\'aime l\\'IA\" (using the abbreviation) or \"J\\'aime l\\'intelligence artificielle\" (using the full term).\\n\\nI think using the abbreviation \"IA\" would be more natural and commonly used, similar to how we use \"AI\" in English.', 'signature': 'EoMFCkgIBRABGAIqQOwp9d0YWm8NctfL9lf1MeWR1OxeAKB3Es19Lei2bdHQ4W0ezTK4wVcm/VLM+7kICX2aB9RAmUD5sJxoKHfdX38SDIR/aSJhHZifGOHqwBoMhzNsyPmB7FFNvNESIjBMVRpRUDTFGn5+nL0x5CjWhKA8H/XFnKYRrUyMYb1n7lCQA7BeEjsaWwxZ3YV9rZsq6APuaXaA40Bt+KnpPOo06r72L/DceliRAw1a6cuT5E0Dv0eIAOYblbXaKYn0jy8UzTUuctOP3As/zT5pK5yC+Rx0d2l9kuP3+COERM98u0R04bWn6qh0HcyE+zNc7c4YWkncjdmOxF/j6OxhcMhZEoX2035v9eUJ9+O/u1xaff08YAEfg7TGWrSIwalpjs1mzWA9ijKg8YyjmXjWnMeFn0z6LDqLaaKc+nC8IN9SLwA/eHpf/ayoEgmogn7gWzijW8MDbnlwpQDS75wK7An3RMEcpWD/OXrKb1EhWKEmOBro5BOTGsfK3ZDveRL0aCBINdOu+AHMQDFXJ04cRDEjs9GE3YC218UcFtS42TFO7/Ct5CYCTknETPx93zcGTOM2VPOZ02Uem1A7Nda/Fa4l2b03EUEtwlgske5K1RbeohN9sclxYsxX5nGJ5sSZurVCk9plkyTG3aiPvbohfVVarVgukKoKwoMDYz5rHVscWlUe+qeqJE/H+KKlhtzO+lWWDN4knqeYsZ55flO5Hq4vT20QCYnF8hcUx07ngGKXuGID9n5kFnLsP8sBUHYKm7bmopFFZvfPcmsqiV9yvG/8Ly9DHbmY5ZwxyrbdJCFT6HD6kq/mEBDftZ6dhmyKMimJBfbTj7d3VAILbRgB'}}, {'type': 'text', 'text': \"J'aime l'IA.\"}], additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': '023799d6-7ed5-4e49-8ad7-7460a49a9a45', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 22 Jul 2025 04:40:34 GMT', 'content-type': 'application/json', 'content-length': '1737', 'connection': 'keep-alive', 'x-amzn-requestid': '023799d6-7ed5-4e49-8ad7-7460a49a9a45'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': [3473]}, 'model_name': 'us.anthropic.claude-sonnet-4-20250514-v1:0'}, id='run--ca8abc92-60a9-4bd1-93b4-7788496eda7a-0', usage_metadata={'input_tokens': 75, 'output_tokens': 153, 'total_tokens': 228, 'input_token_details': {'cache_creation': 0, 'cache_read': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg = llm.invoke(next_messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e53e3ebb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love AI\" from English to French. \\n\\n\"I love\" translates to \"J\\'aime\" in French.\\n\"AI\" stands for \"Artificial Intelligence\" which in French is \"Intelligence Artificielle\" or abbreviated as \"IA\".\\n\\nSo the translation would be \"J\\'aime l\\'IA\" (using the abbreviation) or \"J\\'aime l\\'intelligence artificielle\" (using the full term).\\n\\nI think using the abbreviation \"IA\" would be more natural and commonly used, similar to how we use \"AI\" in English.', 'signature': 'EoMFCkgIBRABGAIqQOwp9d0YWm8NctfL9lf1MeWR1OxeAKB3Es19Lei2bdHQ4W0ezTK4wVcm/VLM+7kICX2aB9RAmUD5sJxoKHfdX38SDIR/aSJhHZifGOHqwBoMhzNsyPmB7FFNvNESIjBMVRpRUDTFGn5+nL0x5CjWhKA8H/XFnKYRrUyMYb1n7lCQA7BeEjsaWwxZ3YV9rZsq6APuaXaA40Bt+KnpPOo06r72L/DceliRAw1a6cuT5E0Dv0eIAOYblbXaKYn0jy8UzTUuctOP3As/zT5pK5yC+Rx0d2l9kuP3+COERM98u0R04bWn6qh0HcyE+zNc7c4YWkncjdmOxF/j6OxhcMhZEoX2035v9eUJ9+O/u1xaff08YAEfg7TGWrSIwalpjs1mzWA9ijKg8YyjmXjWnMeFn0z6LDqLaaKc+nC8IN9SLwA/eHpf/ayoEgmogn7gWzijW8MDbnlwpQDS75wK7An3RMEcpWD/OXrKb1EhWKEmOBro5BOTGsfK3ZDveRL0aCBINdOu+AHMQDFXJ04cRDEjs9GE3YC218UcFtS42TFO7/Ct5CYCTknETPx93zcGTOM2VPOZ02Uem1A7Nda/Fa4l2b03EUEtwlgske5K1RbeohN9sclxYsxX5nGJ5sSZurVCk9plkyTG3aiPvbohfVVarVgukKoKwoMDYz5rHVscWlUe+qeqJE/H+KKlhtzO+lWWDN4knqeYsZ55flO5Hq4vT20QCYnF8hcUx07ngGKXuGID9n5kFnLsP8sBUHYKm7bmopFFZvfPcmsqiV9yvG/8Ly9DHbmY5ZwxyrbdJCFT6HD6kq/mEBDftZ6dhmyKMimJBfbTj7d3VAILbRgB'}}, {'type': 'text', 'text': \"J'aime l'IA.\"}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a77519e5-897d-41a0-a9bb-55300fa79efc",
|
||||
@@ -379,7 +558,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -393,7 +572,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.12.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -92,7 +92,7 @@ the support of DB2 vector store and vector search.
|
||||
|
||||
See detailed usage examples in the guide [here](/docs/integrations/vectorstores/db2).
|
||||
|
||||
Installation: This is a seperate package for vector store feature only and can be run
|
||||
Installation: This is a separate package for vector store feature only and can be run
|
||||
without the `langchain-ibm` package.
|
||||
```bash
|
||||
pip install -U langchain-db2
|
||||
|
||||
@@ -20,7 +20,7 @@ pip install langchain-predictionguard
|
||||
|---|---|---|---------------------------------------------------------|-------------------------------------------------------------------------------|
|
||||
|Chat|Build Chat Bots|[Chat](https://docs.predictionguard.com/api-reference/api-reference/chat-completions)| `from langchain_predictionguard import ChatPredictionGuard` | [ChatPredictionGuard.ipynb](/docs/integrations/chat/predictionguard) |
|
||||
|Completions|Generate Text|[Completions](https://docs.predictionguard.com/api-reference/api-reference/completions)| `from langchain_predictionguard import PredictionGuard` | [PredictionGuard.ipynb](/docs/integrations/llms/predictionguard) |
|
||||
|Text Embedding|Embed String to Vectores|[Embeddings](https://docs.predictionguard.com/api-reference/api-reference/embeddings)| `from langchain_predictionguard import PredictionGuardEmbeddings` | [PredictionGuardEmbeddings.ipynb](/docs/integrations/text_embedding/predictionguard) |
|
||||
|Text Embedding|Embed String to Vectors|[Embeddings](https://docs.predictionguard.com/api-reference/api-reference/embeddings)| `from langchain_predictionguard import PredictionGuardEmbeddings` | [PredictionGuardEmbeddings.ipynb](/docs/integrations/text_embedding/predictionguard) |
|
||||
|
||||
## Getting Started
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# PremAI
|
||||
|
||||
[PremAI](https://premai.io/) is an all-in-one platform that simplifies the creation of robust, production-ready applications powered by Generative AI. By streamlining the development process, PremAI allows you to concentrate on enhancing user experience and driving overall growth for your application. You can quickly start using our platform [here](https://docs.premai.io/quick-start).
|
||||
|
||||
[PremAI](https://premai.io/) is an all-in-one platform that simplifies the creation of robust, production-ready applications powered by Generative AI. By streamlining the development process, PremAI allows you to concentrate on enhancing user experience and driving overall growth for your application. You can quickly start using [our platform](https://docs.premai.io/quick-start).
|
||||
|
||||
## ChatPremAI
|
||||
|
||||
@@ -26,10 +25,9 @@ from langchain_community.chat_models import ChatPremAI
|
||||
|
||||
Once we imported our required modules, let's setup our client. For now let's assume that our `project_id` is `8`. But make sure you use your project-id, otherwise it will throw error.
|
||||
|
||||
To use langchain with prem, you do not need to pass any model name or set any parameters with our chat-client. By default it will use the model name and parameters used in the [LaunchPad](https://docs.premai.io/get-started/launchpad).
|
||||
|
||||
> Note: If you change the `model` or any other parameters like `temperature` or `max_tokens` while setting the client, it will override existing default configurations, that was used in LaunchPad.
|
||||
To use langchain with prem, you do not need to pass any model name or set any parameters with our chat-client. By default it will use the model name and parameters used in the [LaunchPad](https://docs.premai.io/get-started/launchpad).
|
||||
|
||||
> Note: If you change the `model` or any other parameters like `temperature` or `max_tokens` while setting the client, it will override existing default configurations, that was used in LaunchPad.
|
||||
|
||||
```python
|
||||
import os
|
||||
@@ -43,9 +41,9 @@ chat = ChatPremAI(project_id=1234, model_name="gpt-4o")
|
||||
|
||||
### Chat Completions
|
||||
|
||||
`ChatPremAI` supports two methods: `invoke` (which is the same as `generate`) and `stream`.
|
||||
`ChatPremAI` supports two methods: `invoke` (which is the same as `generate`) and `stream`.
|
||||
|
||||
The first one will give us a static result. Whereas the second one will stream tokens one by one. Here's how you can generate chat-like completions.
|
||||
The first one will give us a static result. Whereas the second one will stream tokens one by one. Here's how you can generate chat-like completions.
|
||||
|
||||
```python
|
||||
human_message = HumanMessage(content="Who are you?")
|
||||
@@ -72,18 +70,17 @@ chat.invoke(
|
||||
)
|
||||
```
|
||||
|
||||
> If you are going to place system prompt here, then it will override your system prompt that was fixed while deploying the application from the platform.
|
||||
> If you are going to place system prompt here, then it will override your system prompt that was fixed while deploying the application from the platform.
|
||||
|
||||
> You can find all the optional parameters [here](https://docs.premai.io/get-started/sdk#optional-parameters). Any parameters other than [these supported parameters](https://docs.premai.io/get-started/sdk#optional-parameters) will be automatically removed before calling the model.
|
||||
|
||||
|
||||
### Native RAG Support with Prem Repositories
|
||||
|
||||
Prem Repositories which allows users to upload documents (.txt, .pdf etc) and connect those repositories to the LLMs. You can think Prem repositories as native RAG, where each repository can be considered as a vector database. You can connect multiple repositories. You can learn more about repositories [here](https://docs.premai.io/get-started/repositories).
|
||||
|
||||
Repositories are also supported in langchain premai. Here is how you can do it.
|
||||
Repositories are also supported in langchain premai. Here is how you can do it.
|
||||
|
||||
```python
|
||||
```python
|
||||
|
||||
query = "Which models are used for dense retrieval"
|
||||
repository_ids = [1985,]
|
||||
@@ -94,13 +91,13 @@ repositories = dict(
|
||||
)
|
||||
```
|
||||
|
||||
First we start by defining our repository with some repository ids. Make sure that the ids are valid repository ids. You can learn more about how to get the repository id [here](https://docs.premai.io/get-started/repositories).
|
||||
First we start by defining our repository with some repository ids. Make sure that the ids are valid repository ids. You can learn more about how to get the repository id [here](https://docs.premai.io/get-started/repositories).
|
||||
|
||||
> Please note: Similar like `model_name` when you invoke the argument `repositories`, then you are potentially overriding the repositories connected in the launchpad.
|
||||
> Please note: Similar like `model_name` when you invoke the argument `repositories`, then you are potentially overriding the repositories connected in the launchpad.
|
||||
|
||||
Now, we connect the repository with our chat object to invoke RAG based generations.
|
||||
Now, we connect the repository with our chat object to invoke RAG based generations.
|
||||
|
||||
```python
|
||||
```python
|
||||
import json
|
||||
|
||||
response = chat.invoke(query, max_tokens=100, repositories=repositories)
|
||||
@@ -109,7 +106,7 @@ print(response.content)
|
||||
print(json.dumps(response.response_metadata, indent=4))
|
||||
```
|
||||
|
||||
This is how an output looks like.
|
||||
This is how an output looks like.
|
||||
|
||||
```bash
|
||||
Dense retrieval models typically include:
|
||||
@@ -134,11 +131,11 @@ Dense retrieval models typically include:
|
||||
|
||||
So, this also means that you do not need to make your own RAG pipeline when using the Prem Platform. Prem uses it's own RAG technology to deliver best in class performance for Retrieval Augmented Generations.
|
||||
|
||||
> Ideally, you do not need to connect Repository IDs here to get Retrieval Augmented Generations. You can still get the same result if you have connected the repositories in prem platform.
|
||||
> Ideally, you do not need to connect Repository IDs here to get Retrieval Augmented Generations. You can still get the same result if you have connected the repositories in prem platform.
|
||||
|
||||
### Streaming
|
||||
|
||||
In this section, let's see how we can stream tokens using langchain and PremAI. Here's how you do it.
|
||||
In this section, let's see how we can stream tokens using langchain and PremAI. Here's how you do it.
|
||||
|
||||
```python
|
||||
import sys
|
||||
@@ -163,16 +160,15 @@ for chunk in chat.stream(
|
||||
|
||||
This will stream tokens one after the other.
|
||||
|
||||
> Please note: As of now, RAG with streaming is not supported. However we still support it with our API. You can learn more about that [here](https://docs.premai.io/get-started/chat-completion-sse).
|
||||
|
||||
> Please note: As of now, RAG with streaming is not supported. However we still support it with our API. You can learn more about that [here](https://docs.premai.io/get-started/chat-completion-sse).
|
||||
|
||||
## Prem Templates
|
||||
|
||||
Writing Prompt Templates can be super messy. Prompt templates are long, hard to manage, and must be continuously tweaked to improve and keep the same throughout the application.
|
||||
Writing Prompt Templates can be super messy. Prompt templates are long, hard to manage, and must be continuously tweaked to improve and keep the same throughout the application.
|
||||
|
||||
With **Prem**, writing and managing prompts can be super easy. The **_Templates_** tab inside the [launchpad](https://docs.premai.io/get-started/launchpad) helps you write as many prompts you need and use it inside the SDK to make your application running using those prompts. You can read more about Prompt Templates [here](https://docs.premai.io/get-started/prem-templates).
|
||||
With **Prem**, writing and managing prompts can be super easy. The **_Templates_** tab inside the [launchpad](https://docs.premai.io/get-started/launchpad) helps you write as many prompts you need and use it inside the SDK to make your application running using those prompts. You can read more about Prompt Templates [here](https://docs.premai.io/get-started/prem-templates).
|
||||
|
||||
To use Prem Templates natively with LangChain, you need to pass an id the `HumanMessage`. This id should be the name the variable of your prompt template. the `content` in `HumanMessage` should be the value of that variable.
|
||||
To use Prem Templates natively with LangChain, you need to pass an id the `HumanMessage`. This id should be the name the variable of your prompt template. the `content` in `HumanMessage` should be the value of that variable.
|
||||
|
||||
let's say for example, if your prompt template was this:
|
||||
|
||||
@@ -198,7 +194,7 @@ template_id = "78069ce8-xxxxx-xxxxx-xxxx-xxx"
|
||||
response = chat.invoke([human_message], template_id=template_id)
|
||||
```
|
||||
|
||||
Prem Templates are also available for Streaming too.
|
||||
Prem Templates are also available for Streaming too.
|
||||
|
||||
## Prem Embeddings
|
||||
|
||||
@@ -215,7 +211,7 @@ if os.environ.get("PREMAI_API_KEY") is None:
|
||||
|
||||
```
|
||||
|
||||
We support lots of state of the art embedding models. You can view our list of supported LLMs and embedding models [here](https://docs.premai.io/get-started/supported-models). For now let's go for `text-embedding-3-large` model for this example. .
|
||||
We support lots of state of the art embedding models. You can view our list of supported LLMs and embedding models [here](https://docs.premai.io/get-started/supported-models). For now let's go for `text-embedding-3-large` model for this example. .
|
||||
|
||||
```python
|
||||
|
||||
@@ -231,7 +227,7 @@ print(query_result[:5])
|
||||
```
|
||||
|
||||
:::note
|
||||
Setting `model_name` argument in mandatory for PremAIEmbeddings unlike chat.
|
||||
Setting `model_name` argument in mandatory for PremAIEmbeddings unlike chat.
|
||||
:::
|
||||
|
||||
Finally, let's embed some sample document
|
||||
@@ -254,11 +250,13 @@ print(doc_result[0][:5])
|
||||
```python
|
||||
print(f"Dimension of embeddings: {len(query_result)}")
|
||||
```
|
||||
|
||||
Dimension of embeddings: 3072
|
||||
|
||||
```python
|
||||
doc_result[:5]
|
||||
```
|
||||
|
||||
>Result:
|
||||
>
|
||||
>[-0.02129288576543331,
|
||||
@@ -269,20 +267,20 @@ doc_result[:5]
|
||||
|
||||
## Tool/Function Calling
|
||||
|
||||
LangChain PremAI supports tool/function calling. Tool/function calling allows a model to respond to a given prompt by generating output that matches a user-defined schema.
|
||||
LangChain PremAI supports tool/function calling. Tool/function calling allows a model to respond to a given prompt by generating output that matches a user-defined schema.
|
||||
|
||||
- You can learn all about tool calling in details [in our documentation here](https://docs.premai.io/get-started/function-calling).
|
||||
- You can learn more about langchain tool calling in [this part of the docs](https://python.langchain.com/v0.1/docs/modules/model_io/chat/function_calling).
|
||||
|
||||
**NOTE:**
|
||||
|
||||
> The current version of LangChain ChatPremAI do not support function/tool calling with streaming support. Streaming support along with function calling will come soon.
|
||||
> The current version of LangChain ChatPremAI do not support function/tool calling with streaming support. Streaming support along with function calling will come soon.
|
||||
|
||||
### Passing tools to model
|
||||
|
||||
In order to pass tools and let the LLM choose the tool it needs to call, we need to pass a tool schema. A tool schema is the function definition along with proper docstring on what does the function do, what each argument of the function is etc. Below are some simple arithmetic functions with their schema.
|
||||
In order to pass tools and let the LLM choose the tool it needs to call, we need to pass a tool schema. A tool schema is the function definition along with proper docstring on what does the function do, what each argument of the function is etc. Below are some simple arithmetic functions with their schema.
|
||||
|
||||
**NOTE:**
|
||||
**NOTE:**
|
||||
> When defining function/tool schema, do not forget to add information around the function arguments, otherwise it would throw error.
|
||||
|
||||
```python
|
||||
@@ -320,27 +318,28 @@ def multiply(a: int, b: int) -> int:
|
||||
|
||||
### Binding tool schemas with our LLM
|
||||
|
||||
We will now use the `bind_tools` method to convert our above functions to a "tool" and binding it with the model. This means we are going to pass these tool information everytime we invoke the model.
|
||||
We will now use the `bind_tools` method to convert our above functions to a "tool" and binding it with the model. This means we are going to pass these tool information every time we invoke the model.
|
||||
|
||||
```python
|
||||
tools = [add, multiply]
|
||||
llm_with_tools = chat.bind_tools(tools)
|
||||
```
|
||||
|
||||
After this, we get the response from the model which is now binded with the tools.
|
||||
After this, we get the response from the model which is now binded with the tools.
|
||||
|
||||
```python
|
||||
```python
|
||||
query = "What is 3 * 12? Also, what is 11 + 49?"
|
||||
|
||||
messages = [HumanMessage(query)]
|
||||
ai_msg = llm_with_tools.invoke(messages)
|
||||
```
|
||||
|
||||
As we can see, when our chat model is binded with tools, then based on the given prompt, it calls the correct set of the tools and sequentially.
|
||||
As we can see, when our chat model is binded with tools, then based on the given prompt, it calls the correct set of the tools and sequentially.
|
||||
|
||||
```python
|
||||
```python
|
||||
ai_msg.tool_calls
|
||||
```
|
||||
|
||||
**Output**
|
||||
|
||||
```python
|
||||
@@ -352,15 +351,15 @@ ai_msg.tool_calls
|
||||
'id': 'call_MPKYGLHbf39csJIyb5BZ9xIk'}]
|
||||
```
|
||||
|
||||
We append this message shown above to the LLM which acts as a context and makes the LLM aware that what all functions it has called.
|
||||
We append this message shown above to the LLM which acts as a context and makes the LLM aware that what all functions it has called.
|
||||
|
||||
```python
|
||||
```python
|
||||
messages.append(ai_msg)
|
||||
```
|
||||
|
||||
Since tool calling happens into two phases, where:
|
||||
|
||||
1. in our first call, we gathered all the tools that the LLM decided to tool, so that it can get the result as an added context to give more accurate and hallucination free result.
|
||||
1. in our first call, we gathered all the tools that the LLM decided to tool, so that it can get the result as an added context to give more accurate and hallucination free result.
|
||||
|
||||
2. in our second call, we will parse those set of tools decided by LLM and run them (in our case it will be the functions we defined, with the LLM's extracted arguments) and pass this result to the LLM
|
||||
|
||||
@@ -373,12 +372,13 @@ for tool_call in ai_msg.tool_calls:
|
||||
messages.append(ToolMessage(tool_output, tool_call_id=tool_call["id"]))
|
||||
```
|
||||
|
||||
Finally, we call the LLM (binded with the tools) with the function response added in it's context.
|
||||
Finally, we call the LLM (binded with the tools) with the function response added in it's context.
|
||||
|
||||
```python
|
||||
response = llm_with_tools.invoke(messages)
|
||||
print(response.content)
|
||||
```
|
||||
|
||||
**Output**
|
||||
|
||||
```txt
|
||||
@@ -425,4 +425,4 @@ chain.invoke(query)
|
||||
[multiply(a=3, b=12), add(a=11, b=49)]
|
||||
```
|
||||
|
||||
Now, as done above, we parse this and run this functions and call the LLM once again to get the result.
|
||||
Now, as done above, we parse this and run this functions and call the LLM once again to get the result.
|
||||
|
||||
@@ -265,13 +265,7 @@
|
||||
"cell_type": "markdown",
|
||||
"id": "08437fa2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"### Vector search\n",
|
||||
"\n",
|
||||
"Dense vector retrival using fake embeddings in this example."
|
||||
]
|
||||
"source": "## Instantiation\n\n### Vector search\n\nDense vector retrieval using fake embeddings in this example."
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -713,4 +707,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
@@ -1,265 +1,267 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Cohere\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CohereEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Cohere embedding models using LangChain. For detailed documentation on `CohereEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/cohere/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Cohere\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Cohere embedding models you'll need to create a/an Cohere account, get an API key, and install the `langchain-cohere` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Head to [cohere.com](https://cohere.com) to sign up to Cohere and generate an API key. Once you’ve done this set the COHERE_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"COHERE_API_KEY\"):\n",
|
||||
" os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Enter your Cohere API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Cohere integration lives in the `langchain-cohere` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-cohere"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_cohere import CohereEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = CohereEmbeddings(\n",
|
||||
" model=\"embed-english-v3.0\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, -0.030212402, -0.08886719, -0.08569336, 0.007030487, -0.0010671616, -0.033813477, 0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.028869629, -0.030410767, -0.099121094, -0.07116699, -0.012748718, -0.0059432983, -0.04360962, 0.\n",
|
||||
"[-0.047332764, -0.049957275, -0.07458496, -0.034332275, -0.057922363, -0.0112838745, -0.06994629, 0.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `CohereEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/cohere/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Cohere\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CohereEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Cohere embedding models using LangChain. For detailed documentation on `CohereEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/cohere/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Cohere\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Cohere embedding models you'll need to create a/an Cohere account, get an API key, and install the `langchain-cohere` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Head to [cohere.com](https://cohere.com) to sign up to Cohere and generate an API key. Once you’ve done this set the COHERE_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"COHERE_API_KEY\"):\n",
|
||||
" os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Enter your Cohere API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Cohere integration lives in the `langchain-cohere` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-cohere"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_cohere import CohereEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = CohereEmbeddings(\n",
|
||||
" model=\"embed-english-v3.0\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, -0.030212402, -0.08886719, -0.08569336, 0.007030487, -0.0010671616, -0.033813477, 0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.028869629, -0.030410767, -0.099121094, -0.07116699, -0.012748718, -0.0059432983, -0.04360962, 0.\n",
|
||||
"[-0.047332764, -0.049957275, -0.07458496, -0.034332275, -0.057922363, -0.0112838745, -0.06994629, 0.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `CohereEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/cohere/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -125,7 +125,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -264,7 +264,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,265 +1,267 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Fireworks\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Fireworks embedding models using LangChain. For detailed documentation on `FireworksEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Fireworks\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Fireworks embedding models you'll need to create a Fireworks account, get an API key, and install the `langchain-fireworks` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [fireworks.ai](https://fireworks.ai/) to sign up to Fireworks and generate an API key. Once you’ve done this set the FIREWORKS_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"FIREWORKS_API_KEY\"):\n",
|
||||
" os.environ[\"FIREWORKS_API_KEY\"] = getpass.getpass(\"Enter your Fireworks API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Fireworks integration lives in the `langchain-fireworks` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_fireworks import FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = FireworksEmbeddings(\n",
|
||||
" model=\"nomic-ai/nomic-embed-text-v1.5\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.01666259765625, 0.011688232421875, -0.1181640625, -0.10205078125, 0.05438232421875, -0.0890502929\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.016632080078125, 0.01165008544921875, -0.1181640625, -0.10186767578125, 0.05438232421875, -0.0890\n",
|
||||
"[-0.02667236328125, 0.036651611328125, -0.1630859375, -0.0904541015625, -0.022430419921875, -0.09545\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3fba556a-b53d-431c-b0c6-ffb1e2fa5a6e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `FireworksEmbeddings` features and configurations head to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Fireworks\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Fireworks embedding models using LangChain. For detailed documentation on `FireworksEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Fireworks\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Fireworks embedding models you'll need to create a Fireworks account, get an API key, and install the `langchain-fireworks` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [fireworks.ai](https://fireworks.ai/) to sign up to Fireworks and generate an API key. Once you’ve done this set the FIREWORKS_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"FIREWORKS_API_KEY\"):\n",
|
||||
" os.environ[\"FIREWORKS_API_KEY\"] = getpass.getpass(\"Enter your Fireworks API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Fireworks integration lives in the `langchain-fireworks` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_fireworks import FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = FireworksEmbeddings(\n",
|
||||
" model=\"nomic-ai/nomic-embed-text-v1.5\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.01666259765625, 0.011688232421875, -0.1181640625, -0.10205078125, 0.05438232421875, -0.0890502929\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.016632080078125, 0.01165008544921875, -0.1181640625, -0.10186767578125, 0.05438232421875, -0.0890\n",
|
||||
"[-0.02667236328125, 0.036651611328125, -0.1630859375, -0.0904541015625, -0.022430419921875, -0.09545\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3fba556a-b53d-431c-b0c6-ffb1e2fa5a6e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `FireworksEmbeddings` features and configurations head to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -203,7 +203,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -327,7 +327,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain_ibm",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -341,9 +341,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -286,7 +286,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,264 +1,266 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: MistralAI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MistralAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with MistralAI embedding models using LangChain. For detailed documentation on `MistralAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/mistralai/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"MistralAI\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access MistralAI embedding models you'll need to create a/an MistralAI account, get an API key, and install the `langchain-mistralai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://console.mistral.ai/](https://console.mistral.ai/) to sign up to MistralAI and generate an API key. Once you've done this set the MISTRALAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"MISTRALAI_API_KEY\"):\n",
|
||||
" os.environ[\"MISTRALAI_API_KEY\"] = getpass.getpass(\"Enter your MistralAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain MistralAI integration lives in the `langchain-mistralai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-mistralai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import MistralAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = MistralAIEmbeddings(\n",
|
||||
" model=\"mistral-embed\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.04443359375, 0.01885986328125, 0.018035888671875, -0.00864410400390625, 0.049652099609375, -0.00\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.04443359375, 0.01885986328125, 0.0180511474609375, -0.0086517333984375, 0.049652099609375, -0.00\n",
|
||||
"[-0.02032470703125, 0.02606201171875, 0.051605224609375, -0.0281982421875, 0.055755615234375, 0.0019\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `MistralAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/mistralai/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: MistralAI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MistralAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with MistralAI embedding models using LangChain. For detailed documentation on `MistralAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/mistralai/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"MistralAI\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access MistralAI embedding models you'll need to create a/an MistralAI account, get an API key, and install the `langchain-mistralai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://console.mistral.ai/](https://console.mistral.ai/) to sign up to MistralAI and generate an API key. Once you've done this set the MISTRALAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"MISTRALAI_API_KEY\"):\n",
|
||||
" os.environ[\"MISTRALAI_API_KEY\"] = getpass.getpass(\"Enter your MistralAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain MistralAI integration lives in the `langchain-mistralai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-mistralai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import MistralAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = MistralAIEmbeddings(\n",
|
||||
" model=\"mistral-embed\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.04443359375, 0.01885986328125, 0.018035888671875, -0.00864410400390625, 0.049652099609375, -0.00\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.04443359375, 0.01885986328125, 0.0180511474609375, -0.0086517333984375, 0.049652099609375, -0.00\n",
|
||||
"[-0.02032470703125, 0.02606201171875, 0.051605224609375, -0.0281982421875, 0.055755615234375, 0.0019\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `MistralAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/mistralai/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -128,7 +128,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -277,7 +277,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.16"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -112,7 +112,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -249,7 +249,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -44,15 +45,14 @@
|
||||
"start_time": "2025-03-20T01:53:27.764291Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"NETMIND_API_KEY\"):\n",
|
||||
" os.environ[\"NETMIND_API_KEY\"] = getpass.getpass(\"Enter your Netmind API key: \")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -64,6 +64,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -71,12 +72,11 @@
|
||||
"start_time": "2025-03-20T01:53:32.141858Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 2
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -90,6 +90,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -97,22 +98,21 @@
|
||||
"start_time": "2025-03-20T01:53:36.171640Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"%pip install -qU langchain-netmind"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\r\n",
|
||||
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m24.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.0.1\u001B[0m\r\n",
|
||||
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\r\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
"source": [
|
||||
"%pip install -qU langchain-netmind"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -126,6 +126,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -133,15 +134,14 @@
|
||||
"start_time": "2025-03-20T01:54:30.146876Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_netmind import NetmindEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = NetmindEmbeddings(\n",
|
||||
" model=\"nvidia/NV-Embed-v2\",\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 4
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -150,13 +150,14 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -164,6 +165,18 @@
|
||||
"start_time": "2025-03-20T01:54:34.500805Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
@@ -183,20 +196,7 @@
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -216,6 +216,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -223,10 +224,6 @@
|
||||
"start_time": "2025-03-20T01:54:45.196528Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -236,7 +233,10 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 6
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -250,6 +250,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -257,14 +258,6 @@
|
||||
"start_time": "2025-03-20T01:54:52.468719Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -275,7 +268,14 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 7
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -291,12 +291,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "",
|
||||
"id": "adb9e45c34733299"
|
||||
"id": "adb9e45c34733299",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -315,7 +315,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,285 +1,287 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Nomic\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NomicEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Nomic embedding models using LangChain. For detailed documentation on `NomicEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Nomic\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Nomic embedding models you'll need to create a/an Nomic account, get an API key, and install the `langchain-nomic` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://atlas.nomic.ai/](https://atlas.nomic.ai/) to sign up to Nomic and generate an API key. Once you've done this set the `NOMIC_API_KEY` environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"NOMIC_API_KEY\"):\n",
|
||||
" os.environ[\"NOMIC_API_KEY\"] = getpass.getpass(\"Enter your Nomic API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Nomic integration lives in the `langchain-nomic` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-nomic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_nomic import NomicEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = NomicEmbeddings(\n",
|
||||
" model=\"nomic-embed-text-v1.5\",\n",
|
||||
" # dimensionality=256,\n",
|
||||
" # Nomic's `nomic-embed-text-v1.5` model was [trained with Matryoshka learning](https://blog.nomic.ai/posts/nomic-embed-matryoshka)\n",
|
||||
" # to enable variable-length embeddings with a single model.\n",
|
||||
" # This means that you can specify the dimensionality of the embeddings at inference time.\n",
|
||||
" # The model supports dimensionality from 64 to 768.\n",
|
||||
" # inference_mode=\"remote\",\n",
|
||||
" # One of `remote`, `local` (Embed4All), or `dynamic` (automatic). Defaults to `remote`.\n",
|
||||
" # api_key=... , # if using remote inference,\n",
|
||||
" # device=\"cpu\",\n",
|
||||
" # The device to use for local embeddings. Choices include\n",
|
||||
" # `cpu`, `gpu`, `nvidia`, `amd`, or a specific device name. See\n",
|
||||
" # the docstring for `GPT4All.__init__` for more info. Typically\n",
|
||||
" # defaults to CPU. Do not use on macOS.\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.024642944, 0.029083252, -0.14013672, -0.09082031, 0.058898926, -0.07489014, -0.0138168335, 0.0037\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.012771606, 0.023727417, -0.12365723, -0.083740234, 0.06530762, -0.07110596, -0.021896362, -0.0068\n",
|
||||
"[-0.019058228, 0.04058838, -0.15222168, -0.06842041, -0.012130737, -0.07128906, -0.04534912, 0.00522\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `NomicEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Nomic\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NomicEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Nomic embedding models using LangChain. For detailed documentation on `NomicEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Nomic\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Nomic embedding models you'll need to create a/an Nomic account, get an API key, and install the `langchain-nomic` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://atlas.nomic.ai/](https://atlas.nomic.ai/) to sign up to Nomic and generate an API key. Once you've done this set the `NOMIC_API_KEY` environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"NOMIC_API_KEY\"):\n",
|
||||
" os.environ[\"NOMIC_API_KEY\"] = getpass.getpass(\"Enter your Nomic API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Nomic integration lives in the `langchain-nomic` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-nomic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_nomic import NomicEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = NomicEmbeddings(\n",
|
||||
" model=\"nomic-embed-text-v1.5\",\n",
|
||||
" # dimensionality=256,\n",
|
||||
" # Nomic's `nomic-embed-text-v1.5` model was [trained with Matryoshka learning](https://blog.nomic.ai/posts/nomic-embed-matryoshka)\n",
|
||||
" # to enable variable-length embeddings with a single model.\n",
|
||||
" # This means that you can specify the dimensionality of the embeddings at inference time.\n",
|
||||
" # The model supports dimensionality from 64 to 768.\n",
|
||||
" # inference_mode=\"remote\",\n",
|
||||
" # One of `remote`, `local` (Embed4All), or `dynamic` (automatic). Defaults to `remote`.\n",
|
||||
" # api_key=... , # if using remote inference,\n",
|
||||
" # device=\"cpu\",\n",
|
||||
" # The device to use for local embeddings. Choices include\n",
|
||||
" # `cpu`, `gpu`, `nvidia`, `amd`, or a specific device name. See\n",
|
||||
" # the docstring for `GPT4All.__init__` for more info. Typically\n",
|
||||
" # defaults to CPU. Do not use on macOS.\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.024642944, 0.029083252, -0.14013672, -0.09082031, 0.058898926, -0.07489014, -0.0138168335, 0.0037\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.012771606, 0.023727417, -0.12365723, -0.083740234, 0.06530762, -0.07110596, -0.021896362, -0.0068\n",
|
||||
"[-0.019058228, 0.04058838, -0.15222168, -0.06842041, -0.012130737, -0.07128906, -0.04534912, 0.00522\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `NomicEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -1,270 +1,272 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: OpenAI\n",
|
||||
"keywords: [openaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with OpenAI embedding models using LangChain. For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"OpenAI\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access OpenAI embedding models you'll need to create a/an OpenAI account, get an API key, and install the `langchain-openai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [platform.openai.com](https://platform.openai.com) to sign up to OpenAI and generate an API key. Once you’ve done this set the OPENAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain OpenAI integration lives in the `langchain-openai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(\n",
|
||||
" model=\"text-embedding-3-large\",\n",
|
||||
" # With the `text-embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019276829436421394, 0.0037708976306021214, -0.03294256329536438, 0.0037671267054975033, 0.008175\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019260549917817116, 0.0037612367887049913, -0.03291035071015358, 0.003757466096431017, 0.0082049\n",
|
||||
"[-0.010181212797760963, 0.023419594392180443, -0.04215526953339577, -0.001532090245746076, -0.023573\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: OpenAI\n",
|
||||
"keywords: [openaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with OpenAI embedding models using LangChain. For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"OpenAI\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access OpenAI embedding models you'll need to create a/an OpenAI account, get an API key, and install the `langchain-openai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [platform.openai.com](https://platform.openai.com) to sign up to OpenAI and generate an API key. Once you’ve done this set the OPENAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain OpenAI integration lives in the `langchain-openai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(\n",
|
||||
" model=\"text-embedding-3-large\",\n",
|
||||
" # With the `text-embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019276829436421394, 0.0037708976306021214, -0.03294256329536438, 0.0037671267054975033, 0.008175\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019260549917817116, 0.0037612367887049913, -0.03291035071015358, 0.003757466096431017, 0.0082049\n",
|
||||
"[-0.010181212797760963, 0.023419594392180443, -0.04215526953339577, -0.001532090245746076, -0.023573\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -244,7 +244,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -141,7 +141,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -252,7 +252,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,275 +1,277 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Together AI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Together embedding models using LangChain. For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Together\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Together embedding models you'll need to create a/an Together account, get an API key, and install the `langchain-together` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://api.together.xyz/](https://api.together.xyz/) to sign up to Together and generate an API key. Once you've done this set the TOGETHER_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"TOGETHER_API_KEY\"):\n",
|
||||
" os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Together integration lives in the `langchain-together` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-together"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_together import TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = TogetherEmbeddings(\n",
|
||||
" model=\"togethercomputer/m2-bert-80M-8k-retrieval\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n",
|
||||
"[0.066308185, -0.032866564, 0.115751594, 0.19082588, 0.14017, -0.26976448, -0.056340694, -0.26923394\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Together AI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Together embedding models using LangChain. For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Together\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Together embedding models you'll need to create a/an Together account, get an API key, and install the `langchain-together` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://api.together.xyz/](https://api.together.xyz/) to sign up to Together and generate an API key. Once you've done this set the TOGETHER_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"TOGETHER_API_KEY\"):\n",
|
||||
" os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Together integration lives in the `langchain-together` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-together"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_together import TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = TogetherEmbeddings(\n",
|
||||
" model=\"togethercomputer/m2-bert-80M-8k-retrieval\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n",
|
||||
"[0.066308185, -0.032866564, 0.115751594, 0.19082588, 0.14017, -0.26976448, -0.056340694, -0.26923394\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -1,277 +1,279 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: ZhipuAI\n",
|
||||
"keywords: [zhipuaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with ZhipuAI embedding models using LangChain. For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://bigmodel.cn/dev/api#vector).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Provider | Package |\n",
|
||||
"|:--------:|:-------:|\n",
|
||||
"| [ZhipuAI](/docs/integrations/providers/zhipuai/) | [langchain-community](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html) |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access ZhipuAI embedding models you'll need to create a/an ZhipuAI account, get an API key, and install the `zhipuai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://bigmodel.cn/](https://bigmodel.cn/usercenter/apikeys) to sign up to ZhipuAI and generate an API key. Once you've done this set the ZHIPUAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"ZHIPUAI_API_KEY\"):\n",
|
||||
" os.environ[\"ZHIPUAI_API_KEY\"] = getpass.getpass(\"Enter your ZhipuAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain ZhipuAI integration lives in the `zhipuai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU zhipuai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = ZhipuAIEmbeddings(\n",
|
||||
" model=\"embedding-3\",\n",
|
||||
" # With the `embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n",
|
||||
"[-0.02330017, -0.013916016, 0.00022411346, 0.017196655, -0.034240723, 0.011131287, 0.011497498, -0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: ZhipuAI\n",
|
||||
"keywords: [zhipuaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with ZhipuAI embedding models using LangChain. For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://bigmodel.cn/dev/api#vector).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Provider | Package |\n",
|
||||
"|:--------:|:-------:|\n",
|
||||
"| [ZhipuAI](/docs/integrations/providers/zhipuai/) | [langchain-community](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html) |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access ZhipuAI embedding models you'll need to create a/an ZhipuAI account, get an API key, and install the `zhipuai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://bigmodel.cn/](https://bigmodel.cn/usercenter/apikeys) to sign up to ZhipuAI and generate an API key. Once you've done this set the ZHIPUAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"ZHIPUAI_API_KEY\"):\n",
|
||||
" os.environ[\"ZHIPUAI_API_KEY\"] = getpass.getpass(\"Enter your ZhipuAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain ZhipuAI integration lives in the `zhipuai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU zhipuai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = ZhipuAIEmbeddings(\n",
|
||||
" model=\"embedding-3\",\n",
|
||||
" # With the `embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n",
|
||||
"[-0.02330017, -0.013916016, 0.00022411346, 0.017196655, -0.034240723, 0.011131287, 0.011497498, -0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
"\n",
|
||||
">[Chroma](https://docs.trychroma.com/getting-started) is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. View the full docs of `Chroma` at [this page](https://docs.trychroma.com/reference/py-collection), and find the API reference for the LangChain integration at [this page](https://python.langchain.com/api_reference/chroma/vectorstores/langchain_chroma.vectorstores.Chroma.html).\n",
|
||||
"\n",
|
||||
":::info Chroma Cloud\n",
|
||||
"\n",
|
||||
"Chroma Cloud powers serverless vector and full-text search. It's extremely fast, cost-effective, scalable and painless. Create a DB and try it out in under 30 seconds with $5 of free credits.\n",
|
||||
"\n",
|
||||
"[Get started with Chroma Cloud](https://trychroma.com/signup)\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `Chroma` vector stores you'll need to install the `langchain-chroma` integration package."
|
||||
@@ -33,7 +40,15 @@
|
||||
"source": [
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"You can use the `Chroma` vector store without any credentials, simply installing the package above is enough!"
|
||||
"You can use the `Chroma` vector store without any credentials, simply installing the package above is enough!\n",
|
||||
"\n",
|
||||
"If you are a [Chroma Cloud](https://trychroma.com/signup) user, set your `CHROMA_TENANT`, `CHROMA_DATABASE`, and `CHROMA_API_KEY` environment variables.\n",
|
||||
"\n",
|
||||
"When you install the `chromadb` package you also get access to the Chroma CLI, which can set these for you. First, [login](https://docs.trychroma.com/docs/cli/login) via the CLI, and then use the [`connect` command](https://docs.trychroma.com/docs/cli/db):\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"chroma db connect [db_name] --env-file\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,7 +88,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "d3ed0a9a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -85,9 +100,19 @@
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6a43e25-227c-4e89-909f-3654fe2710fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (In-Memory)\n",
|
||||
"\n",
|
||||
"You can get a Chroma server running in memory by simply instantiating a `Chroma` instance with a collection name and your embeddings provider:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"id": "3ea11a7b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -97,7 +122,104 @@
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" persist_directory=\"./chroma_langchain_db\", # Where to save data locally, remove if not necessary\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92d04cda-e8cc-48aa-9680-470304e3ff4c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you don't need data persistence, this is a great option for experimenting while building your AI application with Langchain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ad6adc53-4b3f-458e-8e2e-efcc3f99f0c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (with Data Persistence)\n",
|
||||
"\n",
|
||||
"You can provide the `persist_directory` argument to save your data across multiple runs of your program:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a858e77-fd6d-44f0-840f-8f71eaeae6f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" persist_directory=\"./chroma_langchain_db\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "47bf272e-af0b-450e-8a86-3e8292273cde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Connecting to a Chroma Server\n",
|
||||
"\n",
|
||||
"If you have a Chroma server running locally, or you have [deployed](https://docs.trychroma.com/guides/deploy/client-server-mode) one yourself, you can connect to it by providing the `host` argument.\n",
|
||||
"\n",
|
||||
"For example, you can start a Chroma server running locally with `chroma run`, and then connect it with `host='localhost'`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "679d619f-b8ee-4abb-8ac0-77ec859ddff1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" host=\"localhost\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3c06ed9-c010-4764-bd6e-2a0c71201d5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For other deployments you can use the `port`, `ssl`, and `headers` arguments to customize your connection."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f3238e1-ca57-482d-878d-b09bd2c8015c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Chroma Cloud\n",
|
||||
"\n",
|
||||
"Chroma Cloud users can also build with Langchain. Provide your `Chroma` instance with your Chroma Cloud API key, tenant, and DB name:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e080d2d2-c501-467e-9842-e2045d86cdb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" chroma_cloud_api_key=os.getenv(\"CHROMA_API_KEY\"),\n",
|
||||
" tenant=os.getenv(\"CHROMA_TENANT\"),\n",
|
||||
" database=os.getenv(\"CHROMA_DATABASE\"),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -111,21 +233,132 @@
|
||||
"You can also initialize from a `Chroma` client, which is particularly useful if you want easier access to the underlying database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38e9f893-60df-4a4f-b570-2d1c463cc1e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (In-Memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3fe4457f",
|
||||
"execution_count": null,
|
||||
"id": "09bfb62f-7c6b-43d3-a69a-0601899c6942",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"persistent_client = chromadb.PersistentClient()\n",
|
||||
"collection = persistent_client.get_or_create_collection(\"collection_name\")\n",
|
||||
"collection.add(ids=[\"1\", \"2\", \"3\"], documents=[\"a\", \"b\", \"c\"])\n",
|
||||
"client = chromadb.Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f3eac2de-0cca-4d57-b67d-04cc78bb59c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (with Data Persistence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ffc7f2ad-0d6c-4911-a4cf-a82bf7649478",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.PersistentClient(path=\"./chroma_langchain_db\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "41cc98d5-94f3-4a2f-903e-61c4a38d8f9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Connecting to a Chroma Server\n",
|
||||
"\n",
|
||||
"For example, if you are running a Chroma server locally (using `chroma run`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bb5828e3-c0a5-4f97-8d2e-23d82257743e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.HttpClient(host=\"localhost\", port=8000, ssl=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "254ecfdb-f247-4a3d-a52a-e515b17b7ba2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Chroma Cloud"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fbbf8042-7ae7-4221-96e3-dc2048dd0f45",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After setting your `CHROMA_API_KEY`, `CHROMA_TENANT`, and `CHROMA_DATABASE`, you can simply instantiate:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89e86a01-a347-4041-a4a1-01eecd299235",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.CloudClient()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8fdd8bbb-45ab-43d8-bdc1-7220b14cfc52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Access your Chroma DB"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6da21a1a-8d0d-4a4b-bac5-008839e89540",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"collection = client.get_or_create_collection(\"collection_name\")\n",
|
||||
"collection.add(ids=[\"1\", \"2\", \"3\"], documents=[\"a\", \"b\", \"c\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "581906ba-8082-450c-a3c4-19284539980b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Create a Chroma Vectorstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3fe4457f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store_from_client = Chroma(\n",
|
||||
" client=persistent_client,\n",
|
||||
" client=client,\n",
|
||||
" collection_name=\"collection_name\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
")"
|
||||
@@ -147,30 +380,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"id": "da279339",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['f22ed484-6db3-4b76-adb1-18a777426cd6',\n",
|
||||
" 'e0d5bab4-6453-4511-9a37-023d9d288faa',\n",
|
||||
" '877d76b8-3580-4d9e-a13f-eed0fa3d134a',\n",
|
||||
" '26eaccab-81ce-4c0a-8e76-bf542647df18',\n",
|
||||
" 'bcaa8239-7986-4050-bf40-e14fb7dab997',\n",
|
||||
" 'cdc44b38-a83f-4e49-b249-7765b334e09d',\n",
|
||||
" 'a7a35354-2687-4bc2-8242-3849a4d18d34',\n",
|
||||
" '8780caf1-d946-4f27-a707-67d037e9e1d8',\n",
|
||||
" 'dec6af2a-7326-408f-893d-7d7d717dfda9',\n",
|
||||
" '3b18e210-bb59-47a0-8e17-c8e51176ea5e']"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
@@ -265,7 +478,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "ef5dbd1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -301,7 +514,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "56f17791",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -327,19 +540,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "e2b96fcf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
@@ -362,18 +566,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"id": "2768a331",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=1.726390] The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
@@ -394,18 +590,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"id": "8ea434a5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* I had chocolate chip pancakes and fried eggs for breakfast this morning. [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_by_vector(\n",
|
||||
" embedding=embeddings.embed_query(\"I love green eggs and ham!\"), k=1\n",
|
||||
@@ -430,21 +618,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"id": "7b6f7867",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"mmr\", search_kwargs={\"k\": 1, \"fetch_k\": 5}\n",
|
||||
@@ -461,7 +638,7 @@
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials](/docs/tutorials/)\n",
|
||||
"- [Tutorials](/docs/tutorials/rag)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/retrieval)"
|
||||
]
|
||||
@@ -493,7 +670,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
782
docs/docs/integrations/vectorstores/pgvectorstore.ipynb
Normal file
782
docs/docs/integrations/vectorstores/pgvectorstore.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -447,11 +447,7 @@
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We'll define a helper funciton to create a uuid for a document and associated vector embedding based on its timestamp. We'll use this function to create a uuid for each git log entry.\n",
|
||||
"\n",
|
||||
"Important note: If you are working with documents and want the current date and time associated with vector for time-based search, you can skip this step. A uuid will be automatically generated when the documents are ingested by default."
|
||||
]
|
||||
"source": "We'll define a helper function to create a uuid for a document and associated vector embedding based on its timestamp. We'll use this function to create a uuid for each git log entry.\n\nImportant note: If you are working with documents and want the current date and time associated with vector for time-based search, you can skip this step. A uuid will be automatically generated when the documents are ingested by default."
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -1729,4 +1725,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,6 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from langchain_community import document_loaders
|
||||
from langchain_core.document_loaders.base import BaseLoader
|
||||
|
||||
KV_STORE_TEMPLATE = """\
|
||||
---
|
||||
sidebar_class_name: hidden
|
||||
|
||||
@@ -175,8 +175,23 @@ def _modify_frontmatter(
|
||||
def _convert_notebook(
|
||||
notebook_path: Path, output_path: Path, intermediate_docs_dir: Path
|
||||
) -> Path:
|
||||
with open(notebook_path) as f:
|
||||
nb = nbformat.read(f, as_version=4)
|
||||
import json
|
||||
import uuid
|
||||
|
||||
with open(notebook_path, "r", encoding="utf-8") as f:
|
||||
nb_json = json.load(f)
|
||||
|
||||
# Fix missing and duplicate cell IDs before nbformat validation
|
||||
seen_ids = set()
|
||||
for cell in nb_json.get("cells", []):
|
||||
if "id" not in cell or not cell.get("id") or cell.get("id") in seen_ids:
|
||||
cell["id"] = str(uuid.uuid4())[:8]
|
||||
seen_ids.add(cell["id"])
|
||||
|
||||
nb = nbformat.reads(json.dumps(nb_json), as_version=4)
|
||||
|
||||
# Upgrade notebook format
|
||||
nb = nbformat.v4.upgrade(nb)
|
||||
|
||||
body, resources = exporter.from_notebook_node(nb)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ from langchain_couchbase import CouchbaseSearchVectorStore
|
||||
from langchain_milvus import Milvus
|
||||
from langchain_mongodb import MongoDBAtlasVectorSearch
|
||||
from langchain_pinecone import PineconeVectorStore
|
||||
from langchain_postgres import PGVectorStore
|
||||
from langchain_qdrant import QdrantVectorStore
|
||||
|
||||
vectorstore_list = [
|
||||
@@ -22,6 +23,7 @@ vectorstore_list = [
|
||||
]
|
||||
|
||||
from_partners = [
|
||||
("PGVectorStore", PGVectorStore),
|
||||
("Chroma", Chroma),
|
||||
("AstraDBVectorStore", AstraDBVectorStore),
|
||||
("QdrantVectorStore", QdrantVectorStore),
|
||||
@@ -52,6 +54,17 @@ The table below lists the features for some of our most popular vector stores.
|
||||
|
||||
def get_vectorstore_table():
|
||||
vectorstore_feat_table = {
|
||||
"PGVectorStore": {
|
||||
"Delete by ID": True,
|
||||
"Filtering": True,
|
||||
"similarity_search_by_vector": True,
|
||||
"similarity_search_with_score": True,
|
||||
"asearch": True,
|
||||
"Passes Standard Tests": True,
|
||||
"Multi Tenancy": False,
|
||||
"Local/Cloud": "Local",
|
||||
"IDs in add Documents": True,
|
||||
},
|
||||
"FAISS": {
|
||||
"Delete by ID": True,
|
||||
"Filtering": True,
|
||||
|
||||
@@ -315,8 +315,8 @@ module.exports = {
|
||||
},
|
||||
],
|
||||
link: {
|
||||
type: "doc",
|
||||
id: "integrations/stores/index",
|
||||
type: "generated-index",
|
||||
slug: "integrations/stores",
|
||||
},
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1029,7 +1029,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "Chroma",
|
||||
@@ -1039,10 +1039,10 @@ const FEATURE_TABLES = {
|
||||
searchByVector: true,
|
||||
searchWithScore: true,
|
||||
async: true,
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
passesStandardTests: true,
|
||||
multiTenancy: true,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "Clickhouse",
|
||||
@@ -1055,7 +1055,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "CouchbaseSearchVectorStore",
|
||||
@@ -1081,7 +1081,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: false,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "ElasticsearchStore",
|
||||
@@ -1094,7 +1094,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "FAISS",
|
||||
@@ -1107,7 +1107,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "InMemoryVectorStore",
|
||||
@@ -1120,7 +1120,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "Milvus",
|
||||
@@ -1146,7 +1146,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "openGauss",
|
||||
@@ -1172,7 +1172,20 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "PGVectorStore",
|
||||
link: "pgvectorstore",
|
||||
deleteById: true,
|
||||
filtering: true,
|
||||
searchByVector: true,
|
||||
searchWithScore: true,
|
||||
async: true,
|
||||
passesStandardTests: true,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "PineconeVectorStore",
|
||||
@@ -1185,7 +1198,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "QdrantVectorStore",
|
||||
@@ -1211,7 +1224,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "Weaviate",
|
||||
@@ -1224,7 +1237,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: true,
|
||||
local: true,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
{
|
||||
name: "SQLServer",
|
||||
@@ -1237,7 +1250,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: false,
|
||||
idsInAddDocuments: false,
|
||||
idsInAddDocuments: true,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
@@ -60,6 +60,13 @@ export default function VectorStoreTabs(props) {
|
||||
packageName: "langchain-postgres",
|
||||
default: false,
|
||||
},
|
||||
{
|
||||
value: "PGVectorStore",
|
||||
label: "PGVectorStore",
|
||||
text: `from langchain_postgres import PGEngine, PGVectorStore\n${useFakeEmbeddings ? fakeEmbeddingsString : ""}\n$engine = PGEngine.from_connection_string(\n url="postgresql+psycopg://..."\n)\n\n${vectorStoreVarName} = PGVectorStore.create_sync(\n engine=pg_engine,\n table_name='test_table',\n embedding_service=embedding\n)`,
|
||||
packageName: "langchain-postgres",
|
||||
default: false,
|
||||
},
|
||||
{
|
||||
value: "Pinecone",
|
||||
label: "Pinecone",
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
httpx
|
||||
grpcio
|
||||
aiohttp<3.11
|
||||
protobuf<3.21
|
||||
protobuf<5.0
|
||||
tenacity
|
||||
urllib3
|
||||
pypdf
|
||||
# Fix numpy conflicts between langchain-astradb and langchain-chroma
|
||||
numpy>=1.26.0,<2.0.0
|
||||
# Fix simsimd build error in langchain-weaviate
|
||||
simsimd>=5.0.0
|
||||
# Fix sentencepiece build error - use newer version that supports modern CMake
|
||||
sentencepiece>=0.2.1
|
||||
|
||||
@@ -67,12 +67,11 @@ def serve(
|
||||
] = None,
|
||||
) -> None:
|
||||
"""Start the LangServe app, whether it's a template or an app."""
|
||||
# see if is a template
|
||||
try:
|
||||
project_dir = get_package_root()
|
||||
pyproject = project_dir / "pyproject.toml"
|
||||
get_langserve_export(pyproject)
|
||||
except KeyError:
|
||||
except (KeyError, FileNotFoundError):
|
||||
# not a template
|
||||
app_namespace.serve(port=port, host=host)
|
||||
else:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.PHONY: all format lint test tests integration_tests docker_tests help extended_tests
|
||||
.PHONY: all format lint test tests integration_tests help extended_tests
|
||||
|
||||
# Default target executed when no arguments are given to make.
|
||||
all: help
|
||||
|
||||
@@ -61,6 +61,7 @@ class __ModuleName__Loader(BaseLoader):
|
||||
.. code-block:: python
|
||||
|
||||
TODO: Example output
|
||||
|
||||
""" # noqa: E501
|
||||
|
||||
# TODO: This method must be implemented to load documents.
|
||||
|
||||
@@ -61,6 +61,7 @@ class __ModuleName__Tool(BaseTool): # type: ignore[override]
|
||||
.. code-block:: python
|
||||
|
||||
# TODO: output of invocation
|
||||
|
||||
""" # noqa: E501
|
||||
|
||||
# TODO: Set tool name and description
|
||||
|
||||
@@ -56,7 +56,6 @@ select = [
|
||||
"C4", # flake8-comprehensions
|
||||
"COM", # flake8-commas
|
||||
"D", # pydocstyle
|
||||
"DOC", # pydoclint
|
||||
"E", # pycodestyle error
|
||||
"EM", # flake8-errmsg
|
||||
"F", # pyflakes
|
||||
|
||||
1420
libs/cli/uv.lock
generated
1420
libs/cli/uv.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
.PHONY: all format lint test tests test_watch integration_tests docker_tests help extended_tests
|
||||
.PHONY: all format lint test tests test_watch integration_tests help extended_tests
|
||||
|
||||
# Default target executed when no arguments are given to make.
|
||||
all: help
|
||||
|
||||
@@ -4,7 +4,7 @@ The interfaces for core components like chat models, LLMs, vector stores, retrie
|
||||
and more are defined here. The universal invocation protocol (Runnables) along with
|
||||
a syntax for combining components (LangChain Expression Language) are also defined here.
|
||||
|
||||
No third-party integrations are defined here. The dependencies are kept purposefully
|
||||
**No third-party integrations are defined here.** The dependencies are kept purposefully
|
||||
very lightweight.
|
||||
"""
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ def beta(
|
||||
@beta
|
||||
def the_function_to_annotate():
|
||||
pass
|
||||
|
||||
"""
|
||||
|
||||
def beta(
|
||||
|
||||
@@ -136,6 +136,7 @@ def deprecated(
|
||||
@deprecated('1.4.0')
|
||||
def the_function_to_deprecate():
|
||||
pass
|
||||
|
||||
"""
|
||||
_validate_deprecation_params(
|
||||
removal, alternative, alternative_import, pending=pending
|
||||
@@ -549,6 +550,7 @@ def rename_parameter(
|
||||
|
||||
@_api.rename_parameter("3.1", "bad_name", "good_name")
|
||||
def func(good_name): ...
|
||||
|
||||
"""
|
||||
|
||||
def decorator(f: Callable[_P, _R]) -> Callable[_P, _R]:
|
||||
|
||||
@@ -363,6 +363,7 @@ class Context:
|
||||
print(output["result"]) # Output: "hello"
|
||||
print(output["context"]) # Output: "What's your name?"
|
||||
print(output["input"]) # Output: "What's your name?
|
||||
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -53,6 +53,7 @@ class FileCallbackHandler(BaseCallbackHandler):
|
||||
When not used as a context manager, a deprecation warning will be issued
|
||||
on first use. The file will be opened immediately in ``__init__`` and closed
|
||||
in ``__del__`` or when ``close()`` is called explicitly.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -105,6 +105,7 @@ def trace_as_chain_group(
|
||||
# Use the callback manager for the chain group
|
||||
res = llm.invoke(llm_input, {"callbacks": manager})
|
||||
manager.on_chain_end({"output": res})
|
||||
|
||||
""" # noqa: E501
|
||||
from langchain_core.tracers.context import _get_trace_callbacks
|
||||
|
||||
@@ -186,6 +187,7 @@ async def atrace_as_chain_group(
|
||||
# Use the async callback manager for the chain group
|
||||
res = await llm.ainvoke(llm_input, {"callbacks": manager})
|
||||
await manager.on_chain_end({"output": res})
|
||||
|
||||
""" # noqa: E501
|
||||
from langchain_core.tracers.context import _get_trace_callbacks
|
||||
|
||||
@@ -2575,6 +2577,7 @@ async def adispatch_custom_event(
|
||||
behalf.
|
||||
|
||||
.. versionadded:: 0.2.15
|
||||
|
||||
"""
|
||||
from langchain_core.runnables.config import (
|
||||
ensure_config,
|
||||
@@ -2645,6 +2648,7 @@ def dispatch_custom_event(
|
||||
foo_.invoke({"a": "1"}, {"callbacks": [CustomCallbackManager()]})
|
||||
|
||||
.. versionadded:: 0.2.15
|
||||
|
||||
"""
|
||||
from langchain_core.runnables.config import (
|
||||
ensure_config,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user