Compare commits

..

272 Commits

Author SHA1 Message Date
Bagatur
4063bf144a langchain[patch]: release 0.0.344 (#14095) 2023-11-30 15:57:11 -08:00
Bagatur
efce352d6b core[patch]: release 0.0.8 (#14086) 2023-11-30 15:12:06 -08:00
Harutaka Kawamura
0d08a692a3 langchain[minor]: Migrate mlflow and databricks classes to deployments APIs. (#13699)
## Description

Related to https://github.com/mlflow/mlflow/pull/10420. MLflow AI
gateway will be deprecated and replaced by the `mlflow.deployments`
module. Happy to split this PR if it's too large.

```
pip install git+https://github.com/langchain-ai/langchain.git@refs/pull/13699/merge#subdirectory=libs/langchain
```

## Dependencies

Install mlflow from https://github.com/mlflow/mlflow/pull/10420:

```
pip install git+https://github.com/mlflow/mlflow.git@refs/pull/10420/merge
```

## Testing plan

The following code works fine on local and databricks:

<details><summary>Click</summary>
<p>

```python
"""
Setup
-----
mlflow deployments start-server --config-path examples/gateway/openai/config.yaml
databricks secrets create-scope <scope>
databricks secrets put-secret <scope> openai-api-key --string-value $OPENAI_API_KEY

Run
---
python /path/to/this/file.py secrets/<scope>/openai-api-key
"""
from langchain.chat_models import ChatMlflow, ChatDatabricks
from langchain.embeddings import MlflowEmbeddings, DatabricksEmbeddings
from langchain.llms import Databricks, Mlflow
from langchain.schema.messages import HumanMessage
from langchain.chains.loading import load_chain
from mlflow.deployments import get_deploy_client
import uuid
import sys
import tempfile
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

###############################
# MLflow
###############################
chat = ChatMlflow(
    target_uri="http://127.0.0.1:5000", endpoint="chat", params={"temperature": 0.1}
)
print(chat([HumanMessage(content="hello")]))

embeddings = MlflowEmbeddings(target_uri="http://127.0.0.1:5000", endpoint="embeddings")
print(embeddings.embed_query("hello")[:3])
print(embeddings.embed_documents(["hello", "world"])[0][:3])

llm = Mlflow(
    target_uri="http://127.0.0.1:5000",
    endpoint="completions",
    params={"temperature": 0.1},
)
print(llm("I am"))

llm_chain = LLMChain(
    llm=llm,
    prompt=PromptTemplate(
        input_variables=["adjective"],
        template="Tell me a {adjective} joke",
    ),
)
print(llm_chain.run(adjective="funny"))

# serialization/deserialization
with tempfile.TemporaryDirectory() as tmpdir:
    print(tmpdir)
    path = f"{tmpdir}/llm.yaml"
    llm_chain.save(path)
    loaded_chain = load_chain(path)
    print(loaded_chain("funny"))

###############################
# Databricks
###############################
secret = sys.argv[1]
client = get_deploy_client("databricks")

# External - chat
name = f"chat-{uuid.uuid4()}"
client.create_endpoint(
    name=name,
    config={
        "served_entities": [
            {
                "name": "test",
                "external_model": {
                    "name": "gpt-4",
                    "provider": "openai",
                    "task": "llm/v1/chat",
                    "openai_config": {
                        "openai_api_key": "{{" + secret + "}}",
                    },
                },
            }
        ],
    },
)
try:
    chat = ChatDatabricks(
        target_uri="databricks", endpoint=name, params={"temperature": 0.1}
    )
    print(chat([HumanMessage(content="hello")]))
finally:
    client.delete_endpoint(endpoint=name)

# External - embeddings
name = f"embeddings-{uuid.uuid4()}"
client.create_endpoint(
    name=name,
    config={
        "served_entities": [
            {
                "name": "test",
                "external_model": {
                    "name": "text-embedding-ada-002",
                    "provider": "openai",
                    "task": "llm/v1/embeddings",
                    "openai_config": {
                        "openai_api_key": "{{" + secret + "}}",
                    },
                },
            }
        ],
    },
)
try:
    embeddings = DatabricksEmbeddings(target_uri="databricks", endpoint=name)
    print(embeddings.embed_query("hello")[:3])
    print(embeddings.embed_documents(["hello", "world"])[0][:3])
finally:
    client.delete_endpoint(endpoint=name)

# External - completions
name = f"completions-{uuid.uuid4()}"
client.create_endpoint(
    name=name,
    config={
        "served_entities": [
            {
                "name": "test",
                "external_model": {
                    "name": "gpt-3.5-turbo-instruct",
                    "provider": "openai",
                    "task": "llm/v1/completions",
                    "openai_config": {
                        "openai_api_key": "{{" + secret + "}}",
                    },
                },
            }
        ],
    },
)
try:
    llm = Databricks(
        endpoint_name=name,
        model_kwargs={"temperature": 0.1},
    )
    print(llm("I am"))
finally:
    client.delete_endpoint(endpoint=name)


# Foundation model - chat
chat = ChatDatabricks(
    endpoint="databricks-llama-2-70b-chat", params={"temperature": 0.1}
)
print(chat([HumanMessage(content="hello")]))

# Foundation model - embeddings
embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
print(embeddings.embed_query("hello")[:3])

# Foundation model - completions
llm = Databricks(
    endpoint_name="databricks-mpt-7b-instruct", model_kwargs={"temperature": 0.1}
)
print(llm("hello"))
llm_chain = LLMChain(
    llm=llm,
    prompt=PromptTemplate(
        input_variables=["adjective"],
        template="Tell me a {adjective} joke",
    ),
)
print(llm_chain.run(adjective="funny"))

# serialization/deserialization
with tempfile.TemporaryDirectory() as tmpdir:
    print(tmpdir)
    path = f"{tmpdir}/llm.yaml"
    llm_chain.save(path)
    loaded_chain = load_chain(path)
    print(loaded_chain("funny"))

```

Output:

```
content='Hello! How can I assist you today?'
[-0.025058426, -0.01938856, -0.027781019]
[-0.025058426, -0.01938856, -0.027781019]
sorry, but I cannot continue the sentence as it is incomplete. Can you please provide more information or context?
Sure, here's a classic one for you:

Why don't scientists trust atoms?

Because they make up everything!
/var/folders/dz/cd_nvlf14g9g__n3ph0d_0pm0000gp/T/tmpx_4no6ad
{'adjective': 'funny', 'text': "Sure, here's a classic one for you:\n\nWhy don't scientists trust atoms?\n\nBecause they make up everything!"}
content='Hello! How can I assist you today?'
[-0.025058426, -0.01938856, -0.027781019]
[-0.025058426, -0.01938856, -0.027781019]
 a 23 year old female and I am currently studying for my master's degree
content="\nHello! It's nice to meet you. Is there something I can help you with or would you like to chat for a bit?"
[0.051055908203125, 0.007221221923828125, 0.003879547119140625]
[0.051055908203125, 0.007221221923828125, 0.003879547119140625]

hello back
 Well, I don't really know many jokes, but I do know this funny story...
/var/folders/dz/cd_nvlf14g9g__n3ph0d_0pm0000gp/T/tmp7_ds72ex
{'adjective': 'funny', 'text': " Well, I don't really know many jokes, but I do know this funny story..."}
```

</p>
</details>

The existing workflow doesn't break:

<details><summary>click</summary>
<p>

```python
import uuid

import mlflow
from mlflow.models import ModelSignature
from mlflow.types.schema import ColSpec, Schema


class MyModel(mlflow.pyfunc.PythonModel):
    def predict(self, context, model_input):
        return str(uuid.uuid4())


with mlflow.start_run():
    mlflow.pyfunc.log_model(
        "model",
        python_model=MyModel(),
        pip_requirements=["mlflow==2.8.1", "cloudpickle<3"],
        signature=ModelSignature(
            inputs=Schema(
                [
                    ColSpec("string", "prompt"),
                    ColSpec("string", "stop"),
                ]
            ),
            outputs=Schema(
                [
                    ColSpec(name=None, type="string"),
                ]
            ),
        ),
        registered_model_name=f"lang-{uuid.uuid4()}",
    )

# Manually create a serving endpoint with the registered model and run
from langchain.llms import Databricks

llm = Databricks(endpoint_name="<name>")
llm("hello")  # 9d0b2491-3d13-487c-bc02-1287f06ecae7
```

</p>
</details> 

## Follow-up tasks

(This PR is too large. I'll file a separate one for follow-up tasks.)

- Update `docs/docs/integrations/providers/mlflow_ai_gateway.mdx` and
`docs/docs/integrations/providers/databricks.md`.

---------

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-30 15:06:58 -08:00
Tyler Hutcherson
dc31714ec5 templates[patch]: Rag redis template dependency update (#13614)
- **Description:** Update RAG Redis template readme and dependencies.
2023-11-30 12:22:13 -08:00
Jeremy Naccache
a14cf87576 core[patch]: Add **kwargs to Langchain's dumps() to allow passing of json.dumps() … (#10628)
…parameters.

In Langchain's `dumps()` function, I've added a `**kwargs` parameter.
This allows users to pass additional parameters to the underlying
`json.dumps()` function, providing greater flexibility and control over
JSON serialization.

Many parameters available in `json.dumps()` can be useful or even
necessary in specific situations. For example, when using an Agent with
return_intermediate_steps set to true, the output is a list of
AgentAction objects. These objects can't be serialized without using
Langchain's `dumps()` function.

The issue arises when using the Agent with a language other than
English, which may contain non-ASCII characters like 'é'. The default
behavior of `json.dumps()` sets ensure_ascii to true, converting
`{"name": "José"}` into `{"name": "Jos\u00e9"}`. This can make the
output hard to read, especially in the case of intermediate steps in
agent logs.

By allowing users to pass additional parameters to `json.dumps()` via
Langchain's dumps(), we can solve this problem. For instance, users can
set `ensure_ascii=False` to maintain the original characters.

This update also enables users to pass other useful `json.dumps()`
parameters like `sort_keys`, providing even more flexibility.

The implementation takes into account edge cases where a user might pass
a "default" parameter, which is already defined by `dumps()`, or an
"indent" parameter, which is also predefined if `pretty=True` is set.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-30 08:52:24 -08:00
Erick Friis
8078caf764 templates[patch]: rag-google-cloud-sdp readme (#14043) 2023-11-30 08:17:51 -08:00
Yong woo Song
f4d520ccb5 Fix .env file path in integration_test README.md (#14028)
<!-- Thank you for contributing to LangChain!



Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

### Description
Hello, 

The [integration_test
README](https://github.com/langchain-ai/langchain/tree/master/libs/langchain/tests)
was indicating incorrect paths for the `.env.example` and `.env` files.

`tests/.env.example` ->`tests/integration_tests/.env.example`

While it’s a minor error, it could **potentially lead to confusion** for
the document’s readers, so I’ve made the necessary corrections.

Thank you! ☺️

### Related Issue
- https://github.com/langchain-ai/langchain/pull/2806
2023-11-29 22:14:28 -05:00
Rohan Dey
41a4c06a94 Added support for a Pandas DataFrame OutputParser (#13257)
**Description:**

Added support for a Pandas DataFrame OutputParser with format
instructions, along with unit tests and a demo notebook. Namely, we've
added the ability to request data from a DataFrame, have the LLM parse
the request, and then use that request to retrieve a well-formatted
response.

Within LangChain, it seamlessly integrates with language models like
OpenAI's `text-davinci-003`, facilitating streamlined interaction using
the format instructions (just like the other output parsers).

This parser structures its requests as
`<operation/column/row>[<optional_array_params>]`. The instructions
detail permissible operations, valid columns, and array formats,
ensuring clarity and adherence to the required format.

For example:

- When the LLM receives the input: "Retrieve the mean of `num_legs` from
rows 1 to 3."
- The provided format instructions guide the LLM to structure the
request as: "mean:num_legs[1..3]".

The parser processes this formatted request, leveraging the LLM's
understanding to extract the mean of `num_legs` from rows 1 to 3 within
the Pandas DataFrame.

This integration allows users to communicate requests naturally, with
the LLM transforming these instructions into structured commands
understood by the `PandasDataFrameOutputParser`. The format instructions
act as a bridge between natural language queries and precise DataFrame
operations, optimizing communication and data retrieval.

**Issue:**

- https://github.com/langchain-ai/langchain/issues/11532

**Dependencies:**

No additional dependencies :)

**Tag maintainer:**

@baskaryan 

**Twitter handle:**

No need. :)

---------

Co-authored-by: Wasee Alam <waseealam@protonmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 22:08:50 -05:00
Masanori Taniguchi
235bdb9fa7 Support Vald secure connection (#13269)
**Description:** 
When using Vald, only insecure grpc connection was supported, so secure
connection is now supported.
In addition, grpc metadata can be added to Vald requests to enable
authentication with a token.

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-29 22:07:29 -05:00
Nico Puhlmann
54355b651a Update index.mdx (#13285)
grammar correction

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 22:06:33 -05:00
sudranga
d1d693b2a7 Fix issue where response_if_no_docs_found is not implemented on async… (#13297)
Response_if_no_docs_found is not implemented in
ConversationalRetrievalChain for async code paths. Implemented it and
added test cases

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 22:06:13 -05:00
AthulVincent
67c55cb5b0 Implemented MongoDB Atlas Self-Query Retriever (#13321)
# Description 
This PR implements Self-Query Retriever for MongoDB Atlas vector store.

I've implemented the comparators and operators that are supported by
MongoDB Atlas vector store according to the section titled "Atlas Vector
Search Pre-Filter" from
https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/.

Namely:
```
allowed_comparators = [
      Comparator.EQ,
      Comparator.NE,
      Comparator.GT,
      Comparator.GTE,
      Comparator.LT,
      Comparator.LTE,
      Comparator.IN,
      Comparator.NIN,
  ]

"""Subset of allowed logical operators."""
allowed_operators = [
    Operator.AND,
    Operator.OR
]
```
Translations from comparators/operators to MongoDB Atlas filter
operators(you can find the syntax in the "Atlas Vector Search
Pre-Filter" section from the previous link) are done using the following
dictionary:
```
map_dict = {
            Operator.AND: "$and",
            Operator.OR: "$or",
            Comparator.EQ: "$eq",
            Comparator.NE: "$ne",
            Comparator.GTE: "$gte",
            Comparator.LTE: "$lte",
            Comparator.LT: "$lt",
            Comparator.GT: "$gt",
            Comparator.IN: "$in",
            Comparator.NIN: "$nin",
        }
```

In visit_structured_query() the filters are passed as "pre_filter" and
not "filter" as in the MongoDB link above since langchain's
implementation of MongoDB atlas vector
store(libs\langchain\langchain\vectorstores\mongodb_atlas.py) in
_similarity_search_with_score() sets the "filter" key to have the value
of the "pre_filter" argument.
```
params["filter"] = pre_filter
```
Test cases and documentation have also been added.

# Issue
#11616 

# Dependencies
No new dependencies have been added.

# Documentation
I have created the notebook mongodb_atlas_self_query.ipynb outlining the
steps to get the self-query mechanism working.

I worked closely with [@Farhan-Faisal](https://github.com/Farhan-Faisal)
on this PR.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-29 22:05:06 -05:00
Josef Zoller
c2e3963da4 Merriam-Webster Dictionary Tool (#12044)
# Description

We implemented a simple tool for accessing the Merriam-Webster
Collegiate Dictionary API
(https://dictionaryapi.com/products/api-collegiate-dictionary).

Here's a simple usage example:

```py
from langchain.llms import OpenAI
from langchain.agents import load_tools, initialize_agent, AgentType

llm = OpenAI()
tools = load_tools(["serpapi", "merriam-webster"], llm=llm) # Serp API gives our agent access to Google
agent = initialize_agent(
  tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)
agent.run("What is the english word for the german word Himbeere? Define that word.")
```

Sample output:

```
> Entering new AgentExecutor chain...
 I need to find the english word for Himbeere and then get the definition of that word.
Action: Search
Action Input: "English word for Himbeere"
Observation: {'type': 'translation_result'}
Thought: Now I have the english word, I can look up the definition.
Action: MerriamWebster
Action Input: raspberry
Observation: Definitions of 'raspberry':

1. rasp-ber-ry, noun: any of various usually black or red edible berries that are aggregate fruits consisting of numerous small drupes on a fleshy receptacle and that are usually rounder and smaller than the closely related blackberries
2. rasp-ber-ry, noun: a perennial plant (genus Rubus) of the rose family that bears raspberries
3. rasp-ber-ry, noun: a sound of contempt made by protruding the tongue between the lips and expelling air forcibly to produce a vibration; broadly : an expression of disapproval or contempt
4. black raspberry, noun: a raspberry (Rubus occidentalis) of eastern North America that has a purplish-black fruit and is the source of several cultivated varieties —called also blackcap

Thought: I now know the final answer.
Final Answer: Raspberry is an english word for Himbeere and it is defined as any of various usually black or red edible berries that are aggregate fruits consisting of numerous small drupes on a fleshy receptacle and that are usually rounder and smaller than the closely related blackberries.

> Finished chain.
```

# Issue

This closes #12039.

# Dependencies

We added no extra dependencies.

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

---------

Co-authored-by: Lara <63805048+larkgz@users.noreply.github.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 20:28:29 -05:00
Mohammad Mohtashim
f3dd4a10cf DROP BOX Loader Documentation Update (#14047)
- **Description:** Update the document for drop box loader + made the
messages more verbose when loading pdf file since people were getting
confused
  - **Issue:** #13952
  - **Tag maintainer:** @baskaryan, @eyurtsev, @hwchase17,

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-29 17:25:35 -08:00
Cheng (William) Huang
a00db4b28f Add multi-input Reddit search tool (#13893)
- **Description:** Added a tool called RedditSearchRun and an
accompanying API wrapper, which searches Reddit for posts with support
for time filtering, post sorting, query string and subreddit filtering.
  - **Issue:** #13891 
  - **Dependencies:** `praw` module is used to search Reddit
- **Tag maintainer:** @baskaryan , and any of the other maintainers if
needed
  - **Twitter handle:** None.

  Hello,

This is our first PR and we hope that our changes will be helpful to the
community. We have run `make format`, `make lint` and `make test`
locally before submitting the PR. To our knowledge, our changes do not
introduce any new errors.

Our PR integrates the `praw` package which is already used by
RedditPostsLoader in LangChain. Nonetheless, we have added integration
tests and edited unit tests to test our changes. An example notebook is
also provided. These changes were put together by me, @Anika2000,
@CharlesXu123, and @Jeremy-Cheng-stack

Thank you in advance to the maintainers for their time.

---------

Co-authored-by: What-Is-A-Username <49571870+What-Is-A-Username@users.noreply.github.com>
Co-authored-by: Anika2000 <anika.sultana@mail.utoronto.ca>
Co-authored-by: Jeremy Cheng <81793294+Jeremy-Cheng-stack@users.noreply.github.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 20:16:40 -05:00
Jawad Arshad
00a6e8962c langchain[minor]: Add serpapi tools (#13934)
- **Description:** Added some of the more endpoints supported by serpapi
that are not suported on langchain at the moment, like google trends,
google finance, google jobs, and google lens
- **Issue:** [Add support for many of the querying endpoints with
serpapi #11811](https://github.com/langchain-ai/langchain/issues/11811)

---------

Co-authored-by: zushenglu <58179949+zushenglu@users.noreply.github.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Ian Xu <ian.xu@mail.utoronto.ca>
Co-authored-by: zushenglu <zushenglu1809@gmail.com>
Co-authored-by: KevinT928 <96837880+KevinT928@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-29 14:02:57 -08:00
h3l
dbaeb163aa langchain[minor]: add volcengine endpoint as LLM (#13942)
- **Description:** Volc Engine MaaS serves as an enterprise-grade,
large-model service platform designed for developers. You can visit its
homepage at https://www.volcengine.com/docs/82379/1099455 for details.
This change will facilitate developers to integrate quickly with the
platform.
  - **Issue:** None
  - **Dependencies:** volcengine
  - **Tag maintainer:** @baskaryan 
  - **Twitter handle:** @he1v3tica

---------

Co-authored-by: lvzhong <lvzhong@bytedance.com>
2023-11-29 13:16:42 -08:00
Mohammad Ahmad
1600ebe6c7 langchain[patch]: Mask API key for ForeFrontAI LLM (#14013)
- **Description:** Mask API key for ForeFrontAI LLM and associated unit
tests
  - **Issue:** https://github.com/langchain-ai/langchain/issues/12165
  - **Dependencies:** N/A
  - **Tag maintainer:** @eyurtsev 
  - **Twitter handle:** `__mmahmad__`

I made the API key non-optional since linting required adding validation
for None, but the key is required per documentation:
https://python.langchain.com/docs/integrations/llms/forefrontai
2023-11-29 13:12:19 -08:00
yoch
a0e859df51 langchain[patch]: fix cohere reranker init #12899 (#14029)
- **Description:** use post field validation for `CohereRerank`
  - **Issue:** #12899 and #13058
  - **Dependencies:** 
  - **Tag maintainer:** @baskaryan

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-29 12:57:06 -08:00
123-fake-st
9bd6e9df36 update pdf document loaders' metadata source to url for online pdf (#13274)
- **Description:** Update 5 pdf document loaders in
`langchain.document_loaders.pdf`, to store a url in the metadata
(instead of a temporary, local file path) if the user provides a web
path to a pdf: `PyPDFium2Loader`, `PDFMinerLoader`,
`PDFMinerPDFasHTMLLoader`, `PyMuPDFLoader`, and `PDFPlumberLoader` were
updated.
- The updates follow the approach used to update `PyPDFLoader` for the
same behavior in #12092
- The `PyMuPDFLoader` changes required additional work in updating
`langchain.document_loaders.parsers.pdf.PyMuPDFParser` to be able to
process either an `io.BufferedReader` (from local pdf) or `io.BytesIO`
(from online pdf)
- The `PDFMinerPDFasHTMLLoader` change used a simpler approach since the
metadata is assigned by the loader and not the parser
  - **Issue:** Fixes #7034
  - **Dependencies:** None


```python
# PyPDFium2Loader example:
# old behavior
>>> from langchain.document_loaders import PyPDFium2Loader
>>> loader = PyPDFium2Loader('https://arxiv.org/pdf/1706.03762.pdf')
>>> docs = loader.load()
>>> docs[0].metadata
{'source': '/var/folders/7z/d5dt407n673drh1f5cm8spj40000gn/T/tmpm5oqa92f/tmp.pdf', 'page': 0}

# new behavior
>>> from langchain.document_loaders import PyPDFium2Loader
>>> loader = PyPDFium2Loader('https://arxiv.org/pdf/1706.03762.pdf')
>>> docs = loader.load()
>>> docs[0].metadata
{'source': 'https://arxiv.org/pdf/1706.03762.pdf', 'page': 0}
```
2023-11-29 15:07:46 -05:00
Toshish Jawale
6f64cb5078 Remove deprecated param and flexibility for prompt (#13310)
- **Description:** Updated to remove deprecated parameter penalty_alpha,
and use string variation of prompt rather than json object for better
flexibility. - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** N/A
  - **Tag maintainer:** @eyurtsev
  - **Twitter handle:** @symbldotai

---------

Co-authored-by: toshishjawale <toshish@symbl.ai>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 14:48:25 -05:00
Tomaz Bratanic
3eb391561b langchain[minor]: Reduce the number of tokens required to describe a Cypher/Neo4j schema (#13851)
Instead of using JSON-like syntax to describe node and relationship
properties we changed to a shorter and more concise schema description

Old:

```
        Node properties are the following:
        [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Actor)-[:ACTED_IN]->(:Movie)']
```

New:

```
Node properties are the following:
Movie {name: STRING},Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Actor)-[:ACTED_IN]->(:Movie)
```
2023-11-29 11:13:12 -08:00
Sauhaard
7ec4dbeb80 langchain[minor]: Add StackExchange API integration (#14002)
Implements
[#12115](https://github.com/langchain-ai/langchain/issues/12115)

Who can review?
@baskaryan , @eyurtsev , @hwchase17 

Integrated Stack Exchange API into Langchain, enabling access to diverse
communities within the platform. This addition enhances Langchain's
capabilities by allowing users to query Stack Exchange for specialized
information and engage in discussions. The integration provides seamless
interaction with Stack Exchange content, offering content from varied
knowledge repositories.

A notebook example and test cases were included to demonstrate the
functionality and reliability of this integration.

- Add StackExchange as a tool.
- Add unit test for the StackExchange wrapper and tool.
- Add documentation for the StackExchange wrapper and tool.

If you have time, could you please review the code and provide any
feedback as necessary! My team is welcome to any suggestions.

---------

Co-authored-by: Yuval Kamani <yuvalkamani@gmail.com>
Co-authored-by: Aryan Thakur <aryanthakur@Aryans-MacBook-Pro.local>
Co-authored-by: Manas1818 <79381912+manas1818@users.noreply.github.com>
Co-authored-by: aryan-thakur <61063777+aryan-thakur@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-29 10:32:07 -08:00
Bagatur
d4405bc94e langchain[patch]: Release 0.0.343 (#14037) 2023-11-29 10:31:03 -08:00
Erick Friis
3c29b0ded5 templates[patch]: template pyproject updates (#14035) 2023-11-29 10:21:18 -08:00
Yves Zumbühl
9c0ad0cebb langchain[patch]: Improve HyDe with custom prompts and ability to supply the run_manager (#14016)
- **Description:** The class allows to only select between a few
predefined prompts from the paper. That is not ideal, since other use
cases might need a custom prompt. The changes made allow for this. To be
able to monitor those, I also added functionality to supply a custom
run_manager.
  - **Issue:** no issue, but a new feature,
  - **Dependencies:** none,
  - **Tag maintainer:** @hwchase17,
  - **Twitter handle:** @yvesloy

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-29 09:40:53 -08:00
Anton Romanov
4964278ce4 docs[patch]: Update typo in map.ipynb (#14030)
fix the typo in docs, using "with" instead of "when"
2023-11-29 09:14:29 -08:00
Chad Norvell
1c4bfb8c5f langchain[patch]: Mathpix PDF loader supports arbitrary extra params (#13950)
- **Description:** Support providing whatever extra parameters you want
to the Mathpix PDF loader API request.
  - **Issue:** #12773
  - **Dependencies:** None

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-29 02:12:32 -08:00
Unai Garay Maestre
9e2ae866c4 langchain[patch]: Adds progress bar to GooglePalmEmbeddings (#13812)
- **Description:** Adds a tqdm progress bar to GooglePalmEmbeddings when
embedding a list.
  - **Issue:** #13637
  - **Dependencies:** TQDM as a main dependency (instead of extra)


Signed-off-by: ugm2 <unaigaraymaestre@gmail.com>

---------

Signed-off-by: ugm2 <unaigaraymaestre@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-29 01:58:53 -08:00
Richie
1cd9d5f332 docs[patch]: fix typo langchain version for mongodb integration (#14006)
- **Description:** update minimal supported langchain version for
[mongodb atlast integration
webpage](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)
- **Issue:** none
- **Dependencies:** none

-----

Just fixing a typo. 
In [mongodb atlas vectorstore integration
page](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas),
`langchain` support for `$vectorSearch MQL stage` should be `0.0.305`
rather than `0.0.35`
2023-11-28 21:20:30 -08:00
David Norman
a578076aea Mask api key for Together LLM (#13981)
- **Description:** Add unit tests and mask api key for Together LLM
- **Issue:** the issue
https://github.com/langchain-ai/langchain/issues/12165 ,
  - **Dependencies:** N/A
  - **Tag maintainer:** ?,
  - **Twitter handle:** N/A

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2023-11-28 22:57:40 -05:00
Pavel Zwerschke
5f5c701f2c docs: Install langsmith from conda-forge (#13335)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

langsmith is available on conda-forge as well and also a dependency of
the package so it gets installed either way by conda
306ed13308/recipe/meta.yaml (L43)
2023-11-28 22:44:02 -05:00
Piotr Ząbek
d0b818b634 DOCS: added missing imports (#13736) (#13737)
- **Description:** Fixed missing imports in docs 
- **Issue:**
[#13736](https://github.com/langchain-ai/langchain/issues/13736)
- **Dependencies:** N/A
2023-11-28 22:42:43 -05:00
Johnny
6463d2d0bd small fix matching engine AttributeError - object has no attribute (#13763)
This PR is fixing an attributeError: object endpoint has no attribute
"_public_match_client" when using gcp matching engine with private VPC
network.

@baskaryan, @eyurtsev, @hwchase17.

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-28 22:42:29 -05:00
Amyh102
750485eaa8 Add object parsing functionality (#13864)
* **Description:** Parses huggingface dataset Sequence objects into
strings for Document loading.
* **Issue:** Fixes #10674 
* **Tag maintainter:** @baskaryan @eyurtsev

---------

Co-authored-by: Amy Han <amyhan@Amys-Air.lan>
Co-authored-by: Amy Han <amyhan@Amys-MacBook-Air.local>
2023-11-28 22:33:16 -05:00
ggeutzzang
981f78f920 Fix: (issue #13825) Getting an error with DallEAPIWrapper (#13874)
- **Description:** As of OpenAI's Python package 1.0, the existing
DallEAPIWrapper does not work correctly, so the example in the LangChain
Documentation link below does not work either.

https://python.langchain.com/docs/integrations/tools/dalle_image_generator
Also, since OpenAI only supports DALL-E version 2 or version 3, I
modified the DallEAPIWrapper to support it.

  - **Issue:** #13825 

  - **Twitter handle:** ggeutzzang
2023-11-28 22:31:25 -05:00
Kunal
74045bf5c0 max length attribute for spacy splitter for large docs (#13875)
For large size documents spacy splitter doesn't work it throws an error
as shown in below screenshot.
Reason its default max_length is 1000000 and there is no option to
increase it. So i added it in this PR.


![image](https://github.com/langchain-ai/langchain/assets/73680423/613625c3-0e21-4834-9aad-2a73cf56eecc)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-28 22:30:26 -05:00
Yusuf Khan
0bc7c1b5b4 Add Outline provider doc (#13938)
- **Description:** Added a provider doc to `docs/integrations/providers`
for the new Outline integration in #13889
  - **Tag maintainer:** @baskaryan
2023-11-28 22:29:30 -05:00
colton
643d28847d [docs] fix reduce prompt in summarization example (#13726)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

Small fix to _summarization_ example, `reduce_template` should use
`{docs}` variable.

Bug likely introduced as following code suggests using
`hub.pull("rlm/map-prompt")` instead of defined prompt.
2023-11-28 22:22:42 -05:00
Wang Wei
fe9341a29c feat: Add ERNIE-Bot-8K model support for ErnieBotChat. (#13716)
- **Description:** According to the document
https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a, add ERNIE-Bot-8K
model support for ErnieBotChat.
- **Dependencies:** Before using the ERNIE-Bot-8K, you should have the
model's access authority.
2023-11-28 22:22:23 -05:00
Leonid Ganeline
5c28bb63dd docs microsoft page updates (#14000)
The Excel, PowerPoint and SharePoint document loaders were missed in the
`Microsoft` platform page.
- added these references
2023-11-28 22:20:21 -05:00
Leonid Ganeline
15b32cfcd4 docs OpenAI platform page update (#14001)
Missed the OpenAI adapter reference in the OpenAI platform page
- Added this reference
2023-11-28 22:08:21 -05:00
Burak Ömür
0e462b72ef Update openai/create_llm_result function to consider kwargs (#13815)
Replace this entire comment with:
- **Description:** updates `create_llm_result` function within
`openai.py` to consider latest `params`,
  - **Issue:** #8928
  - **Dependencies:** -,
  - **Tag maintainer:** -
  - **Twitter handle:** [burkomr](https://twitter.com/burkomr)

<!-- If no one reviews your PR within a few days, please @-mention one
of @baskaryan, @eyurtsev, @hwchase17. -->

---------

Co-authored-by: Burak Ömür <burakomur@retorio.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-28 22:02:38 -05:00
chyroc
f97ab84c6b Merge pull request #13907
* feat: mask api_key for jina
2023-11-28 21:24:50 -05:00
nhywieza
9b86fb3fcb secretStr for baichuan chat model api key (#13946)
Merge pull request #13946
* secretStr for baichuan chat model api key
2023-11-28 21:20:23 -05:00
卢靖轩
aff1dba252 Merge pull request #13945
* feat: mask api key for nlpcloud
2023-11-28 21:16:36 -05:00
Leonid Kuligin
85bb3a418c Switched VertexAI models from preview (#13657)
Replace this entire comment with:
- **Description:** VertexAI models are now GA, moved away from using
preview ones from the SDK
  - **Issue:** #13606

---------

Co-authored-by: Nuno Campos <nuno@boringbits.io>
2023-11-28 20:38:04 -05:00
WaseemH
a47f1da884 docs[patch]: RAG Cookbook example fix (#13914)
### Description:
Hey 👋🏽  this is a small docs example fix. Hoping it helps future developers  who are working with Langchain.

### Problem:
Take a look at the original example code. You were not able to get the `dialogue_turn[0]` while it was a tuple.

Original code:
```python
def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer
```
In the original code you were getting this error:
```bash
    human = "Human: " + dialogue_turn[0].content
                        ~~~~~~~~~~~~~^^^
TypeError: 'HumanMessage' object is not subscriptable
```
### Solution:
The fix is to just for loop over the chat history and look to see if its a human or ai message and add it to the buffer.
2023-11-28 17:37:03 -08:00
Erick Friis
5eca1bd93f Library Licenses (#13300)
Same change as #8403 but in other libs

also updates (c) LangChain Inc. instead of @hwchase17
2023-11-28 17:34:27 -08:00
Bagatur
14799b139a infra[patch]: add base deps and fix docs lint (#13998) 2023-11-28 17:27:37 -08:00
Théo LEBRUN
926d4cfda7 Set default region from boto3 session for Bedrock (#13694)
- **Description:** Set default region from boto3 session for Bedrock 
- **Issue:** #13683
2023-11-28 20:26:54 -05:00
Snow
1a33e5b500 Repair Wikipedia document loader load_max_docs and improve test coverage. (#13769)
**Description:** 

Repair Wikipedia document loader `load_max_docs` and improve test
coverage.

**Issue:** 

The Wikipedia document loader was not respecting the `load_max_docs`
paramater (not reported) and would always return a maximum of 10
documents. This is because the API wrapper (in `utilities/wikipedia.py`)
wasn't passing `top_k_results` to the underlying [Wikipedia
library](https://wikipedia.readthedocs.io/en/latest/code.html#module-wikipedia).
By default this library returns 10 results.

The default number of results for the document loader has been reduced
from 100 to 25. This is because loading 100 results takes a very long
time and is an inconvenient default. It should possibly be 10.

In addition, the documentation for the loader reported that there was a
hard limit (300) on the number of documents returned. In actuality 300
is the maximum Wikipedia query character length set by the API wrapper.

Tests have been added for the document loader (previously missing) and
to test the correct numbers of documents are being returned by each
class, both by default, and when overridden. Also repaired is the
`assert_docs` test which has been updated to correctly test for the
default metadata (which includes `source` in recent releases).

**Dependencies:** 
nil

**Tag maintainer:**
@leo-gan

**Twitter handle:**
@queenvictoria
2023-11-28 20:26:40 -05:00
Bob Lin
04c4878306 Remove python_repl from _BASE_TOOLS (#13962)
### **Description:**

Previously `python_repl` was a built-in tool, but now it has been moved
to `langchain_experimental`.

When I use `load_tools` I get an error:

```python
In [1]: from langchain.agents import load_tools

In [2]: load_tools(["python_repl"])
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[2], line 1
----> 1 load_tools(["python_repl"])

File ~/workspace/langchain/libs/langchain/langchain/agents/load_tools.py:530, in load_tools(tool_names, llm, callbacks, **kwargs)
    528     tool_names.extend(requests_method_tools)
    529 elif name in _BASE_TOOLS:
--> 530     tools.append(_BASE_TOOLS[name]())
    531 elif name in _LLM_TOOLS:
    532     if llm is None:

File ~/workspace/langchain/libs/langchain/langchain/agents/load_tools.py:84, in _get_python_repl()
     83 def _get_python_repl() -> BaseTool:
---> 84     raise ImportError(
     85         "This tool has been moved to langchain experiment. "
     86         "This tool has access to a python REPL. "
     87         "For best practices make sure to sandbox this tool. "
     88         "Read https://github.com/langchain-ai/langchain/blob/master/SECURITY.md "
     89         "To keep using this code as is, install langchain experimental and "
     90         "update relevant imports replacing 'langchain' with 'langchain_experimental'"
     91     )

ImportError: This tool has been moved to langchain experiment. This tool has access to a python REPL. For best practices make sure to sandbox this tool. Read https://github.com/langchain-ai/langchain/blob/master/SECURITY.md To keep using this code as is, install langchain experimental and update relevant imports replacing 'langchain' with 'langchain_experimental'
```

In this case, it will be very confusing. I think it is no longer a
built-in tool now, so it can be removed from `_BASE_TOOLS`

### **Issue:** 

https://github.com/langchain-ai/langchain/issues/13858,
https://github.com/langchain-ai/langchain/issues/13859,
https://github.com/langchain-ai/langchain/issues/13856
### **Twitter handle:** 

[lin_bob57617](https://twitter.com/lin_bob57617)
2023-11-28 20:13:54 -05:00
Leonid Ganeline
52eee458bb renamed google_vertex_ai_vector_search notebook (#13484)
The `integrations/vectorstores/matchingengine.ipynb` example has the
"Google Vertex AI Vector Search" title. This place this Title in the
wrong order in the ToC (it is sorted by the file name).
- Renamed `integrations/vectorstores/matchingengine.ipynb` into
`integrations/vectorstores/google_vertex_ai_vector_search.ipynb`.
- Updated a correspondent comment in docstring
- Rerouted old URL to a new URL

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-28 16:58:29 -08:00
Leonid Ganeline
f5326cfb4e docs[patch]: link to LangSmith docs (#13740)
It happens that there is no link to the LangSmith Docs from the LangChain Docs.
Added this link
2023-11-28 16:44:45 -08:00
Leonid Ganeline
bf5787f58b experimental[patch]: fixed namespace bug (#13585)
It was :
`from langchain.schema.prompts import BasePromptTemplate`
but because of the breaking change in the ns, it is now
`from langchain.schema.prompt_template import BasePromptTemplate`

This bug prevents building the API Reference for the langchain_experimental
2023-11-28 16:40:27 -08:00
Leonid Ganeline
1ab8a14742 docs[patch]: top menu (#13748)
Addressed this issue with the top menu: It allocates too much space. If the screen is small, then the top menu items are split into two lines and look unreadable.
Another issue is with several top menu items: "Chat our docs" and "Also by LangChain". They are compound of several words which also hurts readability. The top menu items should be 1-word size.
Updates:
- "Chat our docs" -> "Chat" (the meaning is clean after clicking/opening the item)
- "Also by LangChain" -> "🦜🔗"
- "🦜🔗" moved before "Chat" item. This new item is partially copied from the first left item, the "🦜🔗 LangChain". This design (with two 🦜🔗 elements, visually splits the top menu into two parts. The first item in each part holds the 🦜🔗 symbols and, when we click the second 🦜🔗 item, it opens the drop-down menu. So, we've got two visually similar parts, which visually split the top menu on the right side: the LangChain Docs (and Doc-related items) and the lift side: other LangChain.ai (company) products/docs.
2023-11-28 16:35:38 -08:00
Bob Lin
41b3968d39 docs[patch]: Update CONTRIBUTING.md doc (#13965)
- **Description:** The new demo notebook should be placed in
[docs/docs/modules](https://github.com/langchain-ai/langchain/tree/master/docs/docs/modules)
  - **Twitter handle:**  lin_bob57617

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-28 16:32:25 -08:00
Taqi Jaffri
144710ad9a langchain[minor]: Updated DocugamiLoader, includes breaking changes (#13265)
There are the following main changes in this PR:

1. Rewrite of the DocugamiLoader to not do any XML parsing of the DGML
format internally, and instead use the `dgml-utils` library we are
separately working on. This is a very lightweight dependency.
2. Added MMR search type as an option to multi-vector retriever, similar
to other retrievers. MMR is especially useful when using Docugami for
RAG since we deal with large sets of documents within which a few might
be duplicates and straight similarity based search doesn't give great
results in many cases.

We are @docugami on twitter, and I am @tjaffri

---------

Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
2023-11-28 15:56:22 -08:00
Bagatur
a20e8f8bb0 experimental[patch]: release 0.0.43 (#13570) 2023-11-28 15:38:09 -08:00
juan-calvo-datatonic
6137894008 templates[minor]: Add rag google sensitive data protection template (#13921)
This is a template demonstrating how to utilize Google Sensitive Data
Protection in conjunction with ChatVertexAI(). Tagging you @efriis as
you reviewed my last template. :) Thanks!

Proof of successful execution: 

![image](https://github.com/langchain-ai/langchain/assets/82172964/e4d678aa-85c8-482b-b09d-81fe7e912dd4)

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-28 15:15:58 -08:00
Erick Friis
8b9dc5e6d3 langchain[patch]: contributing test guide update (#13993) 2023-11-28 14:38:11 -08:00
Bagatur
95a472a85f docs[patch]: install local core (#13990) 2023-11-28 14:36:22 -08:00
Bagatur
d8fe987ef5 langchain[patch]: release 0.0.342 (#13992) 2023-11-28 14:34:57 -08:00
Bagatur
61ec71064a docs[patch]: update stack diagram (#13902) 2023-11-28 14:19:13 -08:00
david qiu
9fb6805be4 langchain[minor]: Add retriever for Knowledge Bases for Amazon Bedrock (#13980)
- **Description:** Adds a retriever implementation for [Knowledge Bases
for Amazon Bedrock](https://aws.amazon.com/bedrock/knowledge-bases/), a
new service announced at AWS re:Invent, shortly before this PR was
opened. This depends on the `bedrock-agent-runtime` service, which will
be included in a future version of `boto3` and of `botocore`. We will
open a follow-up PR documenting the minimum required versions of `boto3`
and `botocore` after that information is available.
  - **Issue:** N/A
  - **Dependencies:** `boto3>=1.33.2, botocore>=1.33.2`
  - **Tag maintainer:** @baskaryan
  - **Twitter handles:** `@pjain7` `@dead_letter_q`

This PR includes a documentation notebook under
`docs/docs/integrations/retrievers`, which I (@dlqqq) have verified
independently.

EDIT: `bedrock-agent-runtime` service is now included in
`boto3>=1.33.2`:
5cf793f493

---------

Co-authored-by: Piyush Jain <piyushjain@duck.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-28 14:10:23 -08:00
Bagatur
1aed2d1f08 core[patch]: release 0.0.7 (#13989) 2023-11-28 14:05:01 -08:00
David Duong
eb67f07e32 Track RunnableAssign as a separate run trace (#13972)
Addressing incorrect order being sent to callbacks / tracers, due to the
nature of threading

---------

Co-authored-by: Nuno Campos <nuno@boringbits.io>
2023-11-28 22:02:31 +00:00
Nuno Campos
0f255bb6c4 In Runnable.stream_log build up final_output from adding output chunks (#12781)
Add arg to omit streamed_output list, in cases where final_output is
enough this saves bandwidth

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-28 21:50:41 +00:00
Nuno Campos
970fe23feb Fixes for opengpts release (#13960) 2023-11-28 21:49:43 +00:00
David Duong
947daaf833 Exclude Bedrock client and credentials_profile_name fields from serialisation (#13603) 2023-11-28 16:34:46 -05:00
Bagatur
48fbc5513d infra[patch], langchain[patch]: fix test deps and upper bound langchain dep on core(#13984) 2023-11-28 13:26:15 -08:00
Stefano Lottini
1fd724293b Astra DB vector store, move constructor docstring to class docstring (#13784)
This PR rearranges the docstring for the `AstraDB` vector store class so
as to have all useful information in the _class_ docstring for ease of
reading.

(incidentally, due to an oversight, the docstring that was in the
constructor ended up buried below some lines of code, thereby
disappearing altogether from accessibility. Apologies.)
2023-11-28 16:25:44 -05:00
Johannes Foulds
fc40bd4cdb AnthropicFunctions function_call compatibility (#13901)
- **Description:** Updates to `AnthropicFunctions` to be compatible with
the OpenAI `function_call` functionality.
- **Issue:** The functionality to indicate `auto`, `none` and a forced
function_call was not completely implemented in the existing code.
  - **Dependencies:** None
- **Tag maintainer:** @baskaryan , and any of the other maintainers if
needed.
  - **Twitter handle:** None

I have specifically tested this functionality via AWS Bedrock with the
Claude-2 and Claude-Instant models.
2023-11-28 16:22:55 -05:00
Varun
14cc907d35 Update the stable docs link (#13798)
- **Description:** Point to the stable version of documentation, 
  - **Twitter handle:** varunzxzx
2023-11-28 21:11:16 +00:00
mengjincn
05ea4fd37d fix merge None value and non None value error (#13703)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-28 15:49:56 -05:00
Amélie
d2cad53ec0 Fix broken link on Meilisearch vector-store documentation (#13604)
- **Description:** dead link replacement 
  - **Issue:** no open issue

**Note:**
Hi langchain team,
Sorry to open a PR for this concern but we realized that one of the
links present in the documentation booklet was broken 😄
2023-11-28 15:49:32 -05:00
Ali Orozgani
32d794f5a3 iMessage loader: implement message content extraction from attributed… (#13634)
- **Description:** We are adding functionality to extract message
content from the `attributedBody` field of the database, in case the
content is not in the `text` field.
  - **Issue:** Closes #13326 and #10680 
  - **Dependencies:** None.
  - **Tag maintainer:** @eyurtsev, @hwchase17

---------

Co-authored-by: onotate <johnp.pham@mail.utoronto.ca>
2023-11-28 15:45:43 -05:00
William FH
e5256bcb69 [Evals] Add Project Tags (#13982)
Add them to project extra
2023-11-28 11:38:59 -08:00
Rihards Gravis
9e017ff6ba docs[patch]: Reduce largest static image file size (#13508)
- **Description:** Reduce image asset file size used in documentation by
running them via lossless image optimization
([tinypng](https://www.npmjs.com/package/tinypng-cli) was used in this
case). Images wider than 1916px (the maximum width of an image displayed
in documentation) where downsized.
- **Issue:** No issue is created for this, but the large image file
assets caused slow documentation load times
  - **Dependencies:** No dependencies affected
2023-11-28 13:00:53 -05:00
Nuno Campos
e0bcc98436 infra[patch]: Use langchain core in-tree as a dev dependency (#13957)
Using the published version means master is broken for contributors
whenever we make changes in one lib that depend on the other.
2023-11-28 09:23:43 -08:00
unifyh
2703a1b061 Fix MarkdownHeaderTextSplitter not recognizing tilde-fenced code blocks (#13511)
- **Description:** Previously `MarkdownHeaderTextSplitter` did not
consider tilde-fenced code blocks
(https://spec.commonmark.org/0.30/#fenced-code-blocks). This PR fixes
that.
   ````md
   # Bug caused by previous implementation:
   ~~~py
   foo()
   # This is a comment that would be considered header
   bar()
   ~~~
   ````
 - **Tag maintainer:** @baskaryan
2023-11-28 11:52:38 -05:00
Leonid Ganeline
7929b26017 office365 toolkit bug fixes (#13618)
Several bug fixes:
- emails: instead of `bcc` the `cc` is used.
- errors in the truncation descriptions
- no truncation of the `message_search`
Several updates:
- generalized UTC format 
- truncation limit can be changed now in _call()
2023-11-28 11:49:24 -05:00
William FH
60309341bd Eval Error Key (#13974) 2023-11-28 08:38:30 -08:00
Erick Friis
f9bef600f1 RELEASE: core 0.0.7 (#13973) 2023-11-28 10:28:28 -05:00
Nicolas Bondoux
e17edc4d0b RunnableLambda: create afunc instance from func when not provided (#13408)
Fixes #13407.

This workaround consists in letting the RunnableLambda create its
self.afunc from its self.func when self.afunc is not provided; the
change has no dependency.

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Nuno Campos <nuno@langchain.dev>
2023-11-28 11:18:26 +00:00
Nuno Campos
391f200eaa Implement stream() and astream() for agents (#12783)
```
---- chunk 1
{'actions': [AgentActionMessageLog(tool='Search', tool_input="Leo DiCaprio's current girlfriend", log="\nInvoking: `Search` with `Leo DiCaprio's current girlfriend`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Leo DiCaprio\'s current girlfriend"\n}'}})])],
 'messages': [AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Leo DiCaprio\'s current girlfriend"\n}'}})]}
---- chunk 2
{'messages': [FunctionMessage(content="According to Us, the 48-year-old actor is now “exclusively” dating Italian model Vittoria Ceretti. A source told Us that DiCaprio is “completely smitten” with Ceretti, and their relationship is “going so well that Leo's actually being exclusive.”", name='Search')],
 'steps': [AgentStep(action=AgentActionMessageLog(tool='Search', tool_input="Leo DiCaprio's current girlfriend", log="\nInvoking: `Search` with `Leo DiCaprio's current girlfriend`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Leo DiCaprio\'s current girlfriend"\n}'}})]), observation="According to Us, the 48-year-old actor is now “exclusively” dating Italian model Vittoria Ceretti. A source told Us that DiCaprio is “completely smitten” with Ceretti, and their relationship is “going so well that Leo's actually being exclusive.”")]}
---- chunk 3
{'actions': [AgentActionMessageLog(tool='Search', tool_input='Vittoria Ceretti age', log='\nInvoking: `Search` with `Vittoria Ceretti age`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Vittoria Ceretti age"\n}'}})])],
 'messages': [AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Vittoria Ceretti age"\n}'}})]}
---- chunk 4
{'messages': [FunctionMessage(content='25 years', name='Search')],
 'steps': [AgentStep(action=AgentActionMessageLog(tool='Search', tool_input='Vittoria Ceretti age', log='\nInvoking: `Search` with `Vittoria Ceretti age`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Vittoria Ceretti age"\n}'}})]), observation='25 years')]}
---- chunk 5
{'actions': [AgentActionMessageLog(tool='Calculator', tool_input='25^0.43', log='\nInvoking: `Calculator` with `25^0.43`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Calculator', 'arguments': '{\n  "__arg1": "25^0.43"\n}'}})])],
 'messages': [AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Calculator', 'arguments': '{\n  "__arg1": "25^0.43"\n}'}})]}
---- chunk 6
{'messages': [FunctionMessage(content='Answer: 3.991298452658078', name='Calculator')],
 'steps': [AgentStep(action=AgentActionMessageLog(tool='Calculator', tool_input='25^0.43', log='\nInvoking: `Calculator` with `25^0.43`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Calculator', 'arguments': '{\n  "__arg1": "25^0.43"\n}'}})]), observation='Answer: 3.991298452658078')]}
---- chunk 7
{'messages': [AIMessage(content="Leonardo DiCaprio's current girlfriend is the Italian model Vittoria Ceretti, who is 25 years old. Her age raised to the 0.43 power is approximately 3.99.")],
 'output': "Leonardo DiCaprio's current girlfriend is the Italian model "
           'Vittoria Ceretti, who is 25 years old. Her age raised to the 0.43 '
           'power is approximately 3.99.'}
---- final
{'actions': [AgentActionMessageLog(tool='Search', tool_input="Leo DiCaprio's current girlfriend", log="\nInvoking: `Search` with `Leo DiCaprio's current girlfriend`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Leo DiCaprio\'s current girlfriend"\n}'}})]),
             AgentActionMessageLog(tool='Search', tool_input='Vittoria Ceretti age', log='\nInvoking: `Search` with `Vittoria Ceretti age`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Vittoria Ceretti age"\n}'}})]),
             AgentActionMessageLog(tool='Calculator', tool_input='25^0.43', log='\nInvoking: `Calculator` with `25^0.43`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Calculator', 'arguments': '{\n  "__arg1": "25^0.43"\n}'}})])],
 'messages': [AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Leo DiCaprio\'s current girlfriend"\n}'}}),
              FunctionMessage(content="According to Us, the 48-year-old actor is now “exclusively” dating Italian model Vittoria Ceretti. A source told Us that DiCaprio is “completely smitten” with Ceretti, and their relationship is “going so well that Leo's actually being exclusive.”", name='Search'),
              AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Vittoria Ceretti age"\n}'}}),
              FunctionMessage(content='25 years', name='Search'),
              AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Calculator', 'arguments': '{\n  "__arg1": "25^0.43"\n}'}}),
              FunctionMessage(content='Answer: 3.991298452658078', name='Calculator'),
              AIMessage(content="Leonardo DiCaprio's current girlfriend is the Italian model Vittoria Ceretti, who is 25 years old. Her age raised to the 0.43 power is approximately 3.99.")],
 'output': "Leonardo DiCaprio's current girlfriend is the Italian model "
           'Vittoria Ceretti, who is 25 years old. Her age raised to the 0.43 '
           'power is approximately 3.99.',
 'steps': [AgentStep(action=AgentActionMessageLog(tool='Search', tool_input="Leo DiCaprio's current girlfriend", log="\nInvoking: `Search` with `Leo DiCaprio's current girlfriend`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Leo DiCaprio\'s current girlfriend"\n}'}})]), observation="According to Us, the 48-year-old actor is now “exclusively” dating Italian model Vittoria Ceretti. A source told Us that DiCaprio is “completely smitten” with Ceretti, and their relationship is “going so well that Leo's actually being exclusive.”"),
           AgentStep(action=AgentActionMessageLog(tool='Search', tool_input='Vittoria Ceretti age', log='\nInvoking: `Search` with `Vittoria Ceretti age`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Search', 'arguments': '{\n  "__arg1": "Vittoria Ceretti age"\n}'}})]), observation='25 years'),
           AgentStep(action=AgentActionMessageLog(tool='Calculator', tool_input='25^0.43', log='\nInvoking: `Calculator` with `25^0.43`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'function_call': {'name': 'Calculator', 'arguments': '{\n  "__arg1": "25^0.43"\n}'}})]), observation='Answer: 3.991298452658078')]}
```
2023-11-28 08:11:37 +00:00
Michael Feil
686162670e langchain[minor]: Adding infinity embedding integration. (#13928)
This adds integation to https://github.com/michaelfeil/infinity. Users
requested it in https://github.com/michaelfeil/infinity/issues/36
@saatvikshah

Follows my implementation of gradient.ai.

Feedback 1: Well done - I love your CI / repo / poetry setup - I adapted
a lot in https://github.com/michaelfeil/infinity.
Feedback 2: Not so good: The openai integration contains to much reverse
engineering - in general projects such as michaelfeil/infinity and
huggingface/text-embeddings-inference are compatible to the `pip install
openai` package.

Reverse engineering like this one is really hindering the use for me:

8e88ba16a8/libs/langchain/langchain/embeddings/openai.py (L347)

8e88ba16a8/libs/langchain/langchain/embeddings/openai.py (L351)
- it is about preventing 3rd party providers to use the same url + uses
interfaces of openai, that are not publically documented.
2023-11-27 16:43:47 -08:00
Bagatur
10a6e7cbb6 langchain[patch], core[patch]: Make common utils public (#13932)
- rename `langchain_core.chat_models.base._generate_from_stream` -> `generate_from_stream`
- rename `langchain_core.chat_models.base._agenerate_from_stream` -> `agenerate_from_stream`
- export `langchain_core.utils.utils.build_extra_kwargs` from `langchain_core.utils`
2023-11-27 15:34:46 -08:00
Oleksandr Yaremchuk
c0277d06e8 experimental[patch] Update prompt injection model (#13930)
- **Description:** Existing model used for Prompt Injection is quite
outdated but we fine-tuned and open-source a new model based on the same
model deberta-v3-base from Microsoft -
[laiyer/deberta-v3-base-prompt-injection](https://huggingface.co/laiyer/deberta-v3-base-prompt-injection).
It supports more up-to-date injections and less prone to
false-positives.
  - **Dependencies:** No
  - **Tag maintainer:** -
  - **Twitter handle:** @alex_yaremchuk

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-27 17:56:53 -05:00
Bob Lin
e6ebde9688 experimental[patch]: Add experimental.agent imports (#13839)
- **Description:** The experimental package needs to be compatible with
the usage of importing agents

For example, if i use `from langchain.agents import
create_pandas_dataframe_agent`, running the program will prompt the
following information:

```
Traceback (most recent call last):
   File "/Users/dongwm/test/main.py", line 1, in <module>
     from langchain.agents import create_pandas_dataframe_agent
   File "/Users/dongwm/test/venv/lib/python3.11/site-packages/langchain/agents/__init__.py", line 87, in __getattr__
     raise ImportError(
ImportError: create_pandas_dataframe_agent has been moved to langchain experimental. See https://github.com/langchain-ai/langchain/discussions/11680 for more information.
Please update your import statement from: `langchain.agents.create_pandas_dataframe_agent` to `langchain_experimental.agents.create_pandas_dataframe_agent`.
```

But when I changed to `from langchain_experimental.agents import
create_pandas_dataframe_agent`, it was actually wrong:

```python
Traceback (most recent call last):
  File "/Users/dongwm/test/main.py", line 2, in <module>
    from langchain_experimental.agents import create_pandas_dataframe_agent
ImportError: cannot import name 'create_pandas_dataframe_agent' from 'langchain_experimental.agents' (/Users/dongwm/test/venv/lib/python3.11/site-packages/langchain_experimental/agents/__init__.py)
```

I should use `from langchain_experimental.agents.agent_toolkits import
create_pandas_dataframe_agent`. In order to solve the problem and make
it compatible, I added additional import code to the
langchain_experimental package. Now it can be like this Used `from
langchain_experimental.agents import create_pandas_dataframe_agent`

  - **Twitter handle:** [lin_bob57617](https://twitter.com/lin_bob57617)
2023-11-27 14:03:47 -08:00
Tyler Titsworth
afcfa2a5e7 langchain[patch]: Add progress bar option to OllamaEmbeddings (#13882)
- **Description:** Adds a tqdm progress bar to OllamaEmbeddings when
embedding a list.
- **Issue:** Related to #13637, but extended to Ollama.
- **Dependencies:** `tqdm` made a necessary dependency.

Thanks to @ugm2 for helping identify a common problem. Embeddings take a
very long time to finish on local machines, and require a progress bar
to help identify if one should even attempt the workload.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-27 13:56:13 -08:00
Kalyan
ec53d983a1 TEMPLATES Add rag-opensearch template (#13501)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

Adding rag-opensearch template.

---------

Signed-off-by: kalyanr <kalyan.ben10@live.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-27 16:21:39 -05:00
Leonid Ganeline
e47b9c5285 DOCS: move adapters to integrations (#13862)
Current docs for adapters are in the `Guides/Adapters which is not a
good place.
- moved Adapters into `Integratons/Components/Adapters/
- simplified the OpenAI adapter notebook
- rerouted the old OpenAI adapter page URL to a new one.
2023-11-27 13:05:43 -08:00
jeremyb-data
cd77fba562 Improvement: Weaviate multitenant adddocs (#13827)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
- **Description:** Added a line to pass the tenant parameter to
add_data_object
  - **Issue:** An extra line added from the fix for #9956
  - **Dependencies:** n/a
  - **Tag maintainer:** @baskaryan 

Tested locally, works as expected with the line change.

---------

Co-authored-by: Simon Dai <simon6752@gmail.com>
2023-11-27 12:59:57 -08:00
jiangying
3e30cd8261 NIT: comment typo (#13817) 2023-11-27 12:59:12 -08:00
Manuel Riezebosch
92b07ecaf3 DOCS: fix link to question answering (#13806)
first link in
[overview](https://python.langchain.com/docs/use_cases/question_answering/code_understanding#overview)
2023-11-27 12:56:15 -08:00
Assaf Toledo
ba62ff89cc BUGFIX: Support for elastic indices that don't return 'metadata' in '_source' (#13903)
Description: Some Elastic indexes do not return a 'metadata' field in
'_source'. However, prior to this PR, the code assumed there always is a
'metadata' field. This PR adds support for cases where the field is
missing by adding it manually.

Issue: #13869
2023-11-27 12:52:57 -08:00
Enric Soler Rastrollo
c156d0281a BUGFIX: Use embedding key in azure_cosmos_db index creation (#13919)
Description: Implement embedding key parametrisation
Issue: https://github.com/langchain-ai/langchain/issues/13918
Dependencies: None
Tag maintainer: @hwchase17 @izzymsft
Twitter handle:@MaddogoS
2023-11-27 12:51:08 -08:00
Bagatur
ac67422a3d IMPROVEMENT: import Document from core (#13905) 2023-11-27 12:48:43 -08:00
chyroc
886bc2d50a IMPROVEMENT: fix qianfan validate_environment typo (#13908) 2023-11-27 11:17:27 -08:00
Chengzu Ou
4b8e053fe8 FEATURE: Add Databricks Vector Search as a new vector store (#13621)
**Description:**
This PR adds Databricks Vector Search as a new vector store in
LangChain.

- [x] Add `DatabricksVectorSearch` in `langchain/vectorstores/`
- [x] Unit tests
- [x] Add
[`databricks-vectorsearch`](https://pypi.org/project/databricks-vectorsearch/)
as a new optional dependency

We ran the following checks:
- `make format` passed  
- `make lint` failed but the failures were caused by other files
    + Files touched by this PR passed the linter  
- `make test` passed  
- `make coverage` failed but the failures were caused by other files.
Tests added by or related to this PR all passed
+ langchain/vectorstores/databricks_vector_search.py test coverage 94% 
- `make spell_check` passed  

The example notebook and updates to the [provider's documentation
page](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/providers/databricks.md)
will be added later in a separate PR.

**Dependencies:**
Optional dependency:
[`databricks-vectorsearch`](https://pypi.org/project/databricks-vectorsearch/)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-27 11:07:26 -08:00
Leonid Kuligin
25387db432 BUFIX: add support for various OSS images from Vertex Model Garden (#13917)
- **Description:** add support for various OSS images from Model
Garden
  - **Issue:** #13370
2023-11-27 10:31:53 -08:00
Eugene Yurtsev
e186637921 Document Runnable Binding (#13927)
Document runnable binding
2023-11-27 13:21:27 -05:00
Bagatur
46b3311190 RELEASE: 0.0.341 (#13926) 2023-11-27 09:51:12 -08:00
Nuno Campos
f6b05cacd0 Update root poetry lock with core (#13922)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-27 17:30:44 +00:00
umair mehmood
b3e08f9239 improvement: fix chat prompt loading from config (#13818)
Add loader for loading chat prompt from config file.

fixed: #13667

@efriis 
@baskaryan
2023-11-27 11:39:50 -05:00
Nuno Campos
8a3e0c9afa Add option to prefix config keys in configurable_alts (#13714) 2023-11-27 15:25:17 +00:00
Tomaz Bratanic
4ce5254442 Add Cypher template diagrams (#13913) 2023-11-27 10:18:51 -05:00
Taqi Jaffri
bfc12a4a76 DOCS: Simplified Docugami cookbook to remove code now available in docugami library (#13828)
The cookbook had some code to upload files, and wait for the processing
to finish.

This code is now moved to the `docugami` library so removing from the
cookbook to simplify.

Thanks @rlancemartin for suggesting this when working on evals.

---------

Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
2023-11-27 00:07:24 -08:00
ggeutzzang
3749af79ae DOCS: fixed error in the docstring of RunnablePassthrough class (#13843)
This pull request addresses an issue found in the example code within
the docstring of `libs/core/langchain_core/runnables/passthrough.py`

The original code snippet caused a `NameError` due to the missing import
of `RunnableLambda`. The error was as follows:
```
     12     return "completion"
     13 
---> 14 chain = RunnableLambda(fake_llm) | {
     15     'original': RunnablePassthrough(), # Original LLM output
     16     'parsed': lambda text: text[::-1] # Parsing logic

NameError: name 'RunnableLambda' is not defined
```
To resolve this, I have modified the example code to include the
necessary import statement for `RunnableLambda`. Additionally, I have
adjusted the indentation in the code snippet to ensure consistency and
readability.

The modified code now successfully defines and utilizes
`RunnableLambda`, ensuring that users referencing the docstring will
have a functional and clear example to follow.

There are no related GitHub issues for this particular change.

Modified Code:
```python
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.runnables import RunnableLambda

runnable = RunnableParallel(
    origin=RunnablePassthrough(),
    modified=lambda x: x+1
)

runnable.invoke(1) # {'origin': 1, 'modified': 2}

def fake_llm(prompt: str) -> str: # Fake LLM for the example
    return "completion"

chain = RunnableLambda(fake_llm) | {
    'original': RunnablePassthrough(), # Original LLM output
    'parsed': lambda text: text[::-1] # Parsing logic
}

chain.invoke('hello') # {'original': 'completion', 'parsed': 'noitelpmoc'}
```

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-27 00:06:55 -08:00
Dylan Williams
1983a39894 FEATURE: Add OneNote document loader (#13841)
- **Description:** Added OneNote document loader
  - **Issue:** #12125
  - **Dependencies:** msal

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-26 23:59:52 -08:00
Ikko Eltociear Ashimine
ff7d4d9c0b Update llamacpp.ipynb (#13840)
specifed -> specified
2023-11-26 23:47:19 -08:00
Tomaz Bratanic
1ad65f7a98 BUGFIX: Fix bugs with Cypher validation (#13849)
Fixes https://github.com/langchain-ai/langchain/issues/13803. Thanks to
@sakusaku-rich
2023-11-26 19:30:11 -08:00
Sᴜᴘᴇʀ Lᴇᴇ
e42e95cc11 docs: fix link to local_retrieval_qa (#13872)
\The original link in [this
section](https://python.langchain.com/docs/use_cases/question_answering/#:~:text=locally%2Drunning%20models-,here,-.):

https://python.langchain.com/docs/modules/use_cases/question_answering/local_retrieval_qa

After fix:

https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa
2023-11-26 19:16:46 -08:00
Harrison Chase
6a35831128 BUGFIX: export more types (#13886)
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-26 19:15:34 -08:00
Yusuf Khan
935f78c944 FEATURE: Add retriever for Outline (#13889)
- **Description:** Added a retriever for the Outline API to ask
questions on knowledge base
  - **Issue:** resolves #11814
  - **Dependencies:** None
  - **Tag maintainer:** @baskaryan
2023-11-26 18:56:12 -08:00
ggeutzzang
f2af82058f DOCS: Fix Sample Code for Compatibility with Pydantic 2.0 (#13890)
- **Description:** 
I encountered an issue while running the existing sample code on the
page https://python.langchain.com/docs/modules/agents/how_to/agent_iter
in an environment with Pydantic 2.0 installed. The following error was
triggered:

```python
ValidationError                           Traceback (most recent call last)
<ipython-input-12-2ffff2c87e76> in <cell line: 43>()
     41 
     42 tools = [
---> 43     Tool(
     44         name="GetPrime",
     45         func=get_prime,

2 frames
/usr/local/lib/python3.10/dist-packages/pydantic/v1/main.py in __init__(__pydantic_self__, **data)
    339         values, fields_set, validation_error = validate_model(__pydantic_self__.__class__, data)
    340         if validation_error:
--> 341             raise validation_error
    342         try:
    343             object_setattr(__pydantic_self__, '__dict__', values)

ValidationError: 1 validation error for Tool
args_schema
  subclass of BaseModel expected (type=type_error.subclass; expected_class=BaseModel)
```

I have made modifications to the example code to ensure it functions
correctly in environments with Pydantic 2.0.
2023-11-26 18:21:13 -08:00
Harrison Chase
968ba6961f add skeleton of thought (#13883) 2023-11-26 19:31:41 -05:00
Bagatur
0efa59cbb8 RELEASE: 0.0.339rc3 (#13852) 2023-11-25 10:37:30 -08:00
Bagatur
7222c42077 RELEASE: core 0.0.6 (#13853) 2023-11-25 10:21:14 -08:00
raelix
c172605ea6 IMPROVEMENT: Added title metadata to GoogleDriveLoader for optional File Loaders (#13832)
- **Description:** Simple change, I just added title metadata to
GoogleDriveLoader for optional File Loaders
  - **Dependencies:** no dependencies
  - **Tag maintainer:** @hwchase17

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-24 18:53:55 -08:00
Stefano Lottini
19c68c7652 FEATURE: Astra DB, LLM cache classes (exact-match and semantic cache) (#13834)
This PR provides idiomatic implementations for the exact-match and the
semantic LLM caches using Astra DB as backend through the database's
HTTP JSON API. These caches require the `astrapy` library as dependency.

Comes with integration tests and example usage in the `llm_cache.ipynb`
in the docs.

@baskaryan this is the Astra DB counterpart for the Cassandra classes
you merged some time ago, tagging you for your familiarity with the
topic. Thank you!

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-24 18:53:37 -08:00
Stefano Lottini
272df9dcae Astra DB, chat message history (#13836)
This PR adds a chat message history component that uses Astra DB for
persistence through the JSON API.
The `astrapy` package is required for this class to work.

I have added tests and a small notebook, and updated the relevant
references in the other docs pages.

(@rlancemartin this is the counterpart of the Cassandra equivalent class
you so helpfully reviewed back at the end of June)

Thank you!
2023-11-24 18:12:29 -08:00
Bagatur
58f7e109ac BUGFIX: Add import types and typevars from core (#13829) 2023-11-24 17:04:10 -08:00
Bagatur
751226e067 bump 0.0.339rc2 (#13787) 2023-11-23 12:50:09 -08:00
Bagatur
300ff01824 RELEASE: core 0.0.5 (#13786) 2023-11-23 12:23:50 -08:00
Bagatur
bcf83988ec Revert "INFRA: temp rm master condition (#13753)" (#13759) 2023-11-22 17:22:07 -08:00
Bagatur
df471b0c0b INFRA: temp rm master condition (#13753) 2023-11-22 16:59:50 -08:00
Bagatur
72c108b003 IMPROVEMENT: filter global warnings properly (#13754) 2023-11-22 16:26:37 -08:00
William FH
163bf165ed Add Batch Size kwarg to the llm start callback (#13483)
So you can more easily use the token counts directly from the API
endpoint for batch size of 1
2023-11-22 14:47:57 -08:00
Bagatur
23566cbea9 DOCS: core editable dep api refs (#13747) 2023-11-22 14:33:30 -08:00
Bagatur
0be515f720 RELEASE: 0.0.339rc1 (#13746) 2023-11-22 14:29:49 -08:00
Bagatur
2bc5bd67f7 RELEASE: core 0.0.4 (#13745) 2023-11-22 13:57:28 -08:00
Bagatur
b6b7654f7f INFRA: run LC ci after core changes (#13742) 2023-11-22 13:38:48 -08:00
Bagatur
3d28c1a9e0 DOCS: fix core api ref build (#13744) 2023-11-22 15:42:35 -05:00
Bagatur
32d087fcb8 REFACTOR: combine core documents files (#13733) 2023-11-22 10:10:26 -08:00
h3l
14d4fb98fc DOCS: Fix typo/line break in python code (#13708) 2023-11-22 09:10:07 -08:00
William FH
5b90fe5b1c Fix locking (#13725) 2023-11-22 07:37:25 -08:00
Bagatur
16af282429 BUGFIX: add prompt imports for backwards compat (#13702) 2023-11-21 23:04:20 -08:00
Erick Friis
78da34153e TEMPLATES Metadata (#13691)
Co-authored-by: Lance Martin <lance@langchain.dev>
2023-11-22 01:41:12 -05:00
Bagatur
e327bb4ba4 IMPROVEMENT: Conditionally import core type hints (#13700) 2023-11-21 21:38:49 -08:00
dandanwei
d47ee1ae79 BUGFIX: redis vector store overwrites falsey metadata (#13652)
- **Description:** This commit fixed the problem that Redis vector store
will change the value of a metadata from 0 to empty when saving the
document, which should be an un-intended behavior.
  - **Issue:** N/A
  - **Dependencies:** N/A
2023-11-21 20:16:23 -08:00
Bagatur
a21e84faf7 BUGFIX: llm backwards compat imports (#13698) 2023-11-21 20:12:35 -08:00
Yujie Qian
ace9e64d62 IMPROVEMENT: VoyageEmbeddings embed_general_texts (#13620)
- **Description:** add method embed_general_texts in VoyageEmebddings to
support input_type
  - **Issue:** 
  - **Dependencies:** 
  - **Tag maintainer:** 
  - **Twitter handle:** @Voyage_AI_
2023-11-21 18:33:07 -08:00
tanujtiwari-at
5064890fcf BUGFIX: handle tool message type when converting to string (#13626)
**Description:** Currently, if we pass in a ToolMessage back to the
chain, it crashes with error

`Got unsupported message type: `

This fixes it. 

Tested locally

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-21 18:20:58 -08:00
Josep Pon Farreny
143049c90f Added partial_variables to BaseStringMessagePromptTemplate.from_template(...) (#13645)
**Description:** BaseStringMessagePromptTemplate.from_template was
passing the value of partial_variables into cls(...) via **kwargs,
rather than passing it to PromptTemplate.from_template. Which resulted
in those *partial_variables being* lost and becoming required
*input_variables*.

Co-authored-by: Josep Pon Farreny <josep.pon-farreny@siemens.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-21 17:48:38 -08:00
Erick Friis
c5ae9f832d INFRA: Lint for imports (#13632)
- Adds pydantic/import linting to core
- Adds a check for `langchain_experimental` imports to langchain
2023-11-21 17:42:56 -08:00
Erick Friis
131db4ba68 BUGFIX: anthropic models on bedrock (#13629)
Introduced in #13403
2023-11-21 17:40:29 -08:00
David Ruan
04bddbaba4 BUGFIX: Update bedrock.py to fix provider bug (#13646)
Provider check was incorrectly failing for anything other than "meta"
2023-11-21 17:28:38 -08:00
Guangya Liu
aec8715073 DOCS: remove openai api key from cookbook (#13633) 2023-11-21 17:25:06 -08:00
Guangya Liu
bb18b0266e DOCS: fixed import error for BashOutputParser (#13680) 2023-11-21 16:33:40 -08:00
Bagatur
dc53523837 IMPROVEMENT: bump core dep 0.0.3 (#13690) 2023-11-21 15:50:19 -08:00
Bagatur
a208abe6b7 add callback import test (#13689) 2023-11-21 15:28:49 -08:00
Bagatur
083afba697 BUG: Add core utils imports (#13688) 2023-11-21 15:25:47 -08:00
Bagatur
c61e30632e BUG: more core fixes (#13665)
Fix some circular deps:
- move PromptValue into top level module bc both PromptTemplates and
OutputParsers import
- move tracer context vars to `tracers.context` and import them in
functions in `callbacks.manager`
- add core import tests
2023-11-21 15:15:48 -08:00
William FH
59df16ab92 Update name (#13676) 2023-11-21 13:39:30 -08:00
Erick Friis
bfb980b968 CLI 0.0.19 (#13677) 2023-11-21 12:34:38 -08:00
Taqi Jaffri
d65c36d60a docugami cookbook (#13183)
Adds a cookbook for semi-structured RAG via Docugami. This follows the
same outline as the semi-structured RAG with Unstructured cookbook:
https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb

The main change is this cookbook uses Docugami instead of Unstructured
to find text and tables, and shows how XML markup in the output helps
with retrieval and generation.

We are \@docugami on twitter, I am \@tjaffri

---------

Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
2023-11-21 12:02:20 -08:00
jakerachleff
249c796785 update langserve to v0.0.30 (#13673)
Upgrade langserve template version to 0.0.30 to include new improvements
2023-11-21 11:17:47 -08:00
jakerachleff
c6937a2eb4 fix templates dockerfile (#13672)
- **Description:** We need to update the Dockerfile for templates to
also copy your README.md. This is because poetry requires that a readme
exists if it is specified in the pyproject.toml
2023-11-21 11:09:55 -08:00
Bagatur
11614700a4 bump 0.0.339rc0 (#13664) 2023-11-21 08:41:59 -08:00
Bagatur
d32e511826 REFACTOR: Refactor langchain_core (#13627)
Changes:
- remove langchain_core/schema since no clear distinction b/n schema and
non-schema modules
- make every module that doesn't end in -y plural
- where easy have 1-2 classes per file
- no more than one level of nesting in directories
- only import from top level core modules in langchain
2023-11-21 08:35:29 -08:00
William FH
17c6551c18 Add error rate (#13568)
To the in-memory outputs. Separate it out from the outputs so it's
present in the dataframe.describe() results
2023-11-21 07:51:30 -08:00
Nuno Campos
8329f81072 Use pytest asyncio auto mode (#13643)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-21 15:00:13 +00:00
Lance Martin
611e1e0ca4 Add template for gpt-crawler (#13625)
Template for RAG using
[gpt-crawler](https://github.com/BuilderIO/gpt-crawler).

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-20 21:32:57 -08:00
Bagatur
99b4f46cbe REFACTOR: Add core as dep (#13623) 2023-11-20 14:38:10 -08:00
Harrison Chase
d82cbf5e76 Separate out langchain_core package (#13577)
Co-authored-by: Nuno Campos <nuno@boringbits.io>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-20 13:09:30 -08:00
Bagatur
4eec47b191 DOCS: update rag use case images (#13615) 2023-11-20 10:14:52 -08:00
Bagatur
e620347a83 RELEASE: bump 339 (#13613) 2023-11-20 09:56:43 -08:00
Ofer Mendelevitch
52e23e50b1 BUG: Fix search_kwargs in Vectara retriever (#13299)
- **Description:** fix a bug that prevented as_retriever() in Vectara to
use the desired input arguments
  - **Issue:** as_retriever did not pass the arguments properly
  - **Tag maintainer:** @baskaryan
  - **Twitter handle:** @ofermend
2023-11-20 09:44:43 -08:00
Holt Skinner
1c08dbfb33 IMPROVEMENT: Reduce post-processing time for DocAIParser (#13210)
- Remove `WrappedDocument` introduced in
https://github.com/langchain-ai/langchain/pull/11413
- https://github.com/googleapis/python-documentai-toolbox/issues/198 in
Document AI Toolbox to improve initialization time for `WrappedDocument`
object.

@lkuligin

@baskaryan

@hwchase17
2023-11-20 09:41:44 -08:00
Leonid Kuligin
f3fcdea574 fixed an UnboundLocalError when no documents are found (#12995)
Replace this entire comment with:
  - **Description:** fixed a bug
  - **Issue:** the issue # #12780
2023-11-20 09:41:14 -08:00
Stijn Tratsaert
b6f70d776b VertexAI LLM count_tokens method requires list of prompts (#13451)
I encountered this during summarization with VertexAI. I was receiving
an INVALID_ARGUMENT error, as it was trying to send a list of about
17000 single characters.

The [count_tokens
method](https://github.com/googleapis/python-aiplatform/blob/main/vertexai/language_models/_language_models.py#L658)
made available by Google takes in a list of prompts. It does not fail
for small texts, but it does for longer documents because the argument
list will be exceeding Googles allowed limit. Enforcing the list type
makes it work successfully.

This change will cast the input text to count to a list of that single
text so that the input format is always correct.

[Twitter](https://www.x.com/stijn_tratsaert)
2023-11-20 09:40:48 -08:00
Wang Wei
fe7b40cb2a feat: add ERNIE-Bot-4 Function Calling (#13320)
- **Description:** ERNIE-Bot-Chat-4 Large Language Model adds the
ability of `Function Calling` by passing parameters through the
`functions` parameter in the request. To simplify function calling for
ERNIE-Bot-Chat-4, the `create_ernie_fn_chain()` function has been added.
The definition and usage of the `create_ernie_fn_chain()` function is
similar to that of the `create_openai_fn_chain()` function.

Examples as the follows:

```
import json

from langchain.chains.ernie_functions import (
    create_ernie_fn_chain,
)
from langchain.chat_models import ErnieBotChat
from langchain.prompts import ChatPromptTemplate

def get_current_news(location: str) -> str:
    """Get the current news based on the location.'

    Args:
        location (str): The location to query.
    
    Returs:
        str: Current news based on the location.
    """

    news_info = {
        "location": location,
        "news": [
            "I have a Book.",
            "It's a nice day, today."
        ]
    }

    return json.dumps(news_info)

def get_current_weather(location: str, unit: str="celsius") -> str:
    """Get the current weather in a given location

    Args:
        location (str): location of the weather.
        unit (str): unit of the tempuature.
    
    Returns:
        str: weather in the given location.
    """

    weather_info = {
        "location": location,
        "temperature": "27",
        "unit": unit,
        "forecast": ["sunny", "windy"],
    }
    return json.dumps(weather_info)

llm = ErnieBotChat(model_name="ERNIE-Bot-4")
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{query}"),
    ]
)

chain = create_ernie_fn_chain([get_current_weather, get_current_news], llm, prompt, verbose=True)
res = chain.run("北京今天的新闻是什么?")
print(res)
```

The running results of the above program are shown below:
```
> Entering new LLMChain chain...
Prompt after formatting:
Human: 北京今天的新闻是什么?



> Finished chain.
{'name': 'get_current_news', 'thoughts': '用户想要知道北京今天的新闻。我可以使用get_current_news工具来获取这些信息。', 'arguments': {'location': '北京'}}
```
2023-11-19 22:36:12 -08:00
Adilkhan Sarsen
10418ab0c1 DeepLake Backwards compatibility fix (#13388)
- **Description:** during search with DeepLake some people are facing
backwards compatibility issues, this PR fixes it by making search
accessible for the older datasets

---------

Co-authored-by: adolkhan <adilkhan.sarsen@alumni.nu.edu.kz>
2023-11-19 21:46:01 -08:00
Tyler Hutcherson
190952fe76 IMPROVEMENT: Minor redis improvements (#13381)
- **Description:**
- Fixes a `key_prefix` bug where passing it in on
`Redis.from_existing(...)` did not work properly. Updates doc strings
accordingly.
- Updates Redis filter classes logic with best practices on typing,
string formatting, and handling "empty" filters.
- Fixes a bug that would prevent multiple tag filters from being applied
together in some scenarios.
- Added a whole new filter unit testing module. Also updated code
formatting for a number of modules that were failing the `make`
commands.
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Tag maintainer:** @baskaryan 
  - **Twitter handle:** @tchutch94
2023-11-19 19:15:45 -08:00
Sijun He
674bd90a47 DOCS: Fix typo in MongoDB memory docs (#13588)
- **Description:** Fix typo in MongoDB memory docs
  - **Tag maintainer:** @eyurtsev

<!-- Thank you for contributing to LangChain!

  - **Description:** Fix typo in MongoDB memory docs
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
  - **Tag maintainer:** @baskaryan
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-19 19:13:35 -08:00
Sergey Kozlov
df03267edf Fix tool arguments formatting in StructuredChatAgent (#10480)
In the `FORMAT_INSTRUCTIONS` template, 4 curly braces (escaping) are
used to get single curly brace after formatting:

```
"{{{ ... }}}}" -> format_instructions.format() ->  "{{ ... }}" -> template.format() -> "{ ... }".
```

Tool's `args_schema` string contains single braces `{ ... }`, and is
also transformed to `{{{{ ... }}}}` form. But this is not really correct
since there is only one `format()` call:

```
"{{{{ ... }}}}" -> template.format() -> "{{ ... }}".
```

As a result we get double curly braces in the prompt:
````
Respond to the human as helpfully and accurately as possible. You have access to the following tools:

foo: Test tool FOO, args: {{'tool_input': {{'type': 'string'}}}}    # <--- !!!
...
Provide only ONE action per $JSON_BLOB, as shown:

```
{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}
```
````

This PR fixes curly braces escaping in the `args_schema` to have single
braces in the final prompt:
````
Respond to the human as helpfully and accurately as possible. You have access to the following tools:

foo: Test tool FOO, args: {'tool_input': {'type': 'string'}}    # <--- !!!
...
Provide only ONE action per $JSON_BLOB, as shown:

```
{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}
```
````

---------

Co-authored-by: Sergey Kozlov <sergey.kozlov@ludditelabs.io>
2023-11-19 18:45:43 -08:00
Wouter Durnez
ef7802b325 Add llama2-13b-chat-v1 support to chat_models.BedrockChat (#13403)
Hi 👋 We are working with Llama2 on Bedrock, and would like to add it to
Langchain. We saw a [pull
request](https://github.com/langchain-ai/langchain/pull/13322) to add it
to the `llm.Bedrock` class, but since it concerns a chat model, we would
like to add it to `BedrockChat` as well.

- **Description:** Add support for Llama2 to `BedrockChat` in
`chat_models`
- **Issue:** the issue # it fixes (if applicable)
[#13316](https://github.com/langchain-ai/langchain/issues/13316)
  - **Dependencies:** any dependencies required for this change `None`
  - **Tag maintainer:** /
  - **Twitter handle:** `@SimonBockaert @WouterDurnez`

---------

Co-authored-by: wouter.durnez <wouter.durnez@showpad.com>
Co-authored-by: Simon Bockaert <simon.bockaert@showpad.com>
2023-11-19 18:44:58 -08:00
jwbeck97
a93616e972 FEAT: Add azure cognitive health tool (#13448)
- **Description:** This change adds an agent to the Azure Cognitive
Services toolkit for identifying healthcare entities
  - **Dependencies:** azure-ai-textanalytics (Optional)

---------

Co-authored-by: James Beck <James.Beck@sa.gov.au>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 18:44:01 -08:00
Massimiliano Pronesti
6bf9b2cb51 BUG: Limit Azure OpenAI embeddings chunk size (#13425)
Hi! 
This short PR aims at:
* Fixing `OpenAIEmbeddings`' check on `chunk_size` when used with Azure
OpenAI (thus with openai < 1.0). Azure OpenAI embeddings support at most
16 chunks per batch, I believe we are supposed to take the min between
the passed value/default value and 16, not the max - which, I suppose,
was introduced by accident while refactoring the previous version of
this check from this other PR of mine: #10707
* Porting this fix to the newest class (`AzureOpenAIEmbeddings`) for
openai >= 1.0

This fixes #13539 (closed but the issue persists).  

@baskaryan @hwchase17
2023-11-19 18:34:51 -08:00
Zeyang Lin
e53f59f01a DOCS: doc-string - langchain.vectorstores.dashvector.DashVector (#13502)
- **Description:** There are several mistakes in the sample code in the
doc-string of `DashVector` class, and this pull request aims to correct
them.
The correction code has been tested against latest version (at the time
of creation of this pull request) of: `langchain==0.0.336`
`dashvector==1.0.6` .
- **Issue:** No issue is created for this.
- **Dependencies:** No dependency is required for this change,
<!-- - **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below), -->
- **Twitter handle:** `zeyanglin`

<!-- Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
-->
2023-11-19 18:24:05 -08:00
John Mai
16f7912e1b BUG: fix hunyuan appid type (#13496)
- **Description: fix hunyuan appid type
- **Issue:
https://github.com/langchain-ai/langchain/pull/12022#issuecomment-1815627855
2023-11-19 18:23:45 -08:00
Leonid Ganeline
43972be632 docs updating AzureML notebooks (#13492)
- Added/updated descriptions and links

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-19 18:07:12 -08:00
Nicolò Boschi
8362bd729b AstraDB: use includeSimilarity option instead of $similarity (#13512)
- **Description:** AstraDB is going to deprecate the `$similarity`
projection property in favor of the ´includeSimilarity´ option flag. I
moved all the queries to the new format.
- **Tag maintainer:** @hemidactylus 
- **Twitter handle:** nicoloboschi
2023-11-19 17:54:35 -08:00
shumpei
7100d586ef Introduce search_kwargs for Custom Parameters in BingSearchAPIWrapper (#13525)
Added a `search_kwargs` field to BingSearchAPIWrapper in
`bing_search.py,` enabling users to include extra keyword arguments in
Bing search queries. This update, like specifying language preferences,
adds more customization to searches. The `search_kwargs` seamlessly
merge with standard parameters in `_bing_search_results` method.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-19 17:51:02 -08:00
Nicolò Boschi
ad0c3b9479 Fix Astra integration tests (#13520)
- **Description:** Fix Astra integration tests that are failing. The
`delete` always return True as the deletion is successful if no errors
are thrown. I aligned the test to verify this behaviour
  - **Tag maintainer:** @hemidactylus 
  - **Twitter handle:** nicoloboschi

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:50:49 -08:00
umair mehmood
69d39e2173 fix: VLLMOpenAI -- create() got an unexpected keyword argument 'api_key' (#13517)
The issue was accuring because of `openai` update in Completions. its
not accepting `api_key` and 'api_base' args.

The fix is we check for the openai version and if ats v1 then remove
these keys from args before passing them to `Compilation.create(...)`
when sending from `VLLMOpenAI`

Fixed: #13507 

@eyu
@efriis 
@hwchase17

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-19 17:49:55 -08:00
Manuel Alemán Cueto
6bc08266e0 Fix for oracle schema parsing stated on the issue #7928 (#13545)
- **Description:** In this pull request, we address an issue related to
assigning a schema to the SQLDatabase class when utilizing an Oracle
database. The current implementation encounters a bug where, upon
attempting to execute a query, the alter session parse is not
appropriately defined for Oracle, leading to an error,
  - **Issue:** #7928,
  - **Dependencies:** No dependencies,
  - **Tag maintainer:** @baskaryan,

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:35:27 -08:00
Andrew Teeter
325bdac673 feat: load all namespaces (#13549)
- **Description:** This change allows for the `MWDumpLoader` to load all
namespaces including custom by default instead of only loading the
[default
namespaces](https://www.mediawiki.org/wiki/Help:Namespaces#Localisation).
  - **Tag maintainer:** @hwchase17
2023-11-19 17:35:17 -08:00
Taranjeet Singh
47451764a7 Add embedchain retriever (#13553)
**Description:**

This commit adds embedchain retriever along with tests and docs.
Embedchain is a RAG framework to create data pipelines.

**Twitter handle:**
- [Taranjeet's twitter](https://twitter.com/taranjeetio) and
[Embedchain's twitter](https://twitter.com/embedchain)

**Reviewer**
@hwchase17

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:35:03 -08:00
rafly lesmana
420a17542d fix: Make YoutubeLoader support on demand language translation (#13583)
**Description:**
Enhance the functionality of YoutubeLoader to enable the translation of
available transcripts by refining the existing logic.

**Issue:**
Encountering a problem with YoutubeLoader (#13523) where the translation
feature is not functioning as expected.

Tag maintainers/contributors who might be interested:
@eyurtsev

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:34:48 -08:00
Leonid Ganeline
cc50e023d1 DOCS langchain decorators update (#13535)
added disclaimer

---------

Co-authored-by: Erick Friis <erickfriis@gmail.com>
2023-11-19 17:30:05 -08:00
Brace Sproul
02a13030c0 DOCS: updated langchain stack img to be svg (#13540) 2023-11-19 16:26:53 -08:00
Bagatur
78a1f4b264 bump 338, exp 42 (#13564) 2023-11-18 15:12:07 -08:00
Bagatur
790ed8be69 update multi index templates (#13569) 2023-11-18 14:42:22 -08:00
Harrison Chase
f4c0e3cc15 move streaming stdout (#13559) 2023-11-18 12:24:49 -05:00
Leonid Ganeline
43dad6cb91 BUG fixed openai_assistant namespace (#13543)
BUG: langchain.agents.openai_assistant has a reference as
`from langchain_experimental.openai_assistant.base import
OpenAIAssistantRunnable`
should be 
`from langchain.agents.openai_assistant.base import
OpenAIAssistantRunnable`

This prevents building of the API Reference docs
2023-11-17 17:15:33 -08:00
Bassem Yacoube
ff382b7b1b IMPROVEMENT Adds support for new OctoAI endpoints (#13521)
small fix to add support for new OctoAI LLM endpoints
2023-11-17 17:15:21 -08:00
Mark Silverberg
cda1b33270 Fix typo/line break in the middle of a word (#13314)
- **Description:** a simple typo/extra line break fix
  - **Dependencies:** none
2023-11-17 16:43:42 -08:00
William FH
cac849ae86 Use random seed (#13544)
For default eval llm
2023-11-17 16:33:31 -08:00
Martin Krasser
79ed66f870 EXPERIMENTAL Generic LLM wrapper to support chat model interface with configurable chat prompt format (#8295)
## Update 2023-09-08

This PR now supports further models in addition to Lllama-2 chat models.
See [this comment](#issuecomment-1668988543) for further details. The
title of this PR has been updated accordingly.

## Original PR description

This PR adds a generic `Llama2Chat` model, a wrapper for LLMs able to
serve Llama-2 chat models (like `LlamaCPP`,
`HuggingFaceTextGenInference`, ...). It implements `BaseChatModel`,
converts a list of chat messages into the [required Llama-2 chat prompt
format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) and
forwards the formatted prompt as `str` to the wrapped `LLM`. Usage
example:

```python
# uses a locally hosted Llama2 chat model
llm = HuggingFaceTextGenInference(
    inference_server_url="http://127.0.0.1:8080/",
    max_new_tokens=512,
    top_k=50,
    temperature=0.1,
    repetition_penalty=1.03,
)

# Wrap llm to support Llama2 chat prompt format.
# Resulting model is a chat model
model = Llama2Chat(llm=llm)

messages = [
    SystemMessage(content="You are a helpful assistant."),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{text}"),
]

prompt = ChatPromptTemplate.from_messages(messages)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = LLMChain(llm=model, prompt=prompt, memory=memory)

# use chat model in a conversation
# ...
```

Also part of this PR are tests and a demo notebook.

- Tag maintainer: @hwchase17
- Twitter handle: `@mrt1nz`

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-17 16:32:13 -08:00
William FH
c56faa6ef1 Add execution time (#13542)
And warn instead of raising an error, since the chain API is too
inconsistent.
2023-11-17 16:04:16 -08:00
pedro-inf-custodio
0fb5f857f9 IMPROVEMENT WebResearchRetriever error handling in urls with connection error (#13401)
- **Description:** Added a method `fetch_valid_documents` to
`WebResearchRetriever` class that will test the connection for every url
in `new_urls` and remove those that raise a `ConnectionError`.
- **Issue:** [Previous
PR](https://github.com/langchain-ai/langchain/pull/13353),
  - **Dependencies:** None,
  - **Tag maintainer:** @efriis 

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
2023-11-17 14:02:26 -08:00
Piyush Jain
d2335d0114 IMPROVEMENT Neptune graph updates (#13491)
## Description
This PR adds an option to allow unsigned requests to the Neptune
database when using the `NeptuneGraph` class.

```python
graph = NeptuneGraph(
    host='<my-cluster>',
    port=8182,
    sign=False
)
```

Also, added is an option in the `NeptuneOpenCypherQAChain` to provide
additional domain instructions to the graph query generation prompt.
This will be injected in the prompt as-is, so you should include any
provider specific tags, for example `<instructions>` or `<INSTR>`.

```python
chain = NeptuneOpenCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    extra_instructions="""
    Follow these instructions to build the query:
    1. Countries contain airports, not the other way around
    2. Use the airport code for identifying airports
    """
)
```
2023-11-17 13:49:31 -08:00
William FH
5a28dc3210 Override Keys Option (#13537)
Should be able to override the global key if you want to evaluate
different outputs in a single run
2023-11-17 13:32:43 -08:00
Bagatur
e584b28c54 bump 337 (#13534) 2023-11-17 12:50:52 -08:00
Wietse Venema
e80b53ff4f TEMPLATE Add VertexAI Chuck Norris template (#13531)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-17 12:27:52 -08:00
Bagatur
2e2114d2d0 FEATURE: Runnable with message history (#13418)
Add RunnableWithMessageHistory class that can wrap certain runnables and manages chat history for them.
2023-11-17 12:00:01 -08:00
Bagatur
0fc3af8932 IMPROVEMENT: update assistants output and doc (#13480) 2023-11-17 11:58:54 -08:00
Bagatur
b4312aac5c TEMPLATES: Add multi-index templates (#13490)
One that routes and one that fuses

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-17 02:00:11 -08:00
Hugues Chocart
35e04f204b [LLMonitorCallbackHandler] Various improvements (#13151)
Small improvements for the llmonitor callback handler, like better
support for non-openai models.


---------

Co-authored-by: vincelwt <vince@lyser.io>
2023-11-16 23:39:36 -08:00
Noah Stapp
c1b041c188 Add Wrapping Library Metadata to MongoDB vector store (#13084)
**Description**
MongoDB drivers are used in various flavors and languages. Making sure
we exercise our due diligence in identifying the "origin" of the library
calls makes it best to understand how our Atlas servers get accessed.
2023-11-16 22:20:04 -08:00
Leonid Ganeline
21552628c8 DOCS updated data_connection index page (#13426)
- the `Index` section was missed. Created it.
- text simplification

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-16 18:16:50 -08:00
Guy Korland
7f8fd70ac4 Add optional arguments to FalkorDBGraph constructor (#13459)
**Description:** Add optional arguments to FalkorDBGraph constructor
**Tag maintainer:** baskaryan 
**Twitter handle:** @g_korland
2023-11-16 18:15:40 -08:00
Leonid Ganeline
e3a5cd7969 docs integrations/vectorstores/ cleanup (#13487)
- updated titles to consistent format
- added/updated descriptions and links
- format heading
2023-11-16 17:51:49 -08:00
Leonid Ganeline
1d2981114f DOCS updated async-faiss example (#13434)
The original notebook has the `faiss` title which is duplicated in
the`faiss.jpynb`. As a result, we have two `faiss` items in the
vectorstore ToC. And the first item breaks the searching order (it is
placed between `A...` items).
- I updated title to `Asynchronous Faiss`.
2023-11-16 17:41:26 -08:00
Erick Friis
9dfad613c2 IMPROVEMENT Allow openai v1 in all templates that require it (#13489)
- pyproject change
- lockfiles
2023-11-16 17:10:08 -08:00
chris stucchio
d7f014cd89 Bug: OpenAIFunctionsAgentOutputParser doesn't handle functions with no args (#13467)
**Description/Issue:** 
When OpenAI calls a function with no args, the args are `""` rather than
`"{}"`. Then `json.loads("")` blows up. This PR handles it correctly.

**Dependencies:** None
2023-11-16 16:47:05 -08:00
Yujie Qian
41a433fa33 IMPROVEMENT: add input_type to VoyageEmbeddings (#13488)
- **Description:** add input_type to VoyageEmbeddings
2023-11-16 16:35:36 -08:00
David Duong
ea6e017b85 Add serialisation arguments to Bedrock and ChatBedrock (#13465) 2023-11-17 01:33:24 +01:00
Erick Friis
427331d621 IMPROVEMENT Lock pydantic v1 in app template, cli 0.0.18 (#13485) 2023-11-16 15:22:11 -08:00
Erick Friis
75363f048f BUG Fix app_name in cli app new (#13482) 2023-11-16 14:19:35 -08:00
Leonid Ganeline
9ff8f69e75 DOCS updated memory Titles (#13435)
- Fixed titles for two notebooks. They were inconsistent with other
titles and clogged ToC.
- Added `Upstash` description and link
- Moved the authentication text up in the `Elasticsearch` nb, right
after package installation. It was on the end of the page which was a
wrong place.
2023-11-16 13:24:05 -08:00
ifduyue
324ab382ad Use List instead of list (#13443)
Unify List usages in libs/langchain/langchain/text_splitter.py, only one
place it's `list`, all other ocurrences are `List`
2023-11-16 13:15:58 -08:00
Stefano Lottini
b029d9f4e6 Astra DB: minor improvements to docstrings and demo notebook (#13449)
This PR brings a few minor improvements to the docs, namely class/method
docstrings and the demo notebook.

- A note on how to control concurrency levels to tune performance in
bulk inserts, both in the class docstring and the demo notebook;
- Slightly increased concurrency defaults after careful experimentation
(still on the conservative side even for clients running on
less-than-typical network/hardware specs)
- renamed the DB token variable to the standardized
`ASTRA_DB_APPLICATION_TOKEN` name (used elsewhere, e.g. in the Astra DB
docs)
- added a note and a reference (add_text docstring, demo notebook) on
allowed metadata field names.

Thank you!
2023-11-16 12:48:32 -08:00
Eugene Yurtsev
1e43fd6afe Add ahandle_event to _all_ (#13469)
Add ahandle_event for backwards compatibility as it is used by langserve
2023-11-16 12:46:20 -08:00
Leonid Ganeline
283ef1f66d DOCS fix for integratons/document_loaders sidebar (#13471)
The current `integrations/document_loaders/` sidebar has the
`example_data` item, which is a menu with a single item: "Notebook".
It is happening because the `integrations/document_loaders/` folder has
the `example_data/notebook.md` file that is used to autogenerate the
above menu item.
- removed an example_data/notebook.md file. Docusaurus doesn't have
simple ways to fix this problem (to exclude folders/files from an
autogenerated sidebar). Removing this file didn't break any existing
examples, so this fix is safe.
2023-11-16 12:02:30 -08:00
Leonid Ganeline
b1fcf5b481 DOCS: integrations/text_embeddings/ cleanup (#13476)
Updated several notebooks:
- fixed titles which are inconsistent or break the ToC sorting order.
- added missed soruce descriptions and links
- fixed formatting
2023-11-16 11:56:53 -08:00
Bagatur
6030ab9779 Update chain of note README.md (#13473) 2023-11-16 10:47:27 -08:00
Lance Martin
cf66a4737d Update multi-modal RAG cookbook (#13429)
Use example
[blog](https://cloudedjudgement.substack.com/p/clouded-judgement-111023)
w/ tables, charts as images.
2023-11-16 10:34:13 -08:00
Bagatur
10fddac4b5 Bagatur/chain of note template(#13470) 2023-11-16 10:34:04 -08:00
Leonid Ganeline
d5b1a21ae4 DOCS updated semadb example (#13431)
- the `SemaDB` notebook was placed in additional subfolder which breaks
the vectorstore ToC. I moved file up, removed this unnecessary
subfolder; updated the `vercel.json` with rerouting for the new URL
- Added SemaDB description and link
- improved text consistency
2023-11-16 09:57:22 -08:00
Leonid Ganeline
17c2007e0c DOCS updated Activeloop DeepMemory notebook (#13428)
- Fixed the title of the notebook. It created an ugly ToC element as
`Activeloop DeepLake's DeepMemory + LangChain + ragas or how to get +27%
on RAG recall.`
- Added Activeloop description
- improved consistency in text
- fixed ToC (it was using HTML tagas that break left-side in-page ToC).
Now in-page ToC works
2023-11-16 09:56:28 -08:00
Harrison Chase
f90249305a callback refactor (#13372)
Co-authored-by: Nuno Campos <nuno@boringbits.io>
2023-11-16 08:25:09 -08:00
Bagatur
9e6748e198 DOCS: rag nit (#13436) 2023-11-15 18:06:52 -08:00
Leonid Ganeline
8a52c1456b updated clickup example (#13424)
- Fixed headers (was more then 1 Titles)
- Removed security token value. It was OK to have it, because it is
temporary token, but the automatic security swippers raise warnings on
that.
- Added `ClickUp` service description and link.
2023-11-15 15:11:24 -08:00
Brace Sproul
79fa9a81f4 Fix a link in docs (#13423) 2023-11-15 15:02:26 -08:00
Nuno Campos
a632f61f3d IMPROVEMENT pirate-speak-configurable alternatives env vars (#13395)
…rnative LLMs until used

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-15 14:38:03 -08:00
Bagatur
f0bb839506 DOCS: langchain stack img update (#13421) 2023-11-15 14:10:02 -08:00
Bagatur
a9b2c943e6 bump 336, exp 44 (#13420) 2023-11-15 14:08:34 -08:00
Bagatur
1372296dc8 FIX: Infer runnable agent single or multi action (#13412) 2023-11-15 13:58:14 -08:00
Eugene Yurtsev
accadccf8e Use secretstr for api keys for javelin-ai-gateway (#13417)
- Make javelin_ai_gateway_api_key a SecretStr

---------

Co-authored-by: Hiroshi Tashiro <hiroshitash@gmail.com>
2023-11-15 16:12:05 -05:00
William FH
ba501b27a0 Fix Runnable Lambda Afunc Repr (#13413)
Otherwise, you get an error when using async functions.


h/t to Chris Ruppelt
2023-11-15 16:11:42 -05:00
Sumukh Sridhara
1726d5dcdd Merge pull request #13232
* PGVector needs to close its connection if its garbage collected
2023-11-15 15:34:37 -05:00
Nuno Campos
85a77d2c27 IMPROVEMENT Passthrough kwargs in runnable lambda (#13405)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-15 11:45:16 -08:00
Bagatur
76c317ed78 DOCS: update rag use case (#13319) 2023-11-15 10:54:15 -08:00
Bagatur
a0b39a4325 DOCS: install nit (#13380) 2023-11-15 10:27:00 -08:00
Clay Elmore
8823e3831f FEAT Bedrock cohere embedding support (#13366)
- **Description:** adding cohere embedding support to bedrock embedding
class
  - **Issue:** N/A
  - **Dependencies:** None
  - **Tag maintainer:** @3coins 
  - **Twitter handle:** celmore25

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-15 10:19:12 -08:00
Bagatur
9f543634e2 Agent window management how to (#13033) 2023-11-15 09:38:02 -08:00
Nuno Campos
d5aeff706a Make it easier to subclass RunnableEach (#13346)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
2023-11-15 13:12:57 +00:00
Erick Friis
bed06a4f4a IMPROVEMENT research-assistant configurable report type (#13312)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-11-14 21:04:57 -08:00
竹内謙太
3b5e8bacfa FEAT Add some properties to NotionDBLoader (#13358)
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

fix #13356

Add supports following properties for metadata to NotionDBLoader.

- `checkbox`
- `email`
- `number`
- `select`

There are no relevant tests for this code to be updated.
2023-11-14 20:31:12 -08:00
Leonid Ganeline
c9b9359647 FEAT docs integration cards site (#13379)
The `Integrations` site is hidden now.
I've added it into the `More` menu.
The name is `Integration Cards` otherwise, it is confused with the
`Integrations` menu.

---------

Co-authored-by: Erick Friis <erickfriis@gmail.com>
2023-11-14 19:49:17 -08:00
Erick Friis
0f25ea9671 api doc newlines (#13378)
cc @leo-gan 

Deploying at
https://api.python.langchain.com/en/erick-api-doc-newlines-/api_reference.html
(will take a bit)
2023-11-14 19:16:31 -08:00
Fielding Johnston
37eb44c591 BUG Add limit_to_domains to APIChain based tools (#13367)
- **Description:** Adds `limit_to_domains` param to the APIChain based
tools (open_meteo, TMDB, podcast_docs, and news_api)
- **Issue:** I didn't open an issue, but after upgrading to 0.0.328
using these tools would throw an error.
  - **Dependencies:** N/A
  - **Tag maintainer:** @baskaryan 
  
  
**Note**: I included the trailing / simply because the docs here did
fc886cc303/docs/docs/use_cases/apis.ipynb (L246)
, but I checked the code and it is using `urlparse`. SoI followed the
docs since it comes down to stylee.
2023-11-14 19:07:16 -08:00
Predrag Gruevski
91443cacdb Update templates/rag-self-query with newer dependencies without CVEs. (#13362)
The `langchain` repo was being flagged for using vulnerable
dependencies, some of which were in this template's lockfile. Updating
to newer versions should fix that.
2023-11-14 19:06:18 -08:00
Predrag Gruevski
ac7e88fbbe Update rag-timescale-conversation to dependencies without CVEs. (#13364)
Just `poetry lock` and moving `langchain` to the latest version, in case
folks copy this template.

This resolves some vulnerable dependency alerts GitHub code scanning was
flagging.
2023-11-14 19:05:12 -08:00
Leonid Ganeline
342ed5c77a Yi model from 01.ai , example (#13375)
Added an example with new soa `Yi` model to `HuggingFace-hub` notebook
2023-11-14 17:10:53 -08:00
Bagatur
38180ad25f bump openai support (#13262) 2023-11-14 16:50:23 -08:00
Erick Friis
9545f0666d fix cli release (#13373)
My thought is that the ==version would prevent pip from finding the
package on regular [pypi.org](http://pypi.org/), so it would look at
[test.pypi.org](http://test.pypi.org/) for that. Otherwise it'll pull
package from [pypi.org](http://pypi.org/) (e.g. sub deps)

Right now, the cli release is failing because it's going to
test.pypi.org by default, so it finds this incorrect FASTAPI package
instead of the real one: https://test.pypi.org/project/FASTAPI/
2023-11-14 15:08:35 -08:00
Erick Friis
7c3066f9ec more cli interactivity, bugfix (#13360) 2023-11-14 14:49:43 -08:00
Bagatur
3596be5210 DOCS: format notebooks (#13371) 2023-11-14 14:17:44 -08:00
Predrag Gruevski
d63d4994c0 Bump all libraries to the latest ruff version. (#13350)
This version of `ruff` is the one we'll be using to lint the docs and
cookbooks (#12677), so I'm making it used everywhere else too.
2023-11-14 16:00:21 -05:00
Predrag Gruevski
2ebd167dba Lint Python notebooks with ruff. (#12677)
The new ruff version fixed the blocking bugs, and I was able to fairly
easily us to a passing state: ruff fixed some issues on its own, I fixed
a handful by hand, and I added a list of narrowly-targeted exclusions
for files that are currently failing ruff rules that we probably should
look into eventually.

I went pretty lenient on the docs / cookbooks rules, allowing dead code
and such things. Perhaps in the future we may want to tighten the rules
further, but this is already a good set of checks that found real issues
and will prevent them going forward.
2023-11-14 15:58:22 -05:00
Massimiliano Pronesti
344cab0739 IMPROVEMENT: support Openai API v1 for Azure OpenAI completions (#13231)
Hi,
this PR adds support for OpenAI API v1 for Azure OpenAI completion API.
@baskaryan @hwchase17

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-14 12:10:18 -08:00
dependabot[bot]
fc886cc303 Bump pyarrow from 13.0.0 to 14.0.1 in /libs/langchain (#13363)
Bumps [pyarrow](https://github.com/apache/arrow) from 13.0.0 to 14.0.1.
<details>
<summary>Commits</summary>
<ul>
<li><a
href="ba53748361"><code>ba53748</code></a>
MINOR: [Release] Update versions for 14.0.1</li>
<li><a
href="529f3768fa"><code>529f376</code></a>
MINOR: [Release] Update .deb/.rpm changelogs for 14.0.1</li>
<li><a
href="b84bbcac64"><code>b84bbca</code></a>
MINOR: [Release] Update CHANGELOG.md for 14.0.1</li>
<li><a
href="f141709763"><code>f141709</code></a>
<a
href="https://redirect.github.com/apache/arrow/issues/38607">GH-38607</a>:
[Python] Disable PyExtensionType autoload (<a
href="https://redirect.github.com/apache/arrow/issues/38608">#38608</a>)</li>
<li><a
href="5a37e74198"><code>5a37e74</code></a>
<a
href="https://redirect.github.com/apache/arrow/issues/38431">GH-38431</a>:
[Python][CI] Update fs.type_name checks for s3fs tests (<a
href="https://redirect.github.com/apache/arrow/issues/38455">#38455</a>)</li>
<li><a
href="2dcee3f82c"><code>2dcee3f</code></a>
MINOR: [Release] Update versions for 14.0.0</li>
<li><a
href="297428cbf2"><code>297428c</code></a>
MINOR: [Release] Update .deb/.rpm changelogs for 14.0.0</li>
<li><a
href="3e9734f883"><code>3e9734f</code></a>
MINOR: [Release] Update CHANGELOG.md for 14.0.0</li>
<li><a
href="9f90995c8c"><code>9f90995</code></a>
<a
href="https://redirect.github.com/apache/arrow/issues/38332">GH-38332</a>:
[CI][Release] Resolve symlinks in RAT lint (<a
href="https://redirect.github.com/apache/arrow/issues/38337">#38337</a>)</li>
<li><a
href="bd61239a32"><code>bd61239</code></a>
<a
href="https://redirect.github.com/apache/arrow/issues/35531">GH-35531</a>:
[Python] C Data Interface PyCapsule Protocol (<a
href="https://redirect.github.com/apache/arrow/issues/37797">#37797</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/apache/arrow/compare/go/v13.0.0...go/v14.0.1">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=pyarrow&package-manager=pip&previous-version=13.0.0&new-version=14.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/langchain-ai/langchain/network/alerts).

</details>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com>
2023-11-14 14:23:52 -05:00
Leonid Ganeline
f5bf3bdf14 added Cookbooks link (#13078)
It is a temporary solution before major documents refactoring.
Related to #13070 (not solving it)
2023-11-14 10:52:47 -08:00
Erick Friis
c0e6045c0b cli 0.0.17 (#13359) 2023-11-14 09:56:18 -08:00
Erick Friis
927824b7cb CLI interactivity (#13148)
Will implement more later
2023-11-14 09:53:29 -08:00
billytrend-cohere
2f6fe6ddf3 Fix latest message index (#13355)
There is a bug which caused the earliest message rather than the latest
message being sent
2023-11-14 09:23:25 -08:00
2379 changed files with 84583 additions and 37632 deletions

View File

@@ -23,7 +23,7 @@ It's essential that we maintain great documentation and testing. If you:
- Update any affected example notebooks and documentation. These live in `docs`.
- Update unit and integration tests when relevant.
- Add a feature
- Add a demo notebook in `docs/modules`.
- Add a demo notebook in `docs/docs/`.
- Add unit and integration tests.
We are a small, progress-oriented team. If there's something you'd like to add or change, opening a pull request is the
@@ -214,6 +214,10 @@ ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogy
Langchain relies heavily on optional dependencies to keep the Langchain package lightweight.
You only need to add a new dependency if a **unit test** relies on the package.
If your package is only required for **integration tests**, then you can skip these
steps and leave all pyproject.toml and poetry.lock files alone.
If you're adding a new dependency to Langchain, assume that it will be an optional dependency, and
that most users won't have it installed.

View File

@@ -68,7 +68,7 @@ jobs:
# It doesn't matter how you change it, any change will cause a cache-bust.
working-directory: ${{ inputs.working-directory }}
run: |
poetry install --with dev,lint,test,typing
poetry install --with lint,typing
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
@@ -76,7 +76,7 @@ jobs:
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
pip install -e "$LANGCHAIN_LOCATION"
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Get .mypy_cache to speed up mypy
uses: actions/cache@v3

View File

@@ -7,6 +7,10 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
env:
POETRY_VERSION: "1.6.1"
@@ -40,6 +44,14 @@ jobs:
shell: bash
run: poetry install
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash

View File

@@ -97,19 +97,18 @@ jobs:
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
# Here we specify:
# - The test PyPI index as the *primary* index, meaning that it takes priority.
# - The regular PyPI index as an extra index, so that any dependencies that
# Here we use:
# - The default regular PyPI index as the *primary* index, meaning
# that it takes priority (https://pypi.org/simple)
# - The test PyPI index as an extra index, so that any dependencies that
# are not found on test PyPI can be resolved and installed anyway.
#
# Without the former, we might install the wrong langchain release.
# Without the latter, we might not be able to install langchain's dependencies.
# (https://test.pypi.org/simple). This will include the PKG_NAME==VERSION
# package because VERSION will not have been uploaded to regular PyPI yet.
#
# TODO: add more in-depth pre-publish tests after testing that importing works
run: |
pip install \
--index-url https://test.pypi.org/simple/ \
--extra-index-url https://pypi.org/simple/ \
--extra-index-url https://test.pypi.org/simple/ \
"$PKG_NAME==$VERSION"
# Replace all dashes in the package name with underscores,

View File

@@ -7,6 +7,10 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
env:
POETRY_VERSION: "1.6.1"
@@ -38,11 +42,20 @@ jobs:
- name: Install dependencies
shell: bash
run: poetry install
run: poetry install --with test
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Run core tests
shell: bash
run: make test
run: |
make test
- name: Ensure the tests did not create any additional files
shell: bash

View File

@@ -3,6 +3,8 @@ import toml
pyproject_toml = toml.load("pyproject.toml")
# Extract the ignore words list (adjust the key as per your TOML structure)
ignore_words_list = pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list")
ignore_words_list = (
pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list")
)
print(f"::set-output name=ignore_words_list::{ignore_words_list}")
print(f"::set-output name=ignore_words_list::{ignore_words_list}")

View File

@@ -3,18 +3,19 @@ name: libs/langchain CI
on:
push:
branches: [ master ]
branches: [master]
pull_request:
paths:
- '.github/actions/poetry_setup/action.yml'
- '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/_pydantic_compatibility.yml'
- '.github/workflows/langchain_ci.yml'
- 'libs/*'
- 'libs/langchain/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
- ".github/actions/poetry_setup/action.yml"
- ".github/tools/**"
- ".github/workflows/_lint.yml"
- ".github/workflows/_test.yml"
- ".github/workflows/_pydantic_compatibility.yml"
- ".github/workflows/langchain_ci.yml"
- "libs/*"
- "libs/langchain/**"
- "libs/core/**"
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
@@ -32,29 +33,25 @@ env:
jobs:
lint:
uses:
./.github/workflows/_lint.yml
uses: ./.github/workflows/_lint.yml
with:
working-directory: libs/langchain
secrets: inherit
test:
uses:
./.github/workflows/_test.yml
uses: ./.github/workflows/_test.yml
with:
working-directory: libs/langchain
secrets: inherit
compile-integration-tests:
uses:
./.github/workflows/_compile_integration_test.yml
uses: ./.github/workflows/_compile_integration_test.yml
with:
working-directory: libs/langchain
secrets: inherit
pydantic-compatibility:
uses:
./.github/workflows/_pydantic_compatibility.yml
uses: ./.github/workflows/_pydantic_compatibility.yml
with:
working-directory: libs/langchain
secrets: inherit
@@ -89,6 +86,11 @@ jobs:
echo "Running extended tests, installing dependencies with poetry..."
poetry install -E extended_testing
- name: Install langchain core editable
shell: bash
run: |
poetry run pip install -e ../core
- name: Run extended tests
run: make extended_tests

52
.github/workflows/langchain_core_ci.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
---
name: libs/langchain core CI
on:
push:
branches: [ master ]
pull_request:
paths:
- '.github/actions/poetry_setup/action.yml'
- '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/_pydantic_compatibility.yml'
- '.github/workflows/langchain_core_ci.yml'
- 'libs/core/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
POETRY_VERSION: "1.6.1"
WORKDIR: "libs/core"
jobs:
lint:
uses:
./.github/workflows/_lint.yml
with:
working-directory: libs/core
secrets: inherit
test:
uses:
./.github/workflows/_test.yml
with:
working-directory: libs/core
secrets: inherit
pydantic-compatibility:
uses:
./.github/workflows/_pydantic_compatibility.yml
with:
working-directory: libs/core
secrets: inherit

View File

@@ -0,0 +1,13 @@
---
name: libs/core Release
on:
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/core
secrets: inherit

View File

@@ -3,17 +3,19 @@ name: libs/experimental CI
on:
push:
branches: [ master ]
branches: [master]
pull_request:
paths:
- '.github/actions/poetry_setup/action.yml'
- '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/langchain_experimental_ci.yml'
- 'libs/*'
- 'libs/experimental/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
- ".github/actions/poetry_setup/action.yml"
- ".github/tools/**"
- ".github/workflows/_lint.yml"
- ".github/workflows/_test.yml"
- ".github/workflows/langchain_experimental_ci.yml"
- "libs/*"
- "libs/experimental/**"
- "libs/langchain/**"
- "libs/core/**"
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
@@ -31,23 +33,19 @@ env:
jobs:
lint:
uses:
./.github/workflows/_lint.yml
uses: ./.github/workflows/_lint.yml
with:
working-directory: libs/experimental
langchain-location: ../langchain
secrets: inherit
test:
uses:
./.github/workflows/_test.yml
uses: ./.github/workflows/_test.yml
with:
working-directory: libs/experimental
secrets: inherit
compile-integration-tests:
uses:
./.github/workflows/_compile_integration_test.yml
uses: ./.github/workflows/_compile_integration_test.yml
with:
working-directory: libs/experimental
secrets: inherit
@@ -88,6 +86,7 @@ jobs:
echo "Editably installing langchain outside of poetry, to avoid messing up lockfile..."
poetry run pip install -e ../langchain
poetry run pip install -e ../core
- name: Run tests
run: make test

12
LICENSE
View File

@@ -1,6 +1,6 @@
The MIT License
MIT License
Copyright (c) Harrison Chase
Copyright (c) LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -9,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -43,10 +43,11 @@ spell_fix:
lint:
poetry run ruff docs templates cookbook
poetry run black docs templates cookbook --diff
poetry run ruff format docs templates cookbook --diff
poetry run ruff --select I docs templates cookbook
format format_diff:
poetry run black docs templates cookbook
poetry run ruff format docs templates cookbook
poetry run ruff --select I --fix docs templates cookbook
######################

View File

@@ -30,7 +30,7 @@ pip install langchain
With conda:
```bash
pip install langsmith && conda install langchain -c conda-forge
conda install langchain -c conda-forge
```
## 🤔 What is LangChain?

View File

@@ -67,7 +67,6 @@
"llama2_code = ChatOllama(model=\"codellama:7b-instruct\")\n",
"\n",
"# API\n",
"from getpass import getpass\n",
"from langchain.llms import Replicate\n",
"\n",
"# REPLICATE_API_TOKEN = getpass()\n",

File diff suppressed because one or more lines are too long

View File

@@ -8,6 +8,7 @@ Notebook | Description
[Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains.
[Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains.
[Semi_structured_multi_modal_RA...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using various tools and methods such as unstructured for parsing, multi-vector retriever for storing, lcel for implementing chains, and open source language models like llama2, llava, and gpt4all.
[analyze_document.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/analyze_document.ipynb) | Analyze a single long document.
[autogpt/autogpt.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/autogpt.ipynb) | Implement autogpt, a language model, with langchain primitives such as llms, prompttemplates, vectorstores, embeddings, and tools.
[autogpt/marathon_times.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/marathon_times.ipynb) | Implement autogpt for finding winning marathon times.
[baby_agi.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/baby_agi.ipynb) | Implement babyagi, an ai agent that can generate and execute tasks based on a given objective, with the flexibility to swap out specific vectorstores/model providers.
@@ -44,6 +45,7 @@ Notebook | Description
[plan_and_execute_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/plan_and_execute_agent.ipynb) | Create plan-and-execute agents that accomplish objectives by planning tasks with a language model (llm) and executing them with a separate agent.
[press_releases.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/press_releases.ipynb) | Retrieve and query company press release data powered by [Kay.ai](https://kay.ai).
[program_aided_language_model.i...](https://github.com/langchain-ai/langchain/tree/master/cookbook/program_aided_language_model.ipynb) | Implement program-aided language models as described in the provided research paper.
[qa_citations.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/qa_citations.ipynb) | Different ways to get a model to cite its sources.
[retrieval_in_sql.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/retrieval_in_sql.ipynb) | Perform retrieval-augmented-generation (rag) on a PostgreSQL database using pgvector.
[sales_agent_with_context.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/sales_agent_with_context.ipynb) | Implement a context-aware ai sales agent, salesgpt, that can have natural sales conversations, interact with other systems, and use a product knowledge base to discuss a company's offerings.
[self_query_hotel_search.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/self_query_hotel_search.ipynb) | Build a hotel room search feature with self-querying retrieval, using a specific hotel recommendation dataset.

View File

@@ -102,9 +102,9 @@
"metadata": {},
"outputs": [],
"source": [
"from lxml import html\n",
"from typing import Any\n",
"\n",
"from pydantic import BaseModel\n",
"from typing import Any, Optional\n",
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"# Get elements\n",
@@ -317,11 +317,12 @@
"outputs": [],
"source": [
"import uuid\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.schema.document import Document\n",
"\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.schema.document import Document\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.vectorstores import Chroma\n",
"\n",
"# The vectorstore to use to index the child chunks\n",
"vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n",
@@ -373,7 +374,6 @@
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"\n",
"# Prompt template\n",

View File

@@ -92,9 +92,9 @@
"metadata": {},
"outputs": [],
"source": [
"from lxml import html\n",
"from typing import Any\n",
"\n",
"from pydantic import BaseModel\n",
"from typing import Any, Optional\n",
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"# Get elements\n",
@@ -224,7 +224,7 @@
"outputs": [],
"source": [
"# Prompt\n",
"prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n",
"prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\\n",
"Give a concise summary of the table or text. Table or text chunk: {element} \"\"\"\n",
"prompt = ChatPromptTemplate.from_template(prompt_text)\n",
"\n",
@@ -313,7 +313,7 @@
" # Execute the command and save the output to the defined output file\n",
" /Users/rlm/Desktop/Code/llama.cpp/bin/llava -m ../models/llava-7b/ggml-model-q5_k.gguf --mmproj ../models/llava-7b/mmproj-model-f16.gguf --temp 0.1 -p \"Describe the image in detail. Be specific about graphs, such as bar plots.\" --image \"$img\" > \"$output_file\"\n",
"\n",
"done"
"done\n"
]
},
{
@@ -337,7 +337,8 @@
"metadata": {},
"outputs": [],
"source": [
"import os, glob\n",
"import glob\n",
"import os\n",
"\n",
"# Get all .txt file summaries\n",
"file_paths = glob.glob(os.path.expanduser(os.path.join(path, \"*.txt\")))\n",
@@ -371,11 +372,12 @@
"outputs": [],
"source": [
"import uuid\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.schema.document import Document\n",
"\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.schema.document import Document\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.vectorstores import Chroma\n",
"\n",
"# The vectorstore to use to index the child chunks\n",
"vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n",
@@ -644,7 +646,6 @@
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"\n",
"# Prompt template\n",

View File

@@ -82,10 +82,9 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from lxml import html\n",
"from typing import Any\n",
"\n",
"from pydantic import BaseModel\n",
"from typing import Any, Optional\n",
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"# Path to save images\n",
@@ -223,7 +222,7 @@
"outputs": [],
"source": [
"# Prompt\n",
"prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n",
"prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\\n",
"Give a concise summary of the table or text. Table or text chunk: {element} \"\"\"\n",
"prompt = ChatPromptTemplate.from_template(prompt_text)\n",
"\n",
@@ -312,7 +311,7 @@
" # Execute the command and save the output to the defined output file\n",
" /Users/rlm/Desktop/Code/llama.cpp/bin/llava -m ../models/llava-7b/ggml-model-q5_k.gguf --mmproj ../models/llava-7b/mmproj-model-f16.gguf --temp 0.1 -p \"Describe the image in detail. Be specific about graphs, such as bar plots.\" --image \"$img\" > \"$output_file\"\n",
"\n",
"done"
"done\n"
]
},
{
@@ -322,7 +321,8 @@
"metadata": {},
"outputs": [],
"source": [
"import os, glob\n",
"import glob\n",
"import os\n",
"\n",
"# Get all .txt files in the directory\n",
"file_paths = glob.glob(os.path.expanduser(os.path.join(path, \"*.txt\")))\n",
@@ -375,11 +375,12 @@
],
"source": [
"import uuid\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.schema.document import Document\n",
"\n",
"from langchain.embeddings import GPT4AllEmbeddings\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.schema.document import Document\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.vectorstores import Chroma\n",
"\n",
"# The vectorstore to use to index the child chunks\n",
"vectorstore = Chroma(\n",
@@ -531,7 +532,6 @@
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"\n",
"# Prompt template\n",

View File

@@ -63,11 +63,13 @@
"\n",
"# Load\n",
"from langchain.document_loaders import PyPDFLoader\n",
"\n",
"loader = PyPDFLoader(path + \"cpi.pdf\")\n",
"pdf_pages = loader.load()\n",
"\n",
"# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
"all_splits_pypdf_texts = [d.page_content for d in all_splits_pypdf]"
@@ -130,12 +132,15 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import Chroma\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"baseline = Chroma.from_texts(texts=all_splits_pypdf_texts,\n",
" collection_name=\"baseline\",\n",
" embedding=OpenAIEmbeddings())\n",
"retriever_baseline=baseline.as_retriever()"
"from langchain.vectorstores import Chroma\n",
"\n",
"baseline = Chroma.from_texts(\n",
" texts=all_splits_pypdf_texts,\n",
" collection_name=\"baseline\",\n",
" embedding=OpenAIEmbeddings(),\n",
")\n",
"retriever_baseline = baseline.as_retriever()"
]
},
{
@@ -160,7 +165,7 @@
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"# Prompt\n",
"prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text for retrieval. \\ \n",
"prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text for retrieval. \\\n",
"These summaries will be embedded and used to retrieve the raw text or table elements. \\\n",
"Give a concise summary of the table or text that is well optimized for retrieval. Table or text: {element} \"\"\"\n",
"prompt = ChatPromptTemplate.from_template(prompt_text)\n",
@@ -169,7 +174,7 @@
"model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
"summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()\n",
"\n",
"# Apply to text \n",
"# Apply to text\n",
"text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})\n",
"\n",
"# Apply to tables\n",
@@ -192,31 +197,33 @@
"outputs": [],
"source": [
"# Image summary chain\n",
"import os, base64, io\n",
"import base64\n",
"import io\n",
"import os\n",
"from io import BytesIO\n",
"from PIL import Image\n",
"\n",
"from langchain.schema.messages import HumanMessage\n",
"from PIL import Image\n",
"\n",
"\n",
"def encode_image(image_path):\n",
" ''' Getting the base64 string '''\n",
" \"\"\"Getting the base64 string\"\"\"\n",
" with open(image_path, \"rb\") as image_file:\n",
" return base64.b64encode(image_file.read()).decode('utf-8') \n",
" return base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
"\n",
"\n",
"def image_summarize(img_base64, prompt):\n",
" \"\"\"Image summary\"\"\"\n",
" chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
"def image_summarize(img_base64,prompt):\n",
" ''' Image summary '''\n",
" chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n",
" max_tokens=1024)\n",
" \n",
" msg = chat.invoke(\n",
" [\n",
" HumanMessage(\n",
" content=[\n",
" {\"type\": \"text\", \"text\":prompt},\n",
" {\"type\": \"text\", \"text\": prompt},\n",
" {\n",
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{img_base64}\"\n",
" },\n",
" \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n",
" },\n",
" ]\n",
" )\n",
@@ -224,6 +231,7 @@
" )\n",
" return msg.content\n",
"\n",
"\n",
"# Store base64 encoded images\n",
"img_base64_list = []\n",
"\n",
@@ -231,17 +239,17 @@
"image_summaries = []\n",
"\n",
"# Prompt\n",
"prompt = \"\"\"You are an assistant tasked with summarizing images for retrieval. \\ \n",
"prompt = \"\"\"You are an assistant tasked with summarizing images for retrieval. \\\n",
"These summaries will be embedded and used to retrieve the raw image. \\\n",
"Give a concise summary of the image that is well optimized for retrieval.\"\"\"\n",
"\n",
"# Apply to images\n",
"for img_file in sorted(os.listdir(path)):\n",
" if img_file.endswith('.jpg'):\n",
" if img_file.endswith(\".jpg\"):\n",
" img_path = os.path.join(path, img_file)\n",
" base64_image = encode_image(img_path)\n",
" img_base64_list.append(base64_image)\n",
" image_summaries.append(image_summarize(base64_image,prompt))"
" image_summaries.append(image_summarize(base64_image, prompt))"
]
},
{
@@ -263,18 +271,15 @@
"source": [
"import uuid\n",
"from base64 import b64decode\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.schema.document import Document\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"\n",
"def create_multi_vector_retriever(vectorstore, \n",
" text_summaries, \n",
" texts, \n",
" table_summaries, \n",
" tables, \n",
" image_summaries, \n",
" images):\n",
" \n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.schema.document import Document\n",
"from langchain.storage import InMemoryStore\n",
"\n",
"\n",
"def create_multi_vector_retriever(\n",
" vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images\n",
"):\n",
" # Initialize the storage layer\n",
" store = InMemoryStore()\n",
" id_key = \"doc_id\"\n",
@@ -309,18 +314,22 @@
"\n",
" return retriever\n",
"\n",
"\n",
"# The vectorstore to use to index the summaries\n",
"multi_vector_img = Chroma(collection_name=\"multi_vector_img\", \n",
" embedding_function=OpenAIEmbeddings())\n",
"multi_vector_img = Chroma(\n",
" collection_name=\"multi_vector_img\", embedding_function=OpenAIEmbeddings()\n",
")\n",
"\n",
"# Create retriever\n",
"retriever_multi_vector_img = create_multi_vector_retriever(multi_vector_img,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries, \n",
" tables, \n",
" image_summaries, \n",
" img_base64_list)"
"retriever_multi_vector_img = create_multi_vector_retriever(\n",
" multi_vector_img,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries,\n",
" tables,\n",
" image_summaries,\n",
" img_base64_list,\n",
")"
]
},
{
@@ -330,10 +339,10 @@
"metadata": {},
"outputs": [],
"source": [
"# Testing on retrieval \n",
"query=\"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n",
"suffix_for_images=\" Include any pie charts, graphs, or tables.\"\n",
"docs = retriever_multi_vector_img.get_relevant_documents(query+suffix_for_images)"
"# Testing on retrieval\n",
"query = \"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n",
"suffix_for_images = \" Include any pie charts, graphs, or tables.\"\n",
"docs = retriever_multi_vector_img.get_relevant_documents(query + suffix_for_images)"
]
},
{
@@ -356,15 +365,17 @@
}
],
"source": [
"from IPython.display import display, HTML\n",
"def plt_img_base64(img_base64):\n",
"from IPython.display import HTML, display\n",
"\n",
"\n",
"def plt_img_base64(img_base64):\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
" \n",
"\n",
" # Display the image by rendering the HTML\n",
" display(HTML(image_html))\n",
"\n",
"\n",
"plt_img_base64(docs[1])"
]
},
@@ -386,17 +397,20 @@
"outputs": [],
"source": [
"# The vectorstore to use to index the summaries\n",
"multi_vector_text = Chroma(collection_name=\"multi_vector_text\", \n",
" embedding_function=OpenAIEmbeddings())\n",
"multi_vector_text = Chroma(\n",
" collection_name=\"multi_vector_text\", embedding_function=OpenAIEmbeddings()\n",
")\n",
"\n",
"# Create retriever\n",
"retriever_multi_vector_img_summary = create_multi_vector_retriever(multi_vector_text,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries, \n",
" tables, \n",
" image_summaries, \n",
" image_summaries)"
"retriever_multi_vector_img_summary = create_multi_vector_retriever(\n",
" multi_vector_text,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries,\n",
" tables,\n",
" image_summaries,\n",
" image_summaries,\n",
")"
]
},
{
@@ -418,14 +432,17 @@
"\n",
"# Create chroma w/ multi-modal embeddings\n",
"multimodal_embd = Chroma(\n",
" collection_name=\"multimodal_embd\",\n",
" embedding_function=OpenCLIPEmbeddings()\n",
" collection_name=\"multimodal_embd\", embedding_function=OpenCLIPEmbeddings()\n",
")\n",
"\n",
"# Get image URIs\n",
"image_uris = sorted([os.path.join(path, image_name) \n",
" for image_name in os.listdir(path) \n",
" if image_name.endswith('.jpg')])\n",
"image_uris = sorted(\n",
" [\n",
" os.path.join(path, image_name)\n",
" for image_name in os.listdir(path)\n",
" if image_name.endswith(\".jpg\")\n",
" ]\n",
")\n",
"\n",
"# Add images and documents\n",
"if image_uris:\n",
@@ -435,7 +452,7 @@
"if tables:\n",
" multimodal_embd.add_texts(texts=tables)\n",
"\n",
"# Make retriever \n",
"# Make retriever\n",
"retriever_multimodal_embd = multimodal_embd.as_retriever()"
]
},
@@ -457,6 +474,7 @@
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"\n",
"# Prompt\n",
@@ -466,14 +484,14 @@
"\"\"\"\n",
"rag_prompt_text = ChatPromptTemplate.from_template(template)\n",
"\n",
"# Build \n",
"\n",
"# Build\n",
"def text_rag_chain(retriever):\n",
" \n",
" ''' RAG chain '''\n",
" \"\"\"RAG chain\"\"\"\n",
"\n",
" # LLM\n",
" model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
" \n",
"\n",
" # RAG pipeline\n",
" chain = (\n",
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
@@ -500,13 +518,16 @@
"metadata": {},
"outputs": [],
"source": [
"import re \n",
"import re\n",
"\n",
"from langchain.schema import Document\n",
"from langchain.schema.runnable import RunnableLambda\n",
"\n",
"\n",
"def looks_like_base64(sb):\n",
" \"\"\"Check if the string looks like base64.\"\"\"\n",
" return re.match('^[A-Za-z0-9+/]+[=]{0,2}$', sb) is not None\n",
" return re.match(\"^[A-Za-z0-9+/]+[=]{0,2}$\", sb) is not None\n",
"\n",
"\n",
"def is_image_data(b64data):\n",
" \"\"\"Check if the base64 data is an image by looking at the start of the data.\"\"\"\n",
@@ -514,7 +535,7 @@
" b\"\\xFF\\xD8\\xFF\": \"jpg\",\n",
" b\"\\x89\\x50\\x4E\\x47\\x0D\\x0A\\x1A\\x0A\": \"png\",\n",
" b\"\\x47\\x49\\x46\\x38\": \"gif\",\n",
" b\"\\x52\\x49\\x46\\x46\": \"webp\"\n",
" b\"\\x52\\x49\\x46\\x46\": \"webp\",\n",
" }\n",
" try:\n",
" header = base64.b64decode(b64data)[:8] # Decode and get the first 8 bytes\n",
@@ -525,6 +546,7 @@
" except Exception:\n",
" return False\n",
"\n",
"\n",
"def split_image_text_types(docs):\n",
" \"\"\"Split base64-encoded images and texts.\"\"\"\n",
" b64_images = []\n",
@@ -539,6 +561,7 @@
" texts.append(doc)\n",
" return {\"images\": b64_images, \"texts\": texts}\n",
"\n",
"\n",
"def img_prompt_func(data_dict):\n",
" # Joining the context texts into a single string\n",
" formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n",
@@ -550,7 +573,7 @@
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n",
" }\n",
" },\n",
" }\n",
" messages.append(image_message)\n",
"\n",
@@ -563,22 +586,24 @@
" f\"User-provided question / keywords: {data_dict['question']}\\n\\n\"\n",
" \"Text and / or tables:\\n\"\n",
" f\"{formatted_texts}\"\n",
" )\n",
" ),\n",
" }\n",
" messages.append(text_message)\n",
" return [HumanMessage(content=messages)]\n",
"\n",
"\n",
"def multi_modal_rag_chain(retriever):\n",
" ''' Multi-modal RAG chain '''\n",
" \"\"\"Multi-modal RAG chain\"\"\"\n",
"\n",
" # Multi-modal LLM\n",
" model = ChatOpenAI(temperature=0, \n",
" model=\"gpt-4-vision-preview\", \n",
" max_tokens=1024)\n",
" \n",
" model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
" # RAG pipeline\n",
" chain = (\n",
" {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
" {\n",
" \"context\": retriever | RunnableLambda(split_image_text_types),\n",
" \"question\": RunnablePassthrough(),\n",
" }\n",
" | RunnableLambda(img_prompt_func)\n",
" | model\n",
" | StrOutputParser()\n",
@@ -603,12 +628,12 @@
"outputs": [],
"source": [
"# RAG chains\n",
"chain_baseline=text_rag_chain(retriever_baseline)\n",
"chain_mv_text=text_rag_chain(retriever_multi_vector_img_summary)\n",
"chain_baseline = text_rag_chain(retriever_baseline)\n",
"chain_mv_text = text_rag_chain(retriever_multi_vector_img_summary)\n",
"\n",
"# Multi-modal RAG chains\n",
"chain_multimodal_mv_img=multi_modal_rag_chain(retriever_multi_vector_img)\n",
"chain_multimodal_embd=multi_modal_rag_chain(retriever_multimodal_embd)"
"chain_multimodal_mv_img = multi_modal_rag_chain(retriever_multi_vector_img)\n",
"chain_multimodal_embd = multi_modal_rag_chain(retriever_multimodal_embd)"
]
},
{
@@ -694,7 +719,8 @@
"source": [
"# Read\n",
"import pandas as pd\n",
"eval_set = pd.read_csv(path+'cpi_eval.csv')\n",
"\n",
"eval_set = pd.read_csv(path + \"cpi_eval.csv\")\n",
"eval_set.head(3)"
]
},
@@ -715,12 +741,12 @@
"# Populate dataset\n",
"for _, row in eval_set.iterrows():\n",
" # Get Q, A\n",
" q = row['Question']\n",
" a = row['Answer']\n",
" q = row[\"Question\"]\n",
" a = row[\"Answer\"]\n",
" # Use the values in your function\n",
" client.create_example(inputs={\"question\": q}, \n",
" outputs={\"answer\": a}, \n",
" dataset_id=dataset.id)"
" client.create_example(\n",
" inputs={\"question\": q}, outputs={\"answer\": a}, dataset_id=dataset.id\n",
" )"
]
},
{
@@ -764,17 +790,22 @@
" evaluators=[\"qa\"],\n",
")\n",
"\n",
"def run_eval(chain,run_name,dataset_name):\n",
"\n",
"def run_eval(chain, run_name, dataset_name):\n",
" _ = client.run_on_dataset(\n",
" dataset_name=dataset_name,\n",
" llm_or_chain_factory=lambda: (lambda x: x[\"question\"]+suffix_for_images) | chain,\n",
" llm_or_chain_factory=lambda: (lambda x: x[\"question\"] + suffix_for_images)\n",
" | chain,\n",
" evaluation=eval_config,\n",
" project_name=run_name,\n",
" )\n",
"\n",
"for chain, run in zip([chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd], \n",
" [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"]):\n",
" run_eval(chain, dataset_name+\"-\"+run, dataset_name)"
"\n",
"for chain, run in zip(\n",
" [chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd],\n",
" [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"],\n",
"):\n",
" run_eval(chain, dataset_name + \"-\" + run, dataset_name)"
]
}
],

View File

@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "f69d4a4c-137d-47e9-bea1-786afce9c1c0",
"metadata": {},
"source": [
"# Analyze a single long document\n",
"\n",
"The AnalyzeDocumentChain takes in a single document, splits it up, and then runs it through a CombineDocumentsChain."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2a0707ce-6d2d-471b-bc33-64da32a7b3f0",
"metadata": {},
"outputs": [],
"source": [
"with open(\"../docs/docs/modules/state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ca14d161-2d5b-4a6c-a296-77d8ce4b28cd",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import AnalyzeDocumentChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9f97406c-85a9-45fb-99ce-9138c0ba3731",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.question_answering import load_qa_chain\n",
"\n",
"qa_chain = load_qa_chain(llm, chain_type=\"map_reduce\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0871a753-f5bb-4b4f-a394-f87f2691f659",
"metadata": {},
"outputs": [],
"source": [
"qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e6f86428-3c2c-46a0-a57c-e22826fdbf91",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The President said, \"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\"'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"qa_document_chain.run(\n",
" input_document=state_of_the_union,\n",
" question=\"what did the president say about justice breyer?\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -27,10 +27,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities import SerpAPIWrapper\n",
"from langchain.agents import Tool\n",
"from langchain.tools.file_management.write import WriteFileTool\n",
"from langchain.tools.file_management.read import ReadFileTool\n",
"from langchain.tools.file_management.write import WriteFileTool\n",
"from langchain.utilities import SerpAPIWrapper\n",
"\n",
"search = SerpAPIWrapper()\n",
"tools = [\n",
@@ -61,9 +61,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.embeddings import OpenAIEmbeddings"
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.vectorstores import FAISS"
]
},
{
@@ -100,8 +100,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_experimental.autonomous_agents import AutoGPT\n",
"from langchain.chat_models import ChatOpenAI"
"from langchain.chat_models import ChatOpenAI\n",
"from langchain_experimental.autonomous_agents import AutoGPT"
]
},
{

View File

@@ -34,16 +34,15 @@
"outputs": [],
"source": [
"# General\n",
"import os\n",
"import pandas as pd\n",
"from langchain_experimental.autonomous_agents import AutoGPT\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent\n",
"from langchain.docstore.document import Document\n",
"import asyncio\n",
"import nest_asyncio\n",
"import os\n",
"\n",
"import nest_asyncio\n",
"import pandas as pd\n",
"from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.docstore.document import Document\n",
"from langchain_experimental.autonomous_agents import AutoGPT\n",
"\n",
"# Needed synce jupyter runs an async eventloop\n",
"nest_asyncio.apply()"
@@ -92,6 +91,7 @@
"import os\n",
"from contextlib import contextmanager\n",
"from typing import Optional\n",
"\n",
"from langchain.agents import tool\n",
"from langchain.tools.file_management.read import ReadFileTool\n",
"from langchain.tools.file_management.write import WriteFileTool\n",
@@ -223,14 +223,13 @@
},
"outputs": [],
"source": [
"from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"\n",
"from pydantic import Field\n",
"from langchain.chains.qa_with_sources.loading import (\n",
" load_qa_with_sources_chain,\n",
" BaseCombineDocumentsChain,\n",
" load_qa_with_sources_chain,\n",
")\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
"from pydantic import Field\n",
"\n",
"\n",
"def _get_text_splitter():\n",
@@ -311,10 +310,9 @@
"source": [
"# Memory\n",
"import faiss\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.tools.human.tool import HumanInputRun\n",
"from langchain.vectorstores import FAISS\n",
"\n",
"embeddings_model = OpenAIEmbeddings()\n",
"embedding_size = 1536\n",

View File

@@ -29,16 +29,10 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from collections import deque\n",
"from typing import Dict, List, Optional, Any\n",
"from typing import Optional\n",
"\n",
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.llms import BaseLLM\n",
"from langchain.schema.vectorstore import VectorStore\n",
"from pydantic import BaseModel, Field\n",
"from langchain.chains.base import Chain\n",
"from langchain.llms import OpenAI\n",
"from langchain_experimental.autonomous_agents import BabyAGI"
]
},
@@ -59,8 +53,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"from langchain.docstore import InMemoryDocstore"
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.vectorstores import FAISS"
]
},
{

View File

@@ -25,16 +25,12 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from collections import deque\n",
"from typing import Dict, List, Optional, Any\n",
"from typing import Optional\n",
"\n",
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.llms import BaseLLM\n",
"from langchain.schema.vectorstore import VectorStore\n",
"from pydantic import BaseModel, Field\n",
"from langchain.chains.base import Chain\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain_experimental.autonomous_agents import BabyAGI"
]
},
@@ -66,8 +62,8 @@
"source": [
"%pip install faiss-cpu > /dev/null\n",
"%pip install google-search-results > /dev/null\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.docstore import InMemoryDocstore"
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.vectorstores import FAISS"
]
},
{
@@ -110,8 +106,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n",
"from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n",
"from langchain.agents import AgentExecutor, Tool, ZeroShotAgent\n",
"from langchain.chains import LLMChain\n",
"from langchain.llms import OpenAI\n",
"from langchain.utilities import SerpAPIWrapper\n",
"\n",
"todo_prompt = PromptTemplate.from_template(\n",
" \"You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}\"\n",

View File

@@ -35,16 +35,17 @@
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts.chat import (\n",
" SystemMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
"from langchain.schema import (\n",
" AIMessage,\n",
" BaseMessage,\n",
" HumanMessage,\n",
" SystemMessage,\n",
" BaseMessage,\n",
")"
]
},

View File

@@ -47,10 +47,9 @@
"outputs": [],
"source": [
"from IPython.display import SVG\n",
"\n",
"from langchain.llms import OpenAI\n",
"from langchain_experimental.cpal.base import CPALChain\n",
"from langchain_experimental.pal_chain import PALChain\n",
"from langchain.llms import OpenAI\n",
"\n",
"llm = OpenAI(temperature=0, max_tokens=512)\n",
"cpal_chain = CPALChain.from_univariate_prompt(llm=llm, verbose=True)\n",

View File

@@ -177,7 +177,7 @@
" try:\n",
" loader = TextLoader(os.path.join(dirpath, file), encoding=\"utf-8\")\n",
" docs.extend(loader.load_and_split())\n",
" except Exception as e:\n",
" except Exception:\n",
" pass\n",
"print(f\"{len(docs)}\")"
]
@@ -648,7 +648,7 @@
{
"data": {
"text/plain": [
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='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', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})"
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})"
]
},
"execution_count": 13,
@@ -717,7 +717,6 @@
"source": [
"from langchain.vectorstores import DeepLake\n",
"\n",
"\n",
"username = \"<USERNAME_OR_ORG>\"\n",
"\n",
"\n",
@@ -834,8 +833,8 @@
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(\n",
" model_name=\"gpt-3.5-turbo-0613\"\n",

View File

@@ -32,19 +32,20 @@
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from typing import Union\n",
"\n",
"from langchain.agents import (\n",
" Tool,\n",
" AgentExecutor,\n",
" LLMSingleActionAgent,\n",
" AgentOutputParser,\n",
" LLMSingleActionAgent,\n",
")\n",
"from langchain.prompts import StringPromptTemplate\n",
"from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n",
"from typing import List, Union\n",
"from langchain.schema import AgentAction, AgentFinish\n",
"from langchain.agents.agent_toolkits import NLAToolkit\n",
"from langchain.tools.plugin import AIPlugin\n",
"import re"
"from langchain.chains import LLMChain\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import StringPromptTemplate\n",
"from langchain.schema import AgentAction, AgentFinish\n",
"from langchain.tools.plugin import AIPlugin"
]
},
{
@@ -113,9 +114,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.schema import Document"
"from langchain.schema import Document\n",
"from langchain.vectorstores import FAISS"
]
},
{

View File

@@ -56,20 +56,21 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import (\n",
" Tool,\n",
" AgentExecutor,\n",
" LLMSingleActionAgent,\n",
" AgentOutputParser,\n",
")\n",
"from langchain.prompts import StringPromptTemplate\n",
"from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n",
"from typing import List, Union\n",
"from langchain.schema import AgentAction, AgentFinish\n",
"from langchain.agents.agent_toolkits import NLAToolkit\n",
"from langchain.tools.plugin import AIPlugin\n",
"import re\n",
"import plugnplai"
"from typing import Union\n",
"\n",
"import plugnplai\n",
"from langchain.agents import (\n",
" AgentExecutor,\n",
" AgentOutputParser,\n",
" LLMSingleActionAgent,\n",
")\n",
"from langchain.agents.agent_toolkits import NLAToolkit\n",
"from langchain.chains import LLMChain\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import StringPromptTemplate\n",
"from langchain.schema import AgentAction, AgentFinish\n",
"from langchain.tools.plugin import AIPlugin"
]
},
{
@@ -137,9 +138,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.schema import Document"
"from langchain.schema import Document\n",
"from langchain.vectorstores import FAISS"
]
},
{

View File

@@ -48,18 +48,17 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"from langchain.document_loaders import PyPDFLoader, TextLoader\n",
"import os\n",
"\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.llms import OpenAI\n",
"from langchain.text_splitter import (\n",
" RecursiveCharacterTextSplitter,\n",
" CharacterTextSplitter,\n",
" RecursiveCharacterTextSplitter,\n",
")\n",
"from langchain.vectorstores import DeepLake\n",
"from langchain.chains import ConversationalRetrievalChain, RetrievalQA\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.llms import OpenAI\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",

File diff suppressed because one or more lines are too long

View File

@@ -38,9 +38,8 @@
"outputs": [],
"source": [
"from elasticsearch import Elasticsearch\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains.elasticsearch_database import ElasticsearchDatabaseChain"
"from langchain.chains.elasticsearch_database import ElasticsearchDatabaseChain\n",
"from langchain.chat_models import ChatOpenAI"
]
},
{
@@ -112,7 +111,6 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.elasticsearch_database.prompts import DEFAULT_DSL_TEMPLATE\n",
"from langchain.prompts.prompt import PromptTemplate\n",
"\n",
"PROMPT_TEMPLATE = \"\"\"Given an input question, create a syntactically correct Elasticsearch query to run. Unless the user specifies in their question a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.\n",

View File

@@ -19,10 +19,11 @@
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Optional\n",
"\n",
"from langchain.chains.openai_tools import create_extraction_chain_pydantic\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.pydantic_v1 import BaseModel\n",
"from typing import Optional, List\n",
"from langchain.chains.openai_tools import create_extraction_chain_pydantic"
"from langchain.pydantic_v1 import BaseModel"
]
},
{

View File

@@ -30,9 +30,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import load_tools\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType"
"from langchain.agents import AgentType, initialize_agent, load_tools"
]
},
{

View File

@@ -56,7 +56,8 @@
"source": [
"import os\n",
"\n",
"os.environ[\"SERPER_API_KEY\"] = \"\"os.environ[\"OPENAI_API_KEY\"] = \"\""
"os.environ[\"SERPER_API_KEY\"] = \"\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
@@ -66,21 +67,16 @@
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from typing import Any, List\n",
"\n",
"import numpy as np\n",
"\n",
"from langchain.schema import BaseRetriever\n",
"from langchain.callbacks.manager import (\n",
" AsyncCallbackManagerForRetrieverRun,\n",
" CallbackManagerForRetrieverRun,\n",
")\n",
"from langchain.utilities import GoogleSerperAPIWrapper\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.llms import OpenAI\n",
"from langchain.schema import Document\n",
"from typing import Any, List"
"from langchain.schema import BaseRetriever, Document\n",
"from langchain.utilities import GoogleSerperAPIWrapper"
]
},
{

View File

@@ -46,14 +46,13 @@
"source": [
"from datetime import datetime, timedelta\n",
"from typing import List\n",
"from termcolor import colored\n",
"\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.retrievers import TimeWeightedVectorStoreRetriever\n",
"from langchain.vectorstores import FAISS"
"from langchain.vectorstores import FAISS\n",
"from termcolor import colored"
]
},
{
@@ -153,6 +152,7 @@
"outputs": [],
"source": [
"import math\n",
"\n",
"import faiss\n",
"\n",
"\n",

View File

@@ -27,18 +27,12 @@
"metadata": {},
"outputs": [],
"source": [
"import gymnasium as gym\n",
"import inspect\n",
"import tenacity\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.output_parsers import RegexParser\n",
"from langchain.schema import (\n",
" AIMessage,\n",
" HumanMessage,\n",
" SystemMessage,\n",
" BaseMessage,\n",
")\n",
"from langchain.output_parsers import RegexParser"
")"
]
},
{
@@ -131,7 +125,7 @@
" ):\n",
" with attempt:\n",
" action = self._act()\n",
" except tenacity.RetryError as e:\n",
" except tenacity.RetryError:\n",
" action = self.random_action()\n",
" return action"
]

View File

@@ -55,9 +55,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import load_tools\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType"
"from langchain.agents import AgentType, initialize_agent, load_tools"
]
},
{

View File

@@ -28,9 +28,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import load_tools\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType"
"from langchain.agents import AgentType, initialize_agent, load_tools"
]
},
{

View File

@@ -20,9 +20,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.chains import HypotheticalDocumentEmbedder, LLMChain\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.chains import LLMChain, HypotheticalDocumentEmbedder\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import PromptTemplate"
]
},

View File

@@ -790,8 +790,8 @@
}
],
"source": [
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain.globals import set_debug\n",
"from langchain.prompts.prompt import PromptTemplate\n",
"\n",
"set_debug(True)\n",
"\n",

View File

@@ -43,8 +43,8 @@
}
],
"source": [
"from langchain_experimental.llm_bash.base import LLMBashChain\n",
"from langchain.llms import OpenAI\n",
"from langchain_experimental.llm_bash.base import LLMBashChain\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"\n",
@@ -70,7 +70,7 @@
"outputs": [],
"source": [
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain.chains.llm_bash.prompt import BashOutputParser\n",
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
"\n",
"_PROMPT_TEMPLATE = \"\"\"If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put \"#!/bin/bash\" in your answer. Make sure to reason step by step, using this format:\n",
"Question: \"copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'\"\n",
@@ -185,7 +185,6 @@
"source": [
"from langchain_experimental.llm_bash.bash import BashProcess\n",
"\n",
"\n",
"persistent_process = BashProcess(persistent=True)\n",
"bash_chain = LLMBashChain.from_llm(llm, bash_process=persistent_process, verbose=True)\n",
"\n",

View File

@@ -45,7 +45,8 @@
}
],
"source": [
"from langchain.llms import OpenAI\nfrom langchain.chains import LLMMathChain\n",
"from langchain.chains import LLMMathChain\n",
"from langchain.llms import OpenAI\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"llm_math = LLMMathChain.from_llm(llm, verbose=True)\n",

View File

@@ -56,8 +56,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\nfrom langchain.chains import LLMChain\nfrom langchain.prompts import PromptTemplate\n",
"from langchain.memory import ConversationBufferWindowMemory"
"from langchain.chains import LLMChain\n",
"from langchain.llms import OpenAI\n",
"from langchain.memory import ConversationBufferWindowMemory\n",
"from langchain.prompts import PromptTemplate"
]
},
{
@@ -152,13 +154,13 @@
" for j in range(max_iters):\n",
" print(f\"(Step {j+1}/{max_iters})\")\n",
" print(f\"Assistant: {output}\")\n",
" print(f\"Human: \")\n",
" print(\"Human: \")\n",
" human_input = input()\n",
" if any(phrase in human_input.lower() for phrase in key_phrases):\n",
" break\n",
" output = chain.predict(human_input=human_input)\n",
" if success_phrase in human_input.lower():\n",
" print(f\"You succeeded! Thanks for playing!\")\n",
" print(\"You succeeded! Thanks for playing!\")\n",
" return\n",
" meta_chain = initialize_meta_chain()\n",
" meta_output = meta_chain.predict(chat_history=get_chat_history(chain.memory))\n",
@@ -166,7 +168,7 @@
" instructions = get_new_instructions(meta_output)\n",
" print(f\"New Instructions: {instructions}\")\n",
" print(\"\\n\" + \"#\" * 80 + \"\\n\")\n",
" print(f\"You failed! Thanks for playing!\")"
" print(\"You failed! Thanks for playing!\")"
]
},
{

View File

@@ -40,12 +40,13 @@
}
],
"source": [
"import os\n",
"import io\n",
"import base64\n",
"import io\n",
"import os\n",
"\n",
"import numpy as np\n",
"from IPython.display import HTML, display\n",
"from PIL import Image\n",
"from IPython.display import display, HTML\n",
"\n",
"\n",
"def encode_image(image_path):\n",

View File

@@ -115,7 +115,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Folder with pdf and extracted images \n",
"# Folder with pdf and extracted images\n",
"path = \"/Users/rlm/Desktop/photos/\""
]
},
@@ -128,9 +128,10 @@
"source": [
"# Extract images, tables, and chunk text\n",
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"raw_pdf_elements = partition_pdf(\n",
" filename=path + \"photos.pdf\",\n",
" extract_images_in_pdf=True, \n",
" extract_images_in_pdf=True,\n",
" infer_table_structure=True,\n",
" chunking_strategy=\"by_title\",\n",
" max_characters=4000,\n",
@@ -183,22 +184,26 @@
"source": [
"import os\n",
"import uuid\n",
"\n",
"import chromadb\n",
"import numpy as np\n",
"from PIL import Image as _PILImage\n",
"from langchain.vectorstores import Chroma\n",
"from langchain_experimental.open_clip import OpenCLIPEmbeddings\n",
"from PIL import Image as _PILImage\n",
"\n",
"# Create chroma\n",
"vectorstore = Chroma(\n",
" collection_name=\"mm_rag_clip_photos\",\n",
" embedding_function=OpenCLIPEmbeddings()\n",
" collection_name=\"mm_rag_clip_photos\", embedding_function=OpenCLIPEmbeddings()\n",
")\n",
"\n",
"# Get image URIs with .jpg extension only\n",
"image_uris = sorted([os.path.join(path, image_name) \n",
" for image_name in os.listdir(path) \n",
" if image_name.endswith('.jpg')])\n",
"image_uris = sorted(\n",
" [\n",
" os.path.join(path, image_name)\n",
" for image_name in os.listdir(path)\n",
" if image_name.endswith(\".jpg\")\n",
" ]\n",
")\n",
"\n",
"# Add images\n",
"vectorstore.add_images(uris=image_uris)\n",
@@ -206,7 +211,7 @@
"# Add documents\n",
"vectorstore.add_texts(texts=texts)\n",
"\n",
"# Make retriever \n",
"# Make retriever\n",
"retriever = vectorstore.as_retriever()"
]
},
@@ -229,12 +234,14 @@
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import numpy as np\n",
"import base64\n",
"import io\n",
"from io import BytesIO\n",
"\n",
"import numpy as np\n",
"from PIL import Image\n",
"\n",
"\n",
"def resize_base64_image(base64_string, size=(128, 128)):\n",
" \"\"\"\n",
" Resize an image encoded as a Base64 string.\n",
@@ -258,30 +265,31 @@
" resized_img.save(buffered, format=img.format)\n",
"\n",
" # Encode the resized image to Base64\n",
" return base64.b64encode(buffered.getvalue()).decode('utf-8')\n",
" return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n",
"\n",
"\n",
"def is_base64(s):\n",
" ''' Check if a string is Base64 encoded '''\n",
" \"\"\"Check if a string is Base64 encoded\"\"\"\n",
" try:\n",
" return base64.b64encode(base64.b64decode(s)) == s.encode()\n",
" except Exception:\n",
" return False\n",
" \n",
"\n",
"\n",
"def split_image_text_types(docs):\n",
" ''' Split numpy array images and texts '''\n",
" \"\"\"Split numpy array images and texts\"\"\"\n",
" images = []\n",
" text = []\n",
" for doc in docs:\n",
" doc = doc.page_content # Extract Document contents \n",
" doc = doc.page_content # Extract Document contents\n",
" if is_base64(doc):\n",
" # Resize image to avoid OAI server error\n",
" images.append(resize_base64_image(doc, size=(250, 250))) # base64 encoded str \n",
" images.append(\n",
" resize_base64_image(doc, size=(250, 250))\n",
" ) # base64 encoded str\n",
" else:\n",
" text.append(doc) \n",
" return {\n",
" \"images\": images,\n",
" \"texts\": text\n",
" }"
" text.append(doc)\n",
" return {\"images\": images, \"texts\": text}"
]
},
{
@@ -306,10 +314,12 @@
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
"from langchain.schema.messages import HumanMessage, SystemMessage\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnableLambda, RunnablePassthrough\n",
"\n",
"\n",
"def prompt_func(data_dict):\n",
" # Joining the context texts into a single string\n",
@@ -322,7 +332,7 @@
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n",
" }\n",
" },\n",
" }\n",
" messages.append(image_message)\n",
"\n",
@@ -342,17 +352,21 @@
" f\"User-provided keywords: {data_dict['question']}\\n\\n\"\n",
" \"Text and / or tables:\\n\"\n",
" f\"{formatted_texts}\"\n",
" )\n",
" ),\n",
" }\n",
" messages.append(text_message)\n",
"\n",
" return [HumanMessage(content=messages)]\n",
" \n",
"\n",
"\n",
"model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
"# RAG pipeline\n",
"chain = (\n",
" {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
" {\n",
" \"context\": retriever | RunnableLambda(split_image_text_types),\n",
" \"question\": RunnablePassthrough(),\n",
" }\n",
" | RunnableLambda(prompt_func)\n",
" | model\n",
" | StrOutputParser()\n",
@@ -410,17 +424,18 @@
}
],
"source": [
"from IPython.display import display, HTML\n",
"from IPython.display import HTML, display\n",
"\n",
"\n",
"def plt_img_base64(img_base64):\n",
"\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
" \n",
"\n",
" # Display the image by rendering the HTML\n",
" display(HTML(image_html))\n",
"\n",
"docs = retriever.get_relevant_documents(\"Woman with children\",k=10)\n",
"\n",
"docs = retriever.get_relevant_documents(\"Woman with children\", k=10)\n",
"for doc in docs:\n",
" if is_base64(doc.page_content):\n",
" plt_img_base64(doc.page_content)\n",
@@ -446,9 +461,7 @@
}
],
"source": [
"chain.invoke(\n",
" \"Woman with children\"\n",
")"
"chain.invoke(\"Woman with children\")"
]
},
{

View File

@@ -29,9 +29,10 @@
"metadata": {},
"outputs": [],
"source": [
"from steamship import Block, Steamship\n",
"import re\n",
"from IPython.display import Image"
"\n",
"from IPython.display import Image\n",
"from steamship import Block, Steamship"
]
},
{
@@ -41,9 +42,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain.llms import OpenAI\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType\n",
"from langchain.tools import SteamshipImageGenerationTool"
]
},

View File

@@ -26,13 +26,12 @@
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Dict, Callable\n",
"from typing import Callable, List\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.schema import (\n",
" AIMessage,\n",
" HumanMessage,\n",
" SystemMessage,\n",
" BaseMessage,\n",
")"
]
},

View File

@@ -27,26 +27,20 @@
"metadata": {},
"outputs": [],
"source": [
"from collections import OrderedDict\n",
"import functools\n",
"import random\n",
"import re\n",
"import tenacity\n",
"from typing import List, Dict, Callable\n",
"from collections import OrderedDict\n",
"from typing import Callable, List\n",
"\n",
"from langchain.prompts import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" PromptTemplate,\n",
")\n",
"from langchain.chains import LLMChain\n",
"import tenacity\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.output_parsers import RegexParser\n",
"from langchain.prompts import (\n",
" PromptTemplate,\n",
")\n",
"from langchain.schema import (\n",
" AIMessage,\n",
" HumanMessage,\n",
" SystemMessage,\n",
" BaseMessage,\n",
")"
]
},

View File

@@ -24,17 +24,15 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"import re\n",
"from typing import Callable, List\n",
"\n",
"import tenacity\n",
"from typing import List, Dict, Callable\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.output_parsers import RegexParser\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.schema import (\n",
" AIMessage,\n",
" HumanMessage,\n",
" SystemMessage,\n",
" BaseMessage,\n",
")"
]
},

View File

@@ -27,18 +27,15 @@
"metadata": {},
"outputs": [],
"source": [
"from os import environ\n",
"import getpass\n",
"from typing import Dict, Any\n",
"from langchain.llms import OpenAI\n",
"from langchain.utilities import SQLDatabase\n",
"from os import environ\n",
"\n",
"from langchain.chains import LLMChain\n",
"from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
"from sqlalchemy import create_engine, Column, MetaData\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"\n",
"\n",
"from sqlalchemy import create_engine\n",
"from langchain.utilities import SQLDatabase\n",
"from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
"from sqlalchemy import MetaData, create_engine\n",
"\n",
"MYSCALE_HOST = \"msc-4a9e710a.us-east-1.aws.staging.myscale.cloud\"\n",
"MYSCALE_PORT = 443\n",
@@ -77,9 +74,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.callbacks import StdOutCallbackHandler\n",
"\n",
"from langchain.llms import OpenAI\n",
"from langchain.utilities.sql_database import SQLDatabase\n",
"from langchain_experimental.sql.prompt import MYSCALE_PROMPT\n",
"from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
@@ -120,15 +116,16 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain\n",
"\n",
"from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain_experimental.retrievers.vector_sql_database import (\n",
" VectorSQLDatabaseChainRetriever,\n",
")\n",
"from langchain_experimental.sql.prompt import MYSCALE_PROMPT\n",
"from langchain_experimental.sql.vector_sql import VectorSQLRetrieveAllOutputParser\n",
"from langchain_experimental.sql.vector_sql import (\n",
" VectorSQLDatabaseChain,\n",
" VectorSQLRetrieveAllOutputParser,\n",
")\n",
"\n",
"output_parser_retrieve_all = VectorSQLRetrieveAllOutputParser.from_embeddings(\n",
" output_parser.model\n",

View File

@@ -50,10 +50,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains import create_qa_with_sources_chain\n",
"from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import create_qa_with_sources_chain"
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import PromptTemplate"
]
},
{
@@ -230,9 +230,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.chains import ConversationalRetrievalChain, LLMChain\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain.chains import LLMChain\n",
"\n",
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
"_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\\\n",
@@ -357,12 +356,10 @@
"source": [
"from typing import List\n",
"\n",
"from pydantic import BaseModel, Field\n",
"\n",
"from langchain.chains.openai_functions import create_qa_with_structure_chain\n",
"\n",
"from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate\n",
"from langchain.schema import SystemMessage, HumanMessage"
"from langchain.schema import HumanMessage, SystemMessage\n",
"from pydantic import BaseModel, Field"
]
},
{

View File

@@ -167,7 +167,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import E2BDataAnalysisTool, DuckDuckGoSearchRun\n",
"from langchain.tools import DuckDuckGoSearchRun, E2BDataAnalysisTool\n",
"\n",
"tools = [E2BDataAnalysisTool(api_key=\"...\"), DuckDuckGoSearchRun()]"
]
@@ -419,7 +419,7 @@
"\n",
"\n",
"RECOMMENDED CHANGES:\n",
"- When using AzureChatOpenAI, if passing in an Azure endpoint (eg https://example-resource.azure.openai.com/) this should be specified via the `azure_endpoint` parameter or the `AZURE_OPENAI_ENDPOINT`. We're maintaining backwards compatibility for now with specifying this via `openai_api_base`/`base_url` or env var `OPENAI_API_BASE` but this shouldn't be relied upon.\n",
"- When using `AzureChatOpenAI` or `AzureOpenAI`, if passing in an Azure endpoint (eg https://example-resource.azure.openai.com/) this should be specified via the `azure_endpoint` parameter or the `AZURE_OPENAI_ENDPOINT`. We're maintaining backwards compatibility for now with specifying this via `openai_api_base`/`base_url` or env var `OPENAI_API_BASE` but this shouldn't be relied upon.\n",
"- When using Azure chat or embedding models, pass in API keys either via `openai_api_key` parameter or `AZURE_OPENAI_API_KEY` parameter. We're maintaining backwards compatibility for now with specifying this via `OPENAI_API_KEY` but this shouldn't be relied upon."
]
},
@@ -456,9 +456,9 @@
"from typing import Literal\n",
"\n",
"from langchain.output_parsers.openai_tools import PydanticToolsParser\n",
"from langchain.utils.openai_functions import convert_pydantic_to_openai_tool\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.pydantic_v1 import BaseModel, Field\n",
"from langchain.utils.openai_functions import convert_pydantic_to_openai_tool\n",
"\n",
"\n",
"class GetCurrentWeather(BaseModel):\n",

View File

@@ -45,14 +45,14 @@
"source": [
"import collections\n",
"import inspect\n",
"import tenacity\n",
"\n",
"import tenacity\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.output_parsers import RegexParser\n",
"from langchain.schema import (\n",
" HumanMessage,\n",
" SystemMessage,\n",
")\n",
"from langchain.output_parsers import RegexParser"
")"
]
},
{
@@ -146,7 +146,7 @@
" ):\n",
" with attempt:\n",
" action = self._act()\n",
" except tenacity.RetryError as e:\n",
" except tenacity.RetryError:\n",
" action = self.random_action()\n",
" return action"
]

View File

@@ -17,8 +17,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_experimental.pal_chain import PALChain\n",
"from langchain.llms import OpenAI"
"from langchain.llms import OpenAI\n",
"from langchain_experimental.pal_chain import PALChain"
]
},
{

View File

@@ -54,13 +54,13 @@
"metadata": {},
"outputs": [],
"source": [
"from baidubce.bce_client_configuration import BceClientConfiguration\n",
"from baidubce.auth.bce_credentials import BceCredentials\n",
"from baidubce.bce_client_configuration import BceClientConfiguration\n",
"from langchain.document_loaders.baiducloud_bos_directory import BaiduBOSDirectoryLoader\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
"from langchain.vectorstores import BESVectorStore\n",
"from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint"
"from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.vectorstores import BESVectorStore"
]
},
{
@@ -82,7 +82,9 @@
"secret_access_key = \"your bos access sk\"\n",
"\n",
"# create BceClientConfiguration\n",
"config = BceClientConfiguration(credentials=BceCredentials(access_key_id, secret_access_key), endpoint = bos_host)\n",
"config = BceClientConfiguration(\n",
" credentials=BceCredentials(access_key_id, secret_access_key), endpoint=bos_host\n",
")\n",
"\n",
"loader = BaiduBOSDirectoryLoader(conf=config, bucket=\"llm-test\", prefix=\"llm/\")\n",
"documents = loader.load()\n",
@@ -109,10 +111,14 @@
"embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name)\n",
"\n",
"db = BESVectorStore.from_documents(\n",
" documents=split_docs, embedding=embeddings, bes_url=\"your bes url\", index_name='test-index', vector_query_field='vector'\n",
" )\n",
" documents=split_docs,\n",
" embedding=embeddings,\n",
" bes_url=\"your bes url\",\n",
" index_name=\"test-index\",\n",
" vector_query_field=\"vector\",\n",
")\n",
"\n",
"db.client.indices.refresh(index='test-index')\n",
"db.client.indices.refresh(index=\"test-index\")\n",
"retriever = db.as_retriever()"
]
},
@@ -130,8 +136,15 @@
"metadata": {},
"outputs": [],
"source": [
"llm = QianfanLLMEndpoint(model=\"ERNIE-Bot\", qianfan_ak='your qianfan ak', qianfan_sk='your qianfan sk', streaming=True)\n",
"qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True)\n",
"llm = QianfanLLMEndpoint(\n",
" model=\"ERNIE-Bot\",\n",
" qianfan_ak=\"your qianfan ak\",\n",
" qianfan_sk=\"your qianfan sk\",\n",
" streaming=True,\n",
")\n",
"qa = RetrievalQA.from_chain_type(\n",
" llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True\n",
")\n",
"\n",
"query = \"什么是张量?\"\n",
"print(qa.run(query))"

View File

@@ -30,8 +30,8 @@
"outputs": [],
"source": [
"import pinecone\n",
"from langchain.vectorstores import Pinecone\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.vectorstores import Pinecone\n",
"\n",
"pinecone.init(api_key=\"...\", environment=\"...\")"
]
@@ -87,7 +87,6 @@
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser"
]
},

View File

@@ -28,8 +28,8 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = os.environ.get(\"OPENAI_API_KEY\") or getpass.getpass(\n",
" \"OpenAI API Key:\"\n",
@@ -42,8 +42,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.sql_database import SQLDatabase\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.sql_database import SQLDatabase\n",
"\n",
"CONNECTION_STRING = \"postgresql+psycopg2://postgres:test@localhost:5432/vectordb\" # Replace with your own\n",
"db = SQLDatabase.from_uri(CONNECTION_STRING)"
@@ -323,6 +323,7 @@
"outputs": [],
"source": [
"import re\n",
"\n",
"from langchain.schema.runnable import RunnableLambda\n",
"\n",
"\n",

View File

@@ -31,12 +31,10 @@
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"from langchain.utilities import DuckDuckGoSearchAPIWrapper"
]
},

View File

@@ -42,22 +42,22 @@
"OPENAI_API_KEY = \"sk-xx\"\n",
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n",
"\n",
"from typing import Dict, List, Any, Union, Callable\n",
"from pydantic import BaseModel, Field\n",
"from langchain.chains import LLMChain\nfrom langchain.prompts import PromptTemplate\n",
"from langchain.llms import BaseLLM\n",
"from langchain.chains.base import Chain\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.agents import Tool, LLMSingleActionAgent, AgentExecutor\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts.base import StringPromptTemplate\n",
"from typing import Any, Callable, Dict, List, Union\n",
"\n",
"from langchain.agents import AgentExecutor, LLMSingleActionAgent, Tool\n",
"from langchain.agents.agent import AgentOutputParser\n",
"from langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS\n",
"from langchain.schema import AgentAction, AgentFinish"
"from langchain.chains import LLMChain, RetrievalQA\n",
"from langchain.chains.base import Chain\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.llms import BaseLLM, OpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.prompts.base import StringPromptTemplate\n",
"from langchain.schema import AgentAction, AgentFinish\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import Chroma\n",
"from pydantic import BaseModel, Field"
]
},
{

View File

@@ -17,12 +17,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.schema.prompt import PromptValue\n",
"from langchain.schema.messages import BaseMessage\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from typing import Union, Sequence"
"from langchain.schema.prompt import PromptValue"
]
},
{

View File

@@ -1084,7 +1084,6 @@
"outputs": [],
"source": [
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.schema import Document\n",
"from langchain.vectorstores import ElasticsearchStore\n",
"\n",
"embeddings = OpenAIEmbeddings()"

View File

@@ -51,8 +51,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain_experimental.smart_llm import SmartLLMChain"
]
},

View File

@@ -131,7 +131,6 @@
"source": [
"from langchain.utilities import DuckDuckGoSearchAPIWrapper\n",
"\n",
"\n",
"search = DuckDuckGoSearchAPIWrapper(max_results=4)\n",
"\n",
"\n",

View File

@@ -84,10 +84,11 @@
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from typing import Tuple\n",
"\n",
"from langchain_experimental.tot.checker import ToTChecker\n",
"from langchain_experimental.tot.thought import ThoughtValidity\n",
"import re\n",
"\n",
"\n",
"class MyChecker(ToTChecker):\n",

View File

@@ -34,8 +34,8 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"import os\n",
"\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import DeepLake\n",
@@ -109,6 +109,7 @@
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"\n",
"root_dir = \"./the-algorithm\"\n",
@@ -118,7 +119,7 @@
" try:\n",
" loader = TextLoader(os.path.join(dirpath, file), encoding=\"utf-8\")\n",
" docs.extend(loader.load_and_split())\n",
" except Exception as e:\n",
" except Exception:\n",
" pass"
]
},
@@ -3807,8 +3808,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model_name=\"gpt-3.5-turbo-0613\") # switch to 'gpt-4'\n",
"qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"

View File

@@ -22,17 +22,14 @@
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Dict, Callable\n",
"from langchain.chains import ConversationChain\n",
"from typing import Callable, List\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.llms import OpenAI\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain.schema import (\n",
" AIMessage,\n",
" HumanMessage,\n",
" SystemMessage,\n",
" BaseMessage,\n",
")"
]
},
@@ -49,10 +46,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import Tool\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType\n",
"from langchain.agents import load_tools"
"from langchain.agents import AgentType, initialize_agent, load_tools"
]
},
{

View File

@@ -22,7 +22,8 @@
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Dict, Callable\n",
"from typing import Callable, List\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.schema import (\n",
" HumanMessage,\n",

View File

@@ -192,10 +192,10 @@
" return current\n",
"\n",
"\n",
"import requests\n",
"\n",
"from typing import Optional\n",
"\n",
"import requests\n",
"\n",
"\n",
"def vocab_lookup(\n",
" search: str,\n",
@@ -319,9 +319,10 @@
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from typing import List, Dict, Any\n",
"import json\n",
"from typing import Any, Dict, List\n",
"\n",
"import requests\n",
"\n",
"\n",
"def run_sparql(\n",
@@ -389,17 +390,18 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import (\n",
" Tool,\n",
" AgentExecutor,\n",
" LLMSingleActionAgent,\n",
" AgentOutputParser,\n",
")\n",
"from langchain.prompts import StringPromptTemplate\n",
"from langchain.llms import OpenAI\nfrom langchain.chains import LLMChain\n",
"import re\n",
"from typing import List, Union\n",
"from langchain.schema import AgentAction, AgentFinish\n",
"import re"
"\n",
"from langchain.agents import (\n",
" AgentExecutor,\n",
" AgentOutputParser,\n",
" LLMSingleActionAgent,\n",
" Tool,\n",
")\n",
"from langchain.chains import LLMChain\n",
"from langchain.prompts import StringPromptTemplate\n",
"from langchain.schema import AgentAction, AgentFinish"
]
},
{

View File

@@ -15,3 +15,11 @@ pre {
#my-component-root *, #headlessui-portal-root * {
z-index: 10000;
}
table.longtable code {
white-space: normal;
}
table.longtable td {
max-width: 600px;
}

View File

@@ -13,8 +13,10 @@ HERE = Path(__file__).parent
PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain"
EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental"
CORE_DIR = ROOT_DIR / "libs" / "core" / "langchain_core"
WRITE_FILE = HERE / "api_reference.rst"
EXP_WRITE_FILE = HERE / "experimental_api_reference.rst"
CORE_WRITE_FILE = HERE / "core_api_reference.rst"
ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"]
@@ -292,6 +294,17 @@ def _document_langchain_experimental() -> None:
def _document_langchain_core() -> None:
"""Document the langchain_core package."""
# Generate core_api_reference.rst
core_members = _load_package_modules(CORE_DIR)
core_doc = ".. _core_api_reference:\n\n" + _construct_doc(
"langchain_core", core_members
)
with open(CORE_WRITE_FILE, "w") as f:
f.write(core_doc)
def _document_langchain() -> None:
"""Document the main langchain package."""
# load top level module members
lc_members = _load_package_modules(PKG_DIR)
@@ -306,7 +319,6 @@ def _document_langchain_core() -> None:
"agents.output_parsers": agents["output_parsers"],
"agents.format_scratchpad": agents["format_scratchpad"],
"tools.render": tools["render"],
"schema.runnable": schema["runnable"],
}
)
@@ -318,8 +330,9 @@ def _document_langchain_core() -> None:
def main() -> None:
"""Generate the reference.rst file for each package."""
_document_langchain_core()
_document_langchain()
_document_langchain_experimental()
_document_langchain_core()
if __name__ == "__main__":

View File

@@ -1,5 +1,6 @@
-e libs/langchain
-e libs/experimental
-e libs/core
pydantic<2
autodoc_pydantic==1.8.0
myst_parser

View File

@@ -34,6 +34,9 @@
<li class="nav-item">
<a class="sk-nav-link nav-link" href="{{ pathto('api_reference') }}">API</a>
</li>
<li class="nav-item">
<a class="sk-nav-link nav-link" href="{{ pathto('core_api_reference') }}">Core</a>
</li>
<li class="nav-item">
<a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
</li>

View File

@@ -17,7 +17,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import XMLAgent, tool, AgentExecutor\n",
"from langchain.agents import AgentExecutor, XMLAgent, tool\n",
"from langchain.chat_models import ChatAnthropic"
]
},

View File

@@ -20,8 +20,6 @@
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain_experimental.utilities import PythonREPL"

View File

@@ -26,7 +26,6 @@
"from langchain.schema.runnable import RunnableLambda, RunnablePassthrough\n",
"from langchain.utils.math import cosine_similarity\n",
"\n",
"\n",
"physics_template = \"\"\"You are a very smart physics professor. \\\n",
"You are great at answering questions about physics in a concise and easy to understand manner. \\\n",
"When you don't know the answer to a question you admit that you don't know.\n",

View File

@@ -18,10 +18,11 @@
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain.schema.runnable import RunnableLambda, RunnablePassthrough\n",
"\n",
"model = ChatOpenAI()\n",
"prompt = ChatPromptTemplate.from_messages(\n",

View File

@@ -69,7 +69,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema.runnable import RunnableMap, RunnablePassthrough\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"\n",
"prompt1 = ChatPromptTemplate.from_template(\n",
" \"generate a {attribute} color. Return the name of the color and nothing else:\"\n",

View File

@@ -42,8 +42,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\"tell me a joke about {foo}\")\n",
"model = ChatOpenAI()\n",

File diff suppressed because one or more lines are too long

View File

@@ -38,11 +38,11 @@
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
"from langchain.schema.runnable import RunnableLambda, RunnablePassthrough\n",
"from langchain.vectorstores import FAISS"
]
},
@@ -170,8 +170,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema.runnable import RunnableMap\n",
"from langchain.schema import format_document"
"from langchain.schema import format_document\n",
"from langchain.schema.runnable import RunnableMap"
]
},
{
@@ -231,10 +231,16 @@
"metadata": {},
"outputs": [],
"source": [
"from typing import Tuple, List\n",
"from typing import List, Tuple\n",
"\n",
"\n",
"def _format_chat_history(chat_history: List[Tuple]) -> str:\n",
"def _format_chat_history(chat_history: List[Tuple[str, str]]) -> str:\n",
" # chat history is of format:\n",
" # [\n",
" # (human_message_str, ai_message_str),\n",
" # ...\n",
" # ]\n",
" # see below for an example of how it's invoked\n",
" buffer = \"\"\n",
" for dialogue_turn in chat_history:\n",
" human = \"Human: \" + dialogue_turn[0]\n",
@@ -335,6 +341,7 @@
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain.memory import ConversationBufferMemory"
]
},

View File

@@ -262,9 +262,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI, ChatAnthropic\n",
"from langchain.schema.runnable import ConfigurableField\n",
"from langchain.prompts import PromptTemplate"
"from langchain.chat_models import ChatAnthropic, ChatOpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.schema.runnable import ConfigurableField"
]
},
{

View File

@@ -31,7 +31,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI, ChatAnthropic"
"from langchain.chat_models import ChatAnthropic, ChatOpenAI"
]
},
{
@@ -50,6 +50,7 @@
"outputs": [],
"source": [
"from unittest.mock import patch\n",
"\n",
"from openai.error import RateLimitError"
]
},

View File

@@ -19,11 +19,12 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema.runnable import RunnableLambda\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
"from operator import itemgetter\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.runnable import RunnableLambda\n",
"\n",
"\n",
"def length_function(text):\n",
" return len(text)\n",
@@ -91,8 +92,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema.runnable import RunnableConfig\n",
"from langchain.schema.output_parser import StrOutputParser"
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnableConfig"
]
},
{

View File

@@ -29,7 +29,6 @@
"from langchain.prompts.chat import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\n",
" \"Write a comma-separated list of 5 animals similar to: {animal}\"\n",
")\n",

View File

@@ -33,7 +33,6 @@
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.runnable import RunnableParallel\n",
"\n",
"\n",
"model = ChatOpenAI()\n",
"joke_chain = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\") | model\n",
"poem_chain = (\n",
@@ -105,7 +104,7 @@
"source": [
"Here the input to prompt is expected to be a map with keys \"context\" and \"question\". The user input is just the question. So we need to get the context using our retriever and passthrough the user input under the \"question\" key.\n",
"\n",
"Note that when composing a RunnableMap when another Runnable we don't even need to wrap our dictionary in the RunnableMap class — the type conversion is handled for us."
"Note that when composing a RunnableMap with another Runnable we don't even need to wrap our dictionary in the RunnableMap class — the type conversion is handled for us."
]
},
{

View File

@@ -0,0 +1,396 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6a4becbd-238e-4c1d-a02d-08e61fbc3763",
"metadata": {},
"source": [
"# Add message history (memory)\n",
"\n",
"The `RunnableWithMessageHistory` let's us add message history to certain types of chains.\n",
"\n",
"Specifically, it can be used for any Runnable that takes as input one of\n",
"* a sequence of `BaseMessage`\n",
"* a dict with a key that takes a sequence of `BaseMessage`\n",
"* a dict with a key that takes the latest message(s) as a string or sequence of `BaseMessage`, and a separate key that takes historical messages\n",
"\n",
"And returns as output one of\n",
"* a string that can be treated as the contents of an `AIMessage`\n",
"* a sequence of `BaseMessage`\n",
"* a dict with a key that contains a sequence of `BaseMessage`\n",
"\n",
"Let's take a look at some examples to see how it works."
]
},
{
"cell_type": "markdown",
"id": "6bca45e5-35d9-4603-9ca9-6ac0ce0e35cd",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"We'll use Redis to store our chat message histories and Anthropic's claude-2 model so we'll need to install the following dependencies:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "477d04b3-c2b6-4ba5-962f-492c0d625cd5",
"metadata": {},
"outputs": [],
"source": [
"!pip install -U langchain redis anthropic"
]
},
{
"cell_type": "markdown",
"id": "93776323-d6b8-4912-bb6a-867c5e655f46",
"metadata": {},
"source": [
"Set your [Anthropic API key](https://console.anthropic.com/):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7f56f69-d2f1-4a21-990c-b5551eb012fa",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "6a0ec9e0-7b1c-4c6f-b570-e61d520b47c6",
"metadata": {},
"source": [
"Start a local Redis Stack server if we don't have an existing Redis deployment to connect to:\n",
"```bash\n",
"docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "cd6a250e-17fe-4368-a39d-1fe6b2cbde68",
"metadata": {},
"outputs": [],
"source": [
"REDIS_URL = \"redis://localhost:6379/0\""
]
},
{
"cell_type": "markdown",
"id": "36f43b87-655c-4f64-aa7b-bd8c1955d8e5",
"metadata": {},
"source": [
"### [LangSmith](/docs/langsmith)\n",
"\n",
"LangSmith is especially useful for something like message history injection, where it can be hard to otherwise understand what the inputs are to various parts of the chain.\n",
"\n",
"Note that LangSmith is not needed, but it is helpful.\n",
"If you do want to use LangSmith, after you sign up at the link above, make sure to uncoment the below and set your environment variables to start logging traces:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2afc1556-8da1-4499-ba11-983b66c58b18",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "1a5a632e-ba9e-4488-b586-640ad5494f62",
"metadata": {},
"source": [
"## Example: Dict input, message output\n",
"\n",
"Let's create a simple chain that takes a dict as input and returns a BaseMessage.\n",
"\n",
"In this case the `\"question\"` key in the input represents our input message, and the `\"history\"` key is where our historical messages will be injected."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2a150d6f-8878-4950-8634-a608c5faad56",
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional\n",
"\n",
"from langchain.chat_models import ChatAnthropic\n",
"from langchain.memory.chat_message_histories import RedisChatMessageHistory\n",
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain.schema.chat_history import BaseChatMessageHistory\n",
"from langchain.schema.runnable.history import RunnableWithMessageHistory"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3185edba-4eb6-4b32-80c6-577c0d19af97",
"metadata": {},
"outputs": [],
"source": [
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You're an assistant who's good at {ability}\"),\n",
" MessagesPlaceholder(variable_name=\"history\"),\n",
" (\"human\", \"{question}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | ChatAnthropic(model=\"claude-2\")"
]
},
{
"cell_type": "markdown",
"id": "f9d81796-ce61-484c-89e2-6c567d5e54ef",
"metadata": {},
"source": [
"### Adding message history\n",
"\n",
"To add message history to our original chain we wrap it in the `RunnableWithMessageHistory` class.\n",
"\n",
"Crucially, we also need to define a method that takes a session_id string and based on it returns a `BaseChatMessageHistory`. Given the same input, this method should return an equivalent output.\n",
"\n",
"In this case we'll also want to specify `input_messages_key` (the key to be treated as the latest input message) and `history_messages_key` (the key to add historical messages to)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ca7c64d8-e138-4ef8-9734-f82076c47d80",
"metadata": {},
"outputs": [],
"source": [
"chain_with_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
" input_messages_key=\"question\",\n",
" history_messages_key=\"history\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "37eefdec-9901-4650-b64c-d3c097ed5f4d",
"metadata": {},
"source": [
"## Invoking with config\n",
"\n",
"Whenever we call our chain with message history, we need to include a config that contains the `session_id`\n",
"```python\n",
"config={\"configurable\": {\"session_id\": \"<SESSION_ID>\"}}\n",
"```\n",
"\n",
"Given the same configuration, our chain should be pulling from the same chat message history."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a85bcc22-ca4c-4ad5-9440-f94be7318f3e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' Cosine is one of the basic trigonometric functions in mathematics. It is defined as the ratio of the adjacent side to the hypotenuse in a right triangle.\\n\\nSome key properties and facts about cosine:\\n\\n- It is denoted by cos(θ), where θ is the angle in a right triangle. \\n\\n- The cosine of an acute angle is always positive. For angles greater than 90 degrees, cosine can be negative.\\n\\n- Cosine is one of the three main trig functions along with sine and tangent.\\n\\n- The cosine of 0 degrees is 1. As the angle increases towards 90 degrees, the cosine value decreases towards 0.\\n\\n- The range of values for cosine is -1 to 1.\\n\\n- The cosine function maps angles in a circle to the x-coordinate on the unit circle.\\n\\n- Cosine is used to find adjacent side lengths in right triangles, and has many other applications in mathematics, physics, engineering and more.\\n\\n- Key cosine identities include: cos(A+B) = cosAcosB sinAsinB and cos(2A) = cos^2(A) sin^2(A)\\n\\nSo in summary, cosine is a fundamental trig')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_history.invoke(\n",
" {\"ability\": \"math\", \"question\": \"What does cosine mean?\"},\n",
" config={\"configurable\": {\"session_id\": \"foobar\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ab29abd3-751f-41ce-a1b0-53f6b565e79d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' The inverse of the cosine function is called the arccosine or inverse cosine, often denoted as cos-1(x) or arccos(x).\\n\\nThe key properties and facts about arccosine:\\n\\n- It is defined as the angle θ between 0 and π radians whose cosine is x. So arccos(x) = θ such that cos(θ) = x.\\n\\n- The range of arccosine is 0 to π radians (0 to 180 degrees).\\n\\n- The domain of arccosine is -1 to 1. \\n\\n- arccos(cos(θ)) = θ for values of θ from 0 to π radians.\\n\\n- arccos(x) is the angle in a right triangle whose adjacent side is x and hypotenuse is 1.\\n\\n- arccos(0) = 90 degrees. As x increases from 0 to 1, arccos(x) decreases from 90 to 0 degrees.\\n\\n- arccos(1) = 0 degrees. arccos(-1) = 180 degrees.\\n\\n- The graph of y = arccos(x) is part of the unit circle, restricted to x')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_history.invoke(\n",
" {\"ability\": \"math\", \"question\": \"What's its inverse\"},\n",
" config={\"configurable\": {\"session_id\": \"foobar\"}},\n",
")"
]
},
{
"cell_type": "markdown",
"id": "da3d1feb-b4bb-4624-961c-7db2e1180df7",
"metadata": {},
"source": [
":::tip [Langsmith trace](https://smith.langchain.com/public/863a003b-7ca8-4b24-be9e-d63ec13c106e/r)\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "61d5115e-64a1-4ad5-b676-8afd4ef6093e",
"metadata": {},
"source": [
"Looking at the Langsmith trace for the second call, we can see that when constructing the prompt, a \"history\" variable has been injected which is a list of two messages (our first input and first output)."
]
},
{
"cell_type": "markdown",
"id": "028cf151-6cd5-4533-b3cf-c8d735554647",
"metadata": {},
"source": [
"## Example: messages input, dict output"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0bb446b5-6251-45fe-a92a-4c6171473c53",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output_message': AIMessage(content=' Here is a summary of Simone de Beauvoir\\'s views on free will:\\n\\n- De Beauvoir was an existentialist philosopher and believed strongly in the concept of free will. She rejected the idea that human nature or instincts determine behavior.\\n\\n- Instead, de Beauvoir argued that human beings define their own essence or nature through their actions and choices. As she famously wrote, \"One is not born, but rather becomes, a woman.\"\\n\\n- De Beauvoir believed that while individuals are situated in certain cultural contexts and social conditions, they still have agency and the ability to transcend these situations. Freedom comes from choosing one\\'s attitude toward these constraints.\\n\\n- She emphasized the radical freedom and responsibility of the individual. We are \"condemned to be free\" because we cannot escape making choices and taking responsibility for our choices. \\n\\n- De Beauvoir felt that many people evade their freedom and responsibility by adopting rigid mindsets, ideologies, or conforming uncritically to social roles.\\n\\n- She advocated for the recognition of ambiguity in the human condition and warned against the quest for absolute rules that deny freedom and responsibility. Authentic living involves embracing ambiguity.\\n\\nIn summary, de Beauvoir promoted an existential ethics')}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.schema.messages import HumanMessage\n",
"from langchain.schema.runnable import RunnableMap\n",
"\n",
"chain = RunnableMap({\"output_message\": ChatAnthropic(model=\"claude-2\")})\n",
"chain_with_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
" output_messages_key=\"output_message\",\n",
")\n",
"\n",
"chain_with_history.invoke(\n",
" [HumanMessage(content=\"What did Simone de Beauvoir believe about free will\")],\n",
" config={\"configurable\": {\"session_id\": \"baz\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "601ce3ff-aea8-424d-8e54-fd614256af4f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output_message': AIMessage(content=\" There are many similarities between Simone de Beauvoir's views on free will and those of Jean-Paul Sartre, though some key differences emerge as well:\\n\\nSimilarities with Sartre:\\n\\n- Both were existentialist thinkers who rejected determinism and emphasized human freedom and responsibility.\\n\\n- They agreed that existence precedes essence - there is no predefined human nature that determines who we are.\\n\\n- Individuals must define themselves through their choices and actions. This leads to anxiety but also freedom.\\n\\n- The human condition is characterized by ambiguity and uncertainty, rather than fixed meanings/values.\\n\\n- Both felt that most people evade their freedom through self-deception, conformity, or adopting collective identities/values uncritically.\\n\\nDifferences from Sartre: \\n\\n- Sartre placed more emphasis on the burden and anguish of radical freedom. De Beauvoir focused more on its positive potential.\\n\\n- De Beauvoir critiqued Sartre's premise that human relations are necessarily conflictual. She saw more potential for mutual recognition.\\n\\n- Sartre saw the Other's gaze as a threat to freedom. De Beauvoir put more stress on how the Other's gaze can confirm\")}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_history.invoke(\n",
" [HumanMessage(content=\"How did this compare to Sartre\")],\n",
" config={\"configurable\": {\"session_id\": \"baz\"}},\n",
")"
]
},
{
"cell_type": "markdown",
"id": "b898d1b1-11e6-4d30-a8dd-cc5e45533611",
"metadata": {},
"source": [
":::tip [LangSmith trace](https://smith.langchain.com/public/f6c3e1d1-a49d-4955-a9fa-c6519df74fa7/r)\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "1724292c-01c6-44bb-83e8-9cdb6bf01483",
"metadata": {},
"source": [
"## More examples\n",
"\n",
"We could also do any of the below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd89240b-5a25-48f8-9568-5c1127f9ffad",
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"# messages in, messages out\n",
"RunnableWithMessageHistory(\n",
" ChatAnthropic(model=\"claude-2\"),\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
")\n",
"\n",
"# dict with single key for all messages in, messages out\n",
"RunnableWithMessageHistory(\n",
" itemgetter(\"input_messages\") | ChatAnthropic(model=\"claude-2\"),\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
" input_messages_key=\"input_messages\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "poetry-venv",
"language": "python",
"name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -40,8 +40,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.chat_models import ChatAnthropic\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser"
]
},

View File

@@ -20,7 +20,7 @@ Whenever your LCEL chains have steps that can be executed in parallel (eg if you
Configure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. Were currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.
**Access intermediate results**
For more complex chains its often very useful to access the results of intermediate steps even before the final output is produced. This can be used let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and its available on every [LangServe](/docs/langserve) server.
For more complex chains its often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and its available on every [LangServe](/docs/langserve) server.
**Input and output schemas**
Input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.
@@ -30,4 +30,4 @@ As your chains get more and more complex, it becomes increasingly important to u
With LCEL, **all** steps are automatically logged to [LangSmith](/docs/langsmith/) for maximum observability and debuggability.
**Seamless LangServe deployment integration**
Any chain created with LCEL can be easily deployed using LangServe.
Any chain created with LCEL can be easily deployed using [LangServe](/docs/langserve).

View File

@@ -57,8 +57,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate\n",
"\n",
"model = ChatOpenAI()\n",
"prompt = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\")\n",

View File

@@ -29,7 +29,7 @@ If you want to install from source, you can do so by cloning the repo and be sur
pip install -e .
```
## Langchain experimental
## LangChain experimental
The `langchain-experimental` package holds experimental LangChain code, intended for research and experimental uses.
Install with:
@@ -37,14 +37,6 @@ Install with:
pip install langchain-experimental
```
## LangChain CLI
The LangChain CLI is useful for working with LangChain templates and other LangServe projects.
Install with:
```bash
pip install langchain-cli
```
## LangServe
LangServe helps developers deploy LangChain runnables and chains as a REST API.
LangServe is automatically installed by LangChain CLI.
@@ -55,6 +47,14 @@ pip install "langserve[all]"
```
for both client and server dependencies. Or `pip install "langserve[client]"` for client code, and `pip install "langserve[server]"` for server code.
## LangChain CLI
The LangChain CLI is useful for working with LangChain templates and other LangServe projects.
Install with:
```bash
pip install langchain-cli
```
## LangSmith SDK
The LangSmith SDK is automatically installed by LangChain.
If not using LangChain, install with:

View File

@@ -14,7 +14,7 @@ This framework consists of several parts.
- **[LangServe](/docs/langserve)**: A library for deploying LangChain chains as a REST API.
- **[LangSmith](/docs/langsmith)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
![LangChain Diagram](/img/langchain_stack.png)
![LangChain Diagram](/svg/langchain_stack.svg)
Together, these products simplify the entire application lifecycle:
- **Develop**: Write your applications in LangChain/LangChain.js. Hit the ground running using Templates for reference.
@@ -49,7 +49,7 @@ LCEL is a declarative way to compose chains. LCEL was designed from day 1 to sup
- **[Overview](/docs/expression_language/)**: LCEL and its benefits
- **[Interface](/docs/expression_language/interface)**: The standard interface for LCEL objects
- **[How-to](/docs/expression_language/interface)**: Key features of LCEL
- **[How-to](/docs/expression_language/how_to)**: Key features of LCEL
- **[Cookbook](/docs/expression_language/cookbook)**: Example code for accomplishing common tasks

View File

@@ -4,7 +4,7 @@ In this quickstart we'll show you how to:
- Get setup with LangChain, LangSmith and LangServe
- Use the most basic and common components of LangChain: prompt templates, models, and output parsers
- Use LangChain Expression Language, the protocol that LangChain is built on and which facilitates component chaining
- Build simple application with LangChain
- Build a simple application with LangChain
- Trace your application with LangSmith
- Serve your application with LangServe

View File

@@ -34,7 +34,8 @@
},
"outputs": [],
"source": [
"from typing import Optional, Any\n",
"from typing import Any, Optional\n",
"\n",
"from langchain.evaluation import PairwiseStringEvaluator\n",
"\n",
"\n",
@@ -116,10 +117,11 @@
},
"outputs": [],
"source": [
"from typing import Optional, Any\n",
"from langchain.evaluation import PairwiseStringEvaluator\n",
"from langchain.chat_models import ChatAnthropic\n",
"from typing import Any, Optional\n",
"\n",
"from langchain.chains import LLMChain\n",
"from langchain.chat_models import ChatAnthropic\n",
"from langchain.evaluation import PairwiseStringEvaluator\n",
"\n",
"\n",
"class CustomPreferenceEvaluator(PairwiseStringEvaluator):\n",

View File

@@ -1,448 +1,447 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing Chain Outputs\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/examples/comparisons.ipynb)\n",
"\n",
"Suppose you have two different prompts (or LLMs). How do you know which will generate \"better\" results?\n",
"\n",
"One automated way to predict the preferred configuration is to use a `PairwiseStringEvaluator` like the `PairwiseStringEvalChain`<a name=\"cite_ref-1\"></a>[<sup>[1]</sup>](#cite_note-1). This chain prompts an LLM to select which output is preferred, given a specific input.\n",
"\n",
"For this evaluation, we will need 3 things:\n",
"1. An evaluator\n",
"2. A dataset of inputs\n",
"3. 2 (or more) LLMs, Chains, or Agents to compare\n",
"\n",
"Then we will aggregate the results to determine the preferred model.\n",
"\n",
"### Step 1. Create the Evaluator\n",
"\n",
"In this example, you will use gpt-4 to select which output is preferred."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.evaluation import load_evaluator\n",
"\n",
"eval_chain = load_evaluator(\"pairwise_string\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 2. Select Dataset\n",
"\n",
"If you already have real usage data for your LLM, you can use a representative sample. More examples\n",
"provide more reliable results. We will use some example queries someone might have about how to use langchain here."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--langchain-howto-queries-bbb748bbee7e77aa/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a2358d37246640ce95e0f9940194590a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from langchain.evaluation.loading import load_dataset\n",
"\n",
"dataset = load_dataset(\"langchain-howto-queries\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 3. Define Models to Compare\n",
"\n",
"We will be comparing two agents in this case."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.utilities import SerpAPIWrapper\n",
"from langchain.agents import initialize_agent, Tool\n",
"from langchain.agents import AgentType\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"\n",
"# Initialize the language model\n",
"# You can add your own OpenAI API key by adding openai_api_key=\"<your_api_key>\"\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
"\n",
"# Initialize the SerpAPIWrapper for search functionality\n",
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
"search = SerpAPIWrapper()\n",
"\n",
"# Define a list of tools offered by the agent\n",
"tools = [\n",
" Tool(\n",
" name=\"Search\",\n",
" func=search.run,\n",
" coroutine=search.arun,\n",
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\",\n",
" ),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"functions_agent = initialize_agent(\n",
" tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False\n",
")\n",
"conversations_agent = initialize_agent(\n",
" tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 4. Generate Responses\n",
"\n",
"We will generate outputs for each of the models before evaluating them."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "87277cb39a1a4726bb7cc533a24e2ea4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/20 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from tqdm.notebook import tqdm\n",
"import asyncio\n",
"\n",
"results = []\n",
"agents = [functions_agent, conversations_agent]\n",
"concurrency_level = 6 # How many concurrent agents to run. May need to decrease if OpenAI is rate limiting.\n",
"\n",
"# We will only run the first 20 examples of this dataset to speed things up\n",
"# This will lead to larger confidence intervals downstream.\n",
"batch = []\n",
"for example in tqdm(dataset[:20]):\n",
" batch.extend([agent.acall(example[\"inputs\"]) for agent in agents])\n",
" if len(batch) >= concurrency_level:\n",
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
" results.extend(list(zip(*[iter(batch_results)] * 2)))\n",
" batch = []\n",
"if batch:\n",
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
" results.extend(list(zip(*[iter(batch_results)] * 2)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5. Evaluate Pairs\n",
"\n",
"Now it's time to evaluate the results. For each agent response, run the evaluation chain to select which output is preferred (or return a tie).\n",
"\n",
"Randomly select the input order to reduce the likelihood that one model will be preferred just because it is presented first."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import random\n",
"\n",
"\n",
"def predict_preferences(dataset, results) -> list:\n",
" preferences = []\n",
"\n",
" for example, (res_a, res_b) in zip(dataset, results):\n",
" input_ = example[\"inputs\"]\n",
" # Flip a coin to reduce persistent position bias\n",
" if random.random() < 0.5:\n",
" pred_a, pred_b = res_a, res_b\n",
" a, b = \"a\", \"b\"\n",
" else:\n",
" pred_a, pred_b = res_b, res_a\n",
" a, b = \"b\", \"a\"\n",
" eval_res = eval_chain.evaluate_string_pairs(\n",
" prediction=pred_a[\"output\"] if isinstance(pred_a, dict) else str(pred_a),\n",
" prediction_b=pred_b[\"output\"] if isinstance(pred_b, dict) else str(pred_b),\n",
" input=input_,\n",
" )\n",
" if eval_res[\"value\"] == \"A\":\n",
" preferences.append(a)\n",
" elif eval_res[\"value\"] == \"B\":\n",
" preferences.append(b)\n",
" else:\n",
" preferences.append(None) # No preference\n",
" return preferences"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"preferences = predict_preferences(dataset, results)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"**Print out the ratio of preferences.**"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI Functions Agent: 95.00%\n",
"None: 5.00%\n"
]
}
],
"source": [
"from collections import Counter\n",
"\n",
"name_map = {\n",
" \"a\": \"OpenAI Functions Agent\",\n",
" \"b\": \"Structured Chat Agent\",\n",
"}\n",
"counts = Counter(preferences)\n",
"pref_ratios = {k: v / len(preferences) for k, v in counts.items()}\n",
"for k, v in pref_ratios.items():\n",
" print(f\"{name_map.get(k)}: {v:.2%}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Estimate Confidence Intervals\n",
"\n",
"The results seem pretty clear, but if you want to have a better sense of how confident we are, that model \"A\" (the OpenAI Functions Agent) is the preferred model, we can calculate confidence intervals. \n",
"\n",
"Below, use the Wilson score to estimate the confidence interval."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from math import sqrt\n",
"\n",
"\n",
"def wilson_score_interval(\n",
" preferences: list, which: str = \"a\", z: float = 1.96\n",
") -> tuple:\n",
" \"\"\"Estimate the confidence interval using the Wilson score.\n",
"\n",
" See: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval\n",
" for more details, including when to use it and when it should not be used.\n",
" \"\"\"\n",
" total_preferences = preferences.count(\"a\") + preferences.count(\"b\")\n",
" n_s = preferences.count(which)\n",
"\n",
" if total_preferences == 0:\n",
" return (0, 0)\n",
"\n",
" p_hat = n_s / total_preferences\n",
"\n",
" denominator = 1 + (z**2) / total_preferences\n",
" adjustment = (z / denominator) * sqrt(\n",
" p_hat * (1 - p_hat) / total_preferences\n",
" + (z**2) / (4 * total_preferences * total_preferences)\n",
" )\n",
" center = (p_hat + (z**2) / (2 * total_preferences)) / denominator\n",
" lower_bound = min(max(center - adjustment, 0.0), 1.0)\n",
" upper_bound = min(max(center + adjustment, 0.0), 1.0)\n",
"\n",
" return (lower_bound, upper_bound)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The \"OpenAI Functions Agent\" would be preferred between 83.18% and 100.00% percent of the time (with 95% confidence).\n",
"The \"Structured Chat Agent\" would be preferred between 0.00% and 16.82% percent of the time (with 95% confidence).\n"
]
}
],
"source": [
"for which_, name in name_map.items():\n",
" low, high = wilson_score_interval(preferences, which=which_)\n",
" print(\n",
" f'The \"{name}\" would be preferred between {low:.2%} and {high:.2%} percent of the time (with 95% confidence).'\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Print out the p-value.**"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The p-value is 0.00000. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
"then there is a 0.00038% chance of observing the OpenAI Functions Agent be preferred at least 19\n",
"times out of 19 trials.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/gf/6rnp_mbx5914kx7qmmh7xzmw0000gn/T/ipykernel_15978/384907688.py:6: DeprecationWarning: 'binom_test' is deprecated in favour of 'binomtest' from version 1.7.0 and will be removed in Scipy 1.12.0.\n",
" p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n"
]
}
],
"source": [
"from scipy import stats\n",
"\n",
"preferred_model = max(pref_ratios, key=pref_ratios.get)\n",
"successes = preferences.count(preferred_model)\n",
"n = len(preferences) - preferences.count(None)\n",
"p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n",
"print(\n",
" f\"\"\"The p-value is {p_value:.5f}. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
"then there is a {p_value:.5%} chance of observing the {name_map.get(preferred_model)} be preferred at least {successes}\n",
"times out of {n} trials.\"\"\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"cite_note-1\"></a>_1. Note: Automated evals are still an open research topic and are best used alongside other evaluation approaches. \n",
"LLM preferences exhibit biases, including banal ones like the order of outputs.\n",
"In choosing preferences, \"ground truth\" may not be taken into account, which may lead to scores that aren't grounded in utility._"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing Chain Outputs\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/examples/comparisons.ipynb)\n",
"\n",
"Suppose you have two different prompts (or LLMs). How do you know which will generate \"better\" results?\n",
"\n",
"One automated way to predict the preferred configuration is to use a `PairwiseStringEvaluator` like the `PairwiseStringEvalChain`<a name=\"cite_ref-1\"></a>[<sup>[1]</sup>](#cite_note-1). This chain prompts an LLM to select which output is preferred, given a specific input.\n",
"\n",
"For this evaluation, we will need 3 things:\n",
"1. An evaluator\n",
"2. A dataset of inputs\n",
"3. 2 (or more) LLMs, Chains, or Agents to compare\n",
"\n",
"Then we will aggregate the results to determine the preferred model.\n",
"\n",
"### Step 1. Create the Evaluator\n",
"\n",
"In this example, you will use gpt-4 to select which output is preferred."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.evaluation import load_evaluator\n",
"\n",
"eval_chain = load_evaluator(\"pairwise_string\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 2. Select Dataset\n",
"\n",
"If you already have real usage data for your LLM, you can use a representative sample. More examples\n",
"provide more reliable results. We will use some example queries someone might have about how to use langchain here."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--langchain-howto-queries-bbb748bbee7e77aa/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n"
]
},
"nbformat": 4,
"nbformat_minor": 4
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a2358d37246640ce95e0f9940194590a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from langchain.evaluation.loading import load_dataset\n",
"\n",
"dataset = load_dataset(\"langchain-howto-queries\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 3. Define Models to Compare\n",
"\n",
"We will be comparing two agents in this case."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.agents import AgentType, Tool, initialize_agent\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.utilities import SerpAPIWrapper\n",
"\n",
"# Initialize the language model\n",
"# You can add your own OpenAI API key by adding openai_api_key=\"<your_api_key>\"\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
"\n",
"# Initialize the SerpAPIWrapper for search functionality\n",
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
"search = SerpAPIWrapper()\n",
"\n",
"# Define a list of tools offered by the agent\n",
"tools = [\n",
" Tool(\n",
" name=\"Search\",\n",
" func=search.run,\n",
" coroutine=search.arun,\n",
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\",\n",
" ),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"functions_agent = initialize_agent(\n",
" tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False\n",
")\n",
"conversations_agent = initialize_agent(\n",
" tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 4. Generate Responses\n",
"\n",
"We will generate outputs for each of the models before evaluating them."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "87277cb39a1a4726bb7cc533a24e2ea4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/20 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import asyncio\n",
"\n",
"from tqdm.notebook import tqdm\n",
"\n",
"results = []\n",
"agents = [functions_agent, conversations_agent]\n",
"concurrency_level = 6 # How many concurrent agents to run. May need to decrease if OpenAI is rate limiting.\n",
"\n",
"# We will only run the first 20 examples of this dataset to speed things up\n",
"# This will lead to larger confidence intervals downstream.\n",
"batch = []\n",
"for example in tqdm(dataset[:20]):\n",
" batch.extend([agent.acall(example[\"inputs\"]) for agent in agents])\n",
" if len(batch) >= concurrency_level:\n",
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
" results.extend(list(zip(*[iter(batch_results)] * 2)))\n",
" batch = []\n",
"if batch:\n",
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
" results.extend(list(zip(*[iter(batch_results)] * 2)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5. Evaluate Pairs\n",
"\n",
"Now it's time to evaluate the results. For each agent response, run the evaluation chain to select which output is preferred (or return a tie).\n",
"\n",
"Randomly select the input order to reduce the likelihood that one model will be preferred just because it is presented first."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import random\n",
"\n",
"\n",
"def predict_preferences(dataset, results) -> list:\n",
" preferences = []\n",
"\n",
" for example, (res_a, res_b) in zip(dataset, results):\n",
" input_ = example[\"inputs\"]\n",
" # Flip a coin to reduce persistent position bias\n",
" if random.random() < 0.5:\n",
" pred_a, pred_b = res_a, res_b\n",
" a, b = \"a\", \"b\"\n",
" else:\n",
" pred_a, pred_b = res_b, res_a\n",
" a, b = \"b\", \"a\"\n",
" eval_res = eval_chain.evaluate_string_pairs(\n",
" prediction=pred_a[\"output\"] if isinstance(pred_a, dict) else str(pred_a),\n",
" prediction_b=pred_b[\"output\"] if isinstance(pred_b, dict) else str(pred_b),\n",
" input=input_,\n",
" )\n",
" if eval_res[\"value\"] == \"A\":\n",
" preferences.append(a)\n",
" elif eval_res[\"value\"] == \"B\":\n",
" preferences.append(b)\n",
" else:\n",
" preferences.append(None) # No preference\n",
" return preferences"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"preferences = predict_preferences(dataset, results)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"**Print out the ratio of preferences.**"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI Functions Agent: 95.00%\n",
"None: 5.00%\n"
]
}
],
"source": [
"from collections import Counter\n",
"\n",
"name_map = {\n",
" \"a\": \"OpenAI Functions Agent\",\n",
" \"b\": \"Structured Chat Agent\",\n",
"}\n",
"counts = Counter(preferences)\n",
"pref_ratios = {k: v / len(preferences) for k, v in counts.items()}\n",
"for k, v in pref_ratios.items():\n",
" print(f\"{name_map.get(k)}: {v:.2%}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Estimate Confidence Intervals\n",
"\n",
"The results seem pretty clear, but if you want to have a better sense of how confident we are, that model \"A\" (the OpenAI Functions Agent) is the preferred model, we can calculate confidence intervals. \n",
"\n",
"Below, use the Wilson score to estimate the confidence interval."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from math import sqrt\n",
"\n",
"\n",
"def wilson_score_interval(\n",
" preferences: list, which: str = \"a\", z: float = 1.96\n",
") -> tuple:\n",
" \"\"\"Estimate the confidence interval using the Wilson score.\n",
"\n",
" See: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval\n",
" for more details, including when to use it and when it should not be used.\n",
" \"\"\"\n",
" total_preferences = preferences.count(\"a\") + preferences.count(\"b\")\n",
" n_s = preferences.count(which)\n",
"\n",
" if total_preferences == 0:\n",
" return (0, 0)\n",
"\n",
" p_hat = n_s / total_preferences\n",
"\n",
" denominator = 1 + (z**2) / total_preferences\n",
" adjustment = (z / denominator) * sqrt(\n",
" p_hat * (1 - p_hat) / total_preferences\n",
" + (z**2) / (4 * total_preferences * total_preferences)\n",
" )\n",
" center = (p_hat + (z**2) / (2 * total_preferences)) / denominator\n",
" lower_bound = min(max(center - adjustment, 0.0), 1.0)\n",
" upper_bound = min(max(center + adjustment, 0.0), 1.0)\n",
"\n",
" return (lower_bound, upper_bound)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The \"OpenAI Functions Agent\" would be preferred between 83.18% and 100.00% percent of the time (with 95% confidence).\n",
"The \"Structured Chat Agent\" would be preferred between 0.00% and 16.82% percent of the time (with 95% confidence).\n"
]
}
],
"source": [
"for which_, name in name_map.items():\n",
" low, high = wilson_score_interval(preferences, which=which_)\n",
" print(\n",
" f'The \"{name}\" would be preferred between {low:.2%} and {high:.2%} percent of the time (with 95% confidence).'\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Print out the p-value.**"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The p-value is 0.00000. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
"then there is a 0.00038% chance of observing the OpenAI Functions Agent be preferred at least 19\n",
"times out of 19 trials.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/gf/6rnp_mbx5914kx7qmmh7xzmw0000gn/T/ipykernel_15978/384907688.py:6: DeprecationWarning: 'binom_test' is deprecated in favour of 'binomtest' from version 1.7.0 and will be removed in Scipy 1.12.0.\n",
" p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n"
]
}
],
"source": [
"from scipy import stats\n",
"\n",
"preferred_model = max(pref_ratios, key=pref_ratios.get)\n",
"successes = preferences.count(preferred_model)\n",
"n = len(preferences) - preferences.count(None)\n",
"p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n",
"print(\n",
" f\"\"\"The p-value is {p_value:.5f}. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
"then there is a {p_value:.5%} chance of observing the {name_map.get(preferred_model)} be preferred at least {successes}\n",
"times out of {n} trials.\"\"\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"cite_note-1\"></a>_1. Note: Automated evals are still an open research topic and are best used alongside other evaluation approaches. \n",
"LLM preferences exhibit biases, including banal ones like the order of outputs.\n",
"In choosing preferences, \"ground truth\" may not be taken into account, which may lead to scores that aren't grounded in utility._"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -1,209 +1,209 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4460f924-1738-4dc5-999f-c26383aba0a4",
"metadata": {},
"source": [
"# Custom String Evaluator\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/custom.ipynb)\n",
"\n",
"You can make your own custom string evaluators by inheriting from the `StringEvaluator` class and implementing the `_evaluate_strings` (and `_aevaluate_strings` for async support) methods.\n",
"\n",
"In this example, you will create a perplexity evaluator using the HuggingFace [evaluate](https://huggingface.co/docs/evaluate/index) library.\n",
"[Perplexity](https://en.wikipedia.org/wiki/Perplexity) is a measure of how well the generated text would be predicted by the model used to compute the metric."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "90ec5942-4b14-47b1-baff-9dd2a9f17a4e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# %pip install evaluate > /dev/null"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "54fdba68-0ae7-4102-a45b-dabab86c97ac",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from typing import Any, Optional\n",
"\n",
"from langchain.evaluation import StringEvaluator\n",
"from evaluate import load\n",
"\n",
"\n",
"class PerplexityEvaluator(StringEvaluator):\n",
" \"\"\"Evaluate the perplexity of a predicted string.\"\"\"\n",
"\n",
" def __init__(self, model_id: str = \"gpt2\"):\n",
" self.model_id = model_id\n",
" self.metric_fn = load(\n",
" \"perplexity\", module_type=\"metric\", model_id=self.model_id, pad_token=0\n",
" )\n",
"\n",
" def _evaluate_strings(\n",
" self,\n",
" *,\n",
" prediction: str,\n",
" reference: Optional[str] = None,\n",
" input: Optional[str] = None,\n",
" **kwargs: Any,\n",
" ) -> dict:\n",
" results = self.metric_fn.compute(\n",
" predictions=[prediction], model_id=self.model_id\n",
" )\n",
" ppl = results[\"perplexities\"][0]\n",
" return {\"score\": ppl}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "52767568-8075-4f77-93c9-80e1a7e5cba3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"evaluator = PerplexityEvaluator()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "697ee0c0-d1ae-4a55-a542-a0f8e602c28a",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using pad_token, but it is not set yet.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "467109d44654486e8b415288a319fc2c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'score': 190.3675537109375}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"evaluator.evaluate_strings(prediction=\"The rains in Spain fall mainly on the plain.\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5089d9d1-eae6-4d47-b4f6-479e5d887d74",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using pad_token, but it is not set yet.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d3266f6f06d746e1bb03ce4aca07d9b9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'score': 1982.0709228515625}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The perplexity is much higher since LangChain was introduced after 'gpt-2' was released and because it is never used in the following context.\n",
"evaluator.evaluate_strings(prediction=\"The rains in Spain fall mainly on LangChain.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5eaa178f-6ba3-47ae-b3dc-1b196af6d213",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
"cells": [
{
"cell_type": "markdown",
"id": "4460f924-1738-4dc5-999f-c26383aba0a4",
"metadata": {},
"source": [
"# Custom String Evaluator\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/string/custom.ipynb)\n",
"\n",
"You can make your own custom string evaluators by inheriting from the `StringEvaluator` class and implementing the `_evaluate_strings` (and `_aevaluate_strings` for async support) methods.\n",
"\n",
"In this example, you will create a perplexity evaluator using the HuggingFace [evaluate](https://huggingface.co/docs/evaluate/index) library.\n",
"[Perplexity](https://en.wikipedia.org/wiki/Perplexity) is a measure of how well the generated text would be predicted by the model used to compute the metric."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "90ec5942-4b14-47b1-baff-9dd2a9f17a4e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# %pip install evaluate > /dev/null"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "54fdba68-0ae7-4102-a45b-dabab86c97ac",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from typing import Any, Optional\n",
"\n",
"from evaluate import load\n",
"from langchain.evaluation import StringEvaluator\n",
"\n",
"\n",
"class PerplexityEvaluator(StringEvaluator):\n",
" \"\"\"Evaluate the perplexity of a predicted string.\"\"\"\n",
"\n",
" def __init__(self, model_id: str = \"gpt2\"):\n",
" self.model_id = model_id\n",
" self.metric_fn = load(\n",
" \"perplexity\", module_type=\"metric\", model_id=self.model_id, pad_token=0\n",
" )\n",
"\n",
" def _evaluate_strings(\n",
" self,\n",
" *,\n",
" prediction: str,\n",
" reference: Optional[str] = None,\n",
" input: Optional[str] = None,\n",
" **kwargs: Any,\n",
" ) -> dict:\n",
" results = self.metric_fn.compute(\n",
" predictions=[prediction], model_id=self.model_id\n",
" )\n",
" ppl = results[\"perplexities\"][0]\n",
" return {\"score\": ppl}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "52767568-8075-4f77-93c9-80e1a7e5cba3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"evaluator = PerplexityEvaluator()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "697ee0c0-d1ae-4a55-a542-a0f8e602c28a",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using pad_token, but it is not set yet.\n"
]
},
"nbformat": 4,
"nbformat_minor": 5
{
"name": "stdout",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "467109d44654486e8b415288a319fc2c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'score': 190.3675537109375}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"evaluator.evaluate_strings(prediction=\"The rains in Spain fall mainly on the plain.\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5089d9d1-eae6-4d47-b4f6-479e5d887d74",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using pad_token, but it is not set yet.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d3266f6f06d746e1bb03ce4aca07d9b9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'score': 1982.0709228515625}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The perplexity is much higher since LangChain was introduced after 'gpt-2' was released and because it is never used in the following context.\n",
"evaluator.evaluate_strings(prediction=\"The rains in Spain fall mainly on LangChain.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5eaa178f-6ba3-47ae-b3dc-1b196af6d213",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -33,7 +33,7 @@
}
],
"source": [
"from langchain.evaluation import JsonValidityEvaluator, load_evaluator\n",
"from langchain.evaluation import JsonValidityEvaluator\n",
"\n",
"evaluator = JsonValidityEvaluator()\n",
"# Equivalently\n",

View File

@@ -24,8 +24,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.evaluation import load_evaluator\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.evaluation import load_evaluator\n",
"\n",
"evaluator = load_evaluator(\"labeled_score_string\", llm=ChatOpenAI(model=\"gpt-4\"))"
]

Some files were not shown because too many files have changed in this diff Show More