Compare commits

..

138 Commits

Author SHA1 Message Date
Bagatur
26c86a197c bump 228 (#7393) 2023-07-08 03:05:20 -04:00
SvMax
1d649b127e Added param to return only a structured json from the get_format_instructions method (#5848)
I just added a parameter to the method get_format_instructions, to
return directly the JSON instructions without the leading instruction
sentence. I'm planning to use it to define the structure of a JSON
object passed in input, the get_format_instructions().

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-08 02:57:26 -04:00
Bagatur
362bc301df fix jina (#7392) 2023-07-08 02:41:54 -04:00
Delgermurun
a1603fccfb integrate JinaChat (#6927)
Integration with https://chat.jina.ai/api. It is OpenAI compatible API.

- Twitter handle:
[https://twitter.com/JinaAI_](https://twitter.com/JinaAI_)

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-08 02:17:04 -04:00
William FH
4ba7396f96 Add single run eval loader (#7390)
Plus 
- add evaluation name to make string and embedding validators work with
the run evaluator loader.
- Rm unused root validator
2023-07-07 23:06:49 -07:00
Roger Yu
633b673b85 Update pinecone.ipynb (#7382)
Fix typo
2023-07-08 01:48:03 -04:00
Oleg Zabluda
4d697d3f24 Allow passing custom prompts to GraphIndexCreator (#7381)
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-08 01:47:53 -04:00
William FH
612a74eb7e Make Ref Example Threadsafe (#7383)
Have noticed transient ref example misalignment. I believe this is
caused by the logic of assigning an example within the thread executor
rather than before.
2023-07-07 21:50:42 -07:00
William FH
4789c99bc2 Add String Distance and Embedding Evaluators (#7123)
Add a string evaluator and pairwise string evaluator implementation for:
- Embedding distance
- String distance

Update docs
2023-07-07 21:44:31 -07:00
ljeagle
fb6e63dc36 Upgrade the AwaDB from 0.3.5 to 0.3.6 (#7363) 2023-07-07 20:41:17 -07:00
William FH
c5edbea34a Load Run Evaluator (#7101)
Current problems:
1. Evaluating LLMs or Chat models isn't smooth. Even specifying
'generations' as the output inserts a redundant list into the eval
template
2. Configuring input / prediction / reference keys in the
`get_qa_evaluator` function is confusing. Unless you are using a chain
with the default keys, you have to specify all the variables and need to
reason about whether the key corresponds to the traced run's inputs,
outputs or the examples inputs or outputs.


Proposal:
- Configure the run evaluator according to a model. Use the model type
and input/output keys to assert compatibility where possible. Only need
to specify a reference_key for certain evaluators (which is less
confusing than specifying input keys)


When does this work:
- If you have your langchain model available (assumed always for
run_on_dataset flow)
- If you are evaluating an LLM, Chat model, or chain
- If the LLM or chat models are traced by langchain (wouldn't work if
you add an incompatible schema via the REST API)

When would this fail:
- Currently if you directly create an example from an LLM run, the
outputs are generations with all the extra metadata present. A simple
`example_key` and dumping all to the template could make the evaluations
unreliable
- Doesn't help if you're not using the low level API
- If you want to instantiate the evaluator without instantiating your
chain or LLM (maybe common for monitoring, for instance) -> could also
load from run or run type though

What's ugly:
- Personally think it's better to load evaluators one by one since
passing a config down is pretty confusing.
- Lots of testing needs to be added
- Inconsistent in that it makes a separate run and example input mapper
instead of the original `RunEvaluatorInputMapper`, which maps a run and
example to a single input.

Example usage running the evaluators for an LLM, Chat Model, and Agent.

```
# Test running for the string evaluators
evaluator_names = ["qa", "criteria"]

model = ChatOpenAI()
configured_evaluators = load_run_evaluators_for_model(evaluator_names, model=model, reference_key="answer")
run_on_dataset(ds_name, model, run_evaluators=configured_evaluators)
```


<details>
  <summary>Full code with dataset upload</summary>
```
## Create dataset
from langchain.evaluation.run_evaluators.loading import load_run_evaluators_for_model
from langchain.evaluation import load_dataset
import pandas as pd

lcds = load_dataset("llm-math")
df = pd.DataFrame(lcds)

from uuid import uuid4
from langsmith import Client
client = Client()
ds_name = "llm-math - " + str(uuid4())[0:8]
ds = client.upload_dataframe(df, name=ds_name, input_keys=["question"], output_keys=["answer"])



## Define the models we'll test over
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType

from langchain.tools import tool

llm = OpenAI(temperature=0)
chat_model = ChatOpenAI(temperature=0)

@tool
    def sum(a: float, b: float) -> float:
        """Add two numbers"""
        return a + b
    
def construct_agent():
    return initialize_agent(
        llm=chat_model,
        tools=[sum],
        agent=AgentType.OPENAI_MULTI_FUNCTIONS,
    )

agent = construct_agent()

# Test running for the string evaluators
evaluator_names = ["qa", "criteria"]

models = [llm, chat_model, agent]
run_evaluators = []
for model in models:
    run_evaluators.append(load_run_evaluators_for_model(evaluator_names, model=model, reference_key="answer"))
    

# Run on LLM, Chat Model, and Agent
from langchain.client.runner_utils import run_on_dataset

to_test = [llm, chat_model, construct_agent]

for model, configured_evaluators in zip(to_test, run_evaluators):
    run_on_dataset(ds_name, model, run_evaluators=configured_evaluators, verbose=True)
```
</details>

---------

Co-authored-by: Nuno Campos <nuno@boringbits.io>
2023-07-07 19:57:59 -07:00
Bagatur
1ac347b4e3 update databerry-chaindesk redirect (#7378) 2023-07-07 19:11:46 -04:00
Joshua Carroll
705d2f5b92 Update the API Reference link in Streamlit integration docs (#7377)
This page:


https://python.langchain.com/docs/modules/callbacks/integrations/streamlit

Has a bad API Reference link currently. This PR fixes it to the correct
link.

Also updates the embedded app link to
https://langchain-mrkl.streamlit.app/ (better name) which is hosted in
langchain-ai/streamlit-agent repo
2023-07-07 17:35:57 -04:00
Georges Petrov
ec033ae277 Rename Databerry to Chaindesk (#7022)
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 17:28:04 -04:00
Philip Meier
da5b0723d2 update MosaicML inputs and outputs (#7348)
As of today (July 7, 2023), the [MosaicML
API](https://docs.mosaicml.com/en/latest/inference.html#text-completion-requests)
uses `"inputs"` for the prompt

This PR adds support for this new format.
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 17:23:11 -04:00
Bearnardd
184ede4e48 Fix buggy output from GraphQAChain (#7372)
fixes https://github.com/hwchase17/langchain/issues/7289
A simple fix of the buggy output of `graph_qa`. If we have several
entities with triplets then the last entry of `triplets` for a given
entity merges with the first entry of the `triplets` of the next entity.
2023-07-07 17:19:53 -04:00
Harrison Chase
7cdf97ba9b Harrison/add to imports (#7370)
pgvector cleanup
2023-07-07 16:27:44 -04:00
Bagatur
4d427b2397 Base language model docstrings (#7104) 2023-07-07 16:09:10 -04:00
ॐ shivam mamgain
2179d4eef8 Fix for KeyError in MlflowCallbackHandler (#7051)
- Description: `MlflowCallbackHandler` fails with `KeyError: "['name']
not in index"`. See https://github.com/hwchase17/langchain/issues/5770
for more details. Root cause is that LangChain does not pass "name" as a
part of `serialized` argument to `on_llm_start()` callback method. The
commit where this change was made is probably this:
18af149e91.
My bug fix derives "name" from "id" field.
  - Issue: https://github.com/hwchase17/langchain/issues/5770
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 16:08:06 -04:00
Alex Gamble
df746ad821 Add a callback handler for Context (https://getcontext.ai) (#7151)
### Description

Adding a callback handler for Context. Context is a product analytics
platform for AI chat experiences to help you understand how users are
interacting with your product.

I've added the callback library + an example notebook showing its use.

### Dependencies

Requires the user to install the `context-python` library. The library
is lazily-loaded when the callback is instantiated.

### Announcing the feature

We spoke with Harrison a few weeks ago about also doing a blog post
announcing our integration, so will coordinate this with him. Our
Twitter handle for the company is @getcontextai, and the founders are
@_agamble and @HenrySG.

Thanks in advance!
2023-07-07 15:33:29 -04:00
Austin
c9a0f24646 Add verbose parameter for llamacpp (#7253)
**Title:** Add verbose parameter for llamacpp

**Description:**
This pull request adds a 'verbose' parameter to the llamacpp module. The
'verbose' parameter, when set to True, will enable the output of
detailed logs during the execution of the Llama model. This added
parameter can aid in debugging and understanding the internal processes
of the module.

The verbose parameter is a boolean that prints verbose output to stderr
when set to True. By default, the verbose parameter is set to True but
can be toggled off if less output is desired. This new parameter has
been added to the `validate_environment` method of the `LlamaCpp` class
which initializes the `llama_cpp.Llama` API:

```python
class LlamaCpp(LLM):
    ...
    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        ...
        model_param_names = [
            ...
            "verbose",  # New verbose parameter added
        ]
        ...
        values["client"] = Llama(model_path, **model_params)
        ...
```
---------

Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
2023-07-07 15:08:25 -04:00
Kenny
34a2755a54 Allow passing api key into OpenAIWhisperParser (#7281)
This just allows the user to pass in an api_key directly into
OpenAIWhisperParser. Very simple addition.
2023-07-07 15:07:45 -04:00
mrkhalil6
4e7d0c115b Add support for filters and namespaces in similarity search in Pinecone similarity_score_threshold (#7301)
At the moment, pinecone vectorStore does not support filters and
namespaces when using similarity_score_threshold search type.
In this PR, I've implemented that. It passes all the kwargs except
"score_threshold" as that is not a supported argument for method
"similarity_search_with_score".
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 15:03:59 -04:00
Manuel Saelices
01dca1e438 Add context to an output parsing error on Pydantic schema to improve exception handling (#7344)
## Changes

- [X] Fill the `llm_output` param when there is an output parsing error
in a Pydantic schema so that we can get the original text that failed to
parse when handling the exception

## Background

With this change, we could do something like this:

```
output_parser = PydanticOutputParser(pydantic_object=pydantic_obj)
chain = ConversationChain(..., output_parser=output_parser)
try:
    response: PydanticSchema = chain.predict(input=input)
except OutputParserException as exc:
    logger.error(
        'OutputParserException while parsing chatbot response: %s', exc.llm_output,
    )
```
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 14:49:37 -04:00
Raouf Chebri
1ac6deda89 update extension name (#7359)
hi @rlancemartin ,

We had a new deployment and the `pg_extension` creation command was
updated from `CREATE EXTENSION pg_embedding` to `CREATE EXTENSION
embedding`.

https://github.com/neondatabase/neon/pull/4646

The extension has not been made public yet. No users will be affected by this.
Will be public next week.

Please let me know if you have any questions.

Thank you in advance 🙏
2023-07-07 11:35:51 -07:00
William FH
4e180dc54e Unset Cache in Tests (#7362)
This is impacting other unit tests that use callbacks since the cache is
still set (just empty)
2023-07-07 11:05:09 -07:00
German Martin
3ce4e46c8c The Fellowship of the Vectors: New Embeddings Filter using clustering. (#7015)
Continuing with Tolkien inspired series of langchain tools. I bring to
you:
**The Fellowship of the Vectors**, AKA EmbeddingsClusteringFilter.
This document filter uses embeddings to group vectors together into
clusters, then allows you to pick an arbitrary number of documents
vector based on proximity to the cluster centers. That's a
representative sample of the cluster.

The original idea is from [Greg Kamradt](https://github.com/gkamradt)
from this video (Level4):
https://www.youtube.com/watch?v=qaPMdcCqtWk&t=365s

I added a few tricks to make it a bit more versatile, so you can
parametrize what to do with duplicate documents in case of cluster
overlap: replace the duplicates with the next closest document or remove
them. This allows you to use it as a special kind of redundant filter too.
Additionally you can choose 2 diff orders: grouped by cluster or
respecting the original retriever scores.
In my use case I was using the docs grouped by cluster to run refine
chains per cluster to generate summarization over a large corpus of
documents.
Let me know if you want to change anything!

@rlancemartin, @eyurtsev, @hwchase17,

---------

Co-authored-by: rlm <pexpresss31@gmail.com>
2023-07-07 10:28:17 -07:00
Leonid Ganeline
b489466488 docs: dependents update 4 (#7360)
Updated links and counters of the `dependents` page.
2023-07-07 13:22:30 -04:00
William FH
38ca5c84cb Explicitly list requires_reference in function (#7357) 2023-07-07 10:04:03 -07:00
Harrison Chase
49b2b0e3c0 change embedding to None (#7355) 2023-07-07 12:33:03 -04:00
imaprogrammer
a2830e3056 Update chroma.py: Persist directory from client_settings if provided there (#7087)
Change details:
- Description: When calling db.persist(), a check prevents it from
proceeding as the constructor only sets member `_persist_directory` from
parameters. But the ChromaDB client settings also has this parameter,
and if the client_settings parameter is used without passing the
persist_directory (which is optional), the `persist` method raises
`ValueError` for not setting `_persist_directory`. This change fixes it
by setting the member `_persist_directory` variable from client_settings
if it is set, else uses the constructor parameter.
- Issue: I didn't find any github issue of this, but I discovered it
after calling the persist method
  - Dependencies: None
- Tag maintainer: vectorstore related change - @rlancemartin, @eyurtsev
  - Twitter handle: Don't have one :(

*Additional discussion*: We may need to discuss the way I implemented
the fallback using `or`.

---------

Co-authored-by: rlm <pexpresss31@gmail.com>
2023-07-07 09:20:27 -07:00
Bagatur
cb4e88e4fb bump 227 (#7354) 2023-07-07 11:52:35 -04:00
Bagatur
d1c7237034 openai fn update nb (#7352) 2023-07-07 11:52:21 -04:00
Bagatur
0ed2da7020 bump 226 (#7335) 2023-07-07 05:59:13 -04:00
Bagatur
1c8cff32f1 Generic OpenAI fn chain (#7270)
Add loading functions for openai function chains and add docs page
2023-07-07 05:44:53 -04:00
Bagatur
fd7145970f Output parser redirect (#7330)
Related to #7311
2023-07-07 04:26:34 -04:00
OwenElliott
3074306ae1 Marqo Vector Store Examples & Type Hints (#7326)
This PR improves the example notebook for the Marqo vectorstore
implementation by adding a new RetrievalQAWithSourcesChain example. The
`embedding` parameter in `from_documents` has its type updated to
`Union[Embeddings, None]` and a default parameter of None because this
is ignored in Marqo.

This PR also upgrades the Marqo version to 0.11.0 to remove the device
parameter after a breaking change to the API.

Related to #7068 @tomhamer @hwchase17

---------

Co-authored-by: Tom Hamer <tom@marqo.ai>
2023-07-07 04:11:20 -04:00
Nayjest
5809c3d29d Pack of small fixes and refactorings that don't affect functionality (#6990)
Description: Pack of small fixes and refactorings that don't affect
functionality, just making code prettier & fixing some misspelling
(hand-filtered improvements proposed by SeniorAi.online, prototype of
code improving tool based on gpt4); the agents and callbacks folders were
covered.

Dependencies: Nothing changed

Twitter: https://twitter.com/nayjest

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 03:40:49 -04:00
Bagatur
87f75cb322 Add base Chain docstrings (#7114) 2023-07-07 03:06:33 -04:00
Leonid Ganeline
284d40b7af docstrings top level update (#7173)
Updated docstrings so that the [API
Reference](https://api.python.langchain.com/en/latest/api_reference.html)
page has text in the second column (class/function/... description).
2023-07-07 02:42:28 -04:00
Stav Sapir
8d961b9e33 add preset ability to textgen llm (#7196)
add an ability for textgen llm to work with preset provided by text gen
webui API.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 02:41:24 -04:00
Bagatur
a9c5b4bcea Bagatur/clarifai update (#7324)
This PR improves upon the Clarifai LangChain integration with improved docs, errors, args and the addition of embedding model support in LangChain for Clarifai's embedding models and an overview of the various ways you can integrate with Clarifai added to the docs.

---------

Co-authored-by: Matthew Zeiler <zeiler@clarifai.com>
2023-07-07 02:23:20 -04:00
Oleg Zabluda
9954eff8fd Rename prompt_template => _DEFAULT_GRAPH_QA_TEMPLATE and PROMPT => GRAPH_QA_PROMPT to make consistent with the rest of the files (#7250)
Rename prompt_template => _DEFAULT_GRAPH_QA_TEMPLATE to make consistent
with the rest of the file.
2023-07-07 02:17:40 -04:00
Nikhil Kumar Gupta
6095a0a310 Added number_of_head_rows to pandas agent parameters (#7271)
Description: Added number_of_head_rows as a parameter to pandas agent.
number_of_head_rows allows the user to select the number of rows to pass
with the prompt when include_df_in_prompt is True. This gives the
ability to control the token length and can be helpful in dealing with
large dataframe.
---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-07 02:17:26 -04:00
John Landahl
e047541b5f Corrected a typo in elasticsearch.ipynb (#7318)
Simple typo fix
2023-07-07 01:35:32 -04:00
Subsegment
152dc59060 docs : add cnosdb to Ecosystem Integrations (#7316)
- Implement a `from_cnosdb` method for the `SQLDatabase` class
  - Write CnosDB documentation and add it to Ecosystem Integrations
2023-07-07 01:35:22 -04:00
Bagatur
927c8eb91a Refac package version check (#7312) 2023-07-07 01:21:53 -04:00
Sparsh Jain
bac56618b4 Solving anthropic packaging version issue (#7306)
- Description: Solving the anthropic packaging version issue by clearing
up the mixup in which package.version is being confused with version from
importlib.metadata.version.

  - Issue: it fixes the issue #7283 
  - Maintainer: @hwchase17 

The following change has been explained in the comment -
https://github.com/hwchase17/langchain/issues/7283#issuecomment-1624328978
2023-07-06 19:35:42 -04:00
Jason B. Koh
d642609a23 Fix: Recognize List at from_function (#7178)
- Description: pydantic's `ModelField.type_` only exposes the native
data type but not complex type hints like `List`. Thus, generating a
Tool with `from_function` through function signature produces incorrect
argument schemas (e.g., `str` instead of `List[str]`)
  - Issue: N/A
  - Dependencies: N/A
  - Tag maintainer: @hinthornw
  - Twitter handle: `mapped`

All the unittest (with an additional one in this PR) passed, though I
didn't try integration tests...
2023-07-06 17:22:09 -04:00
Chathura Rathnayake
ec10787bc7 Fixed the confluence loader ".csv" files loading issue (#7195)
- Description: Sometimes there are csv attachments with the media type
"application/vnd.ms-excel". These files failed to be loaded via the xlrd
library. It throws a corrupted file error. I fixed it by separately
processing excel files using pandas. Excel files will be processed just
like before.

- Dependencies: pandas, os, io

---------

Co-authored-by: Chathura <chathurar@yaalalabs.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-06 17:21:43 -04:00
Andre Elizondo
b21c2f8704 Update docs for whylabs (langkit) callback handler (#7293)
- Description: Update docs for whylabs callback handler
  - Issue: none
  - Dependencies: none
  - Tag maintainer: @agola11 
  - Twitter handle: @useautomation @whylabs

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Jamie Broomall <jamie@whylabs.ai>
2023-07-06 17:21:28 -04:00
William FH
e736d60516 Load Evaluator (#6942)
Create a `load_evaluators()` function so you don't have to import all
the individual evaluator classes
2023-07-06 13:58:58 -07:00
David Duong
12d14f8947 Fix secrets serialisation for ChatAnthropic (#7300) 2023-07-06 21:57:12 +01:00
William FH
cb9ff6efb8 Add function call params to invocation params (#7240) 2023-07-06 13:56:07 -07:00
William FH
1f4a51cb9c Add Agent Trajectory Interface (#7122) 2023-07-06 13:33:33 -07:00
Bagatur
a6b39afe0e rm side nav (#7297) 2023-07-06 15:19:29 -04:00
Bruno Bornsztein
1a4ca3eff9 handle missing finish_reason (#7296)
In some cases, the OpenAI response is missing the `finish_reason`
attribute. It seems to happen when using Ada or Babbage and
`stream=true`, but I can't always reproduce it. This change just
gracefully handles the missing key.
2023-07-06 15:13:51 -04:00
Leonid Ganeline
6ff9e9b34a updated huggingface_hub examples (#7292)
Added examples for models:
- Google `Flan`
- TII `Falcon`
- Salesforce `XGen`
2023-07-06 15:04:37 -04:00
Avinash Raj
09acbb8410 Modified PromptLayerChatOpenAI class to support function call (#6366)
Introduction of newest function calling feature doesn't work properly
with PromptLayerChatOpenAI model since on the `_generate` method,
functions argument are not even getting passed to the `ChatOpenAI` base
class which results in empty `ai_message.additional_kwargs`

Fixes  #6365
2023-07-06 13:16:04 -04:00
Dídac Sabatés
e0cb3ea90c Fix sql_database.ipynb link (#6525)
Looks like the
[SQLDatabaseChain](https://langchain.readthedocs.io/en/latest/modules/chains/examples/sqlite.html)
link in the SQL Database Agent page was broken. I've changed it to the SQL
Chain page.
2023-07-06 13:07:37 -04:00
Leonid Ganeline
4450791edd docs: tutorials update (#7230)
updated `tutorials.mdx`:
- added a link to new `Deeplearning AI` course on LangChain
- added links to other tutorial videos
- fixed format

@baskaryan, @hwchase17
2023-07-06 12:44:23 -04:00
Diego Machado
a7ae35fe4e Fix duplicated sentence in documentation's introduction (#6351)
Fix duplicated sentence in documentation's introduction
2023-07-06 12:12:18 -04:00
Bagatur
681f2678a3 add elasticknn to init (#7284) 2023-07-06 11:58:24 -04:00
hayao-k
c23e16c459 docs: Fixed typos in Amazon Kendra Retriever documentation (#7261)
## Description
Fixed to the official service name Amazon Kendra.

## Tag maintainer
@baskaryan
2023-07-06 11:56:52 -04:00
zhujiangwei
8c371e12eb refactor BedrockEmbeddings class (#7266)
#### Description
refactor BedrockEmbeddings class to clean code as below:

1. inline content type and accept
2. rewrite input_body as a dictionary literal
3. no need to declare embeddings variable, so remove it
2023-07-06 11:56:30 -04:00
Chui
c7cf11b8ab Remove whitespace in filename (#7264) 2023-07-06 11:55:42 -04:00
Jan Kubica
fed64ae060 Chroma: add vector search with scores (#6864)
- Description: Adding to Chroma integration the option to run a
similarity search by a vector with relevance scores. Fixing two minor
typos.
  
  - Issue: The "lambda_mult" typo is related to #4861 
  
  - Maintainer: @rlancemartin, @eyurtsev
2023-07-06 10:01:55 -04:00
William FH
576880abc5 Re-use Trajectory Evaluator (#7248)
Use the trajectory eval chain in the run evaluation implementation and
update the prepare inputs method to apply to both async and sync
2023-07-06 07:00:24 -07:00
zhaoshengbo
e8f24164f0 Improve the alibaba cloud opensearch vector store documentation (#6964)
Based on user feedback, we have improved the Alibaba Cloud OpenSearch
vector store documentation.

Co-authored-by: zhaoshengbo <shengbo.zsb@alibaba-inc.com>
2023-07-06 09:47:49 -04:00
Eduard van Valkenburg
ae5aa496ee PowerBI updates (#7143)
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->

Several updates for the PowerBI tools:

- Handle 0 records returned by requesting redo with different filtering
- Handle too large results by optionally tokenizing the result and
comparing against a max (change in signature, non-breaking)
- Implemented LLMChain with Chat for chat models for the tools. 
- Updates to the main prompt including tables
- Update to Tool prompt with TOPN function
- Split the tool prompt to allow the LLMChain with ChatPromptTemplate

Smaller fixes for stability.

For visibility: @hinthornw
2023-07-06 09:39:23 -04:00
emarco177
b9d6d4cd4c added template repo for CI/CD deployment on Google Cloud Run (#7218)
Replace this comment with:
- Description: added documentation for a template repo that helps
dockerizing and deploying a LangChain using a Cloud Build CI/CD pipeline
to Google Cloud build serverless
  - Issue: None,
  - Dependencies: None,
  - Tag maintainer: @baskaryan,
  - Twitter handle: EdenEmarco177

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.
2023-07-06 09:38:38 -04:00
Leonid Kuligin
8b19f6a0da Added retries for Vertex LLM (#7219)
#7217

---------

Co-authored-by: Leonid Kuligin <kuligin@google.com>
2023-07-06 09:38:01 -04:00
William FH
ec66d5188c Add Better Errors for Comparison Chain (#7033)
+ change to ABC - this lets us add things like the evaluation name for
loading
2023-07-06 06:37:04 -07:00
Stefano Lottini
e61cfb6e99 FLARE Example notebook: switch to named arg to pass pydantic validation (#7267)
Adding the name of the parameter to comply with latest requirements by
Pydantic usage for BaseModels.
2023-07-06 09:32:00 -04:00
Sasmitha Manathunga
0c7a5cb206 Fix inconsistent behavior of CharacterTextSplitter when changing keep_separator (#7263)
- Description:
- When `keep_separator` is `True` the `_split_text_with_regex()` method
in `text_splitter` uses regex to split, but when `keep_separator` is
`False` it uses `str.split()`. This causes problems when the separator
is a special regex character like `.` or `*`. This PR fixes that by
using `re.split()` in both cases.
- Issue: #7262 
- Tag maintainer: @baskaryan
2023-07-06 09:30:03 -04:00
os1ma
b151d4257a docs: Update documentation for Wikipedia tool to use WikipediaQueryRun (#7258)
**Description**
In the following page, "Wikipedia" tool is explained.

https://python.langchain.com/docs/modules/agents/tools/integrations/wikipedia

However, the WikipediaAPIWrapper being used is not a tool. This PR
updated the documentation to use a tool WikipediaQueryRun.

**Issue**
None

**Tag maintainer**
Agents / Tools / Toolkits: @hinthornw
2023-07-06 09:29:38 -04:00
Jeroen Van Goey
887bb12287 Use correct Language for html_splitter (#7274)
`html_splitter` was using `Language.MARKDOWN`.
2023-07-06 09:24:25 -04:00
Shantanu Nair
f773c21723 Update supabase match_docs ddl and notebook to use expected id type (#7257)
- Description: Switch supabase match function DDL to use expected uuid
type instead of bigint
- Issue: https://github.com/hwchase17/langchain/issues/6743,
https://github.com/hwchase17/langchain/issues/7179
  - Tag maintainer:  @rlancemartin, @eyurtsev
  - Twitter handle: https://twitter.com/ShantanuNair
2023-07-06 09:22:41 -04:00
Myeongseop Kim
0e878ccc2d Add HumanInputChatModel (#7256)
- Description: This is a chat model equivalent of HumanInputLLM. An
example notebook is also added.
  - Tag maintainer: @hwchase17, @baskaryan
  - Twitter handle: N/A
2023-07-06 09:21:03 -04:00
Myeongseop Kim
57d8a3d1e8 Make tqdm for OpenAIEmbeddings optional (#7247)
- Description: I have added a `show_progress_bar` parameter (defaults to
`False`) to the `OpenAIEmbeddings`. If the user sets `show_progress_bar`
to `True`, a progress bar will be displayed.
  - Issue: #7246
  - Dependencies: N/A
  - Tag maintainer: @hwchase17, @baskaryan
  - Twitter handle: N/A
2023-07-05 23:36:01 -04:00
Harrison Chase
c36f852846 fix conversational retrieval docs (#7245) 2023-07-05 21:51:33 -04:00
Harrison Chase
035ad33a5b bump ver to 225 (#7244) 2023-07-05 21:22:18 -04:00
Shantanu Nair
cabd358c3a Add missing token_max in reduce.py acombine_docs (#7241)
Replace this comment with:
- Description: reduce.py reduce chain implementation's acombine_docs
call does not propagate token_max. Without this, the async call will end
up using 3000 tokens, the default, for the collapse chain.
  - Tag maintainer: @hwchase17 @agola11 @baskaryan 
  - Twitter handle: https://twitter.com/ShantanuNair

Related PR: https://github.com/hwchase17/langchain/pull/7201 and
https://github.com/hwchase17/langchain/pull/7204

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 21:02:45 -04:00
Harrison Chase
52b016920c Harrison/update anthropic (#7237)
Co-authored-by: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
2023-07-05 21:02:35 -04:00
Harrison Chase
695e7027e6 Harrison/parameter (#7081)
add parameter to use original question or not

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-05 20:51:25 -04:00
Yevgnen
930e319ca7 Add concurrency to GitbookLoader (#7069)
- Description: Fetch all pages concurrently.
- Dependencies: `scrape_all` -> `fetch_all` -> `_fetch_with_rate_limit`
-> `_fetch` (might be broken currently:
https://github.com/hwchase17/langchain/pull/6519)
  - Tag maintainer: @rlancemartin, @eyurtsev

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 20:51:10 -04:00
Hashem Alsaket
6aa66fd2b0 Update Hugging Face Hub notebook (#7236)
Description: `flan-t5-xl` hangs, updated to `flan-t5-xxl`. Tested all
stabilityai LLMs- all hang so removed from tutorial. Temperature > 0 to
prevent unintended determinism.
Issue: #3275 
Tag maintainer: @baskaryan
2023-07-05 20:45:02 -04:00
Mykola Zomchak
8afc8e6f5d Fix web_base.py (#6519)
Fix for bug in SitemapLoader

`aiohttp` `get` does not accept `verify` argument, and currently throws
error, so SitemapLoader is not working

This PR fixes it by removing `verify` param for `get` function call

Fixes #6107

#### Who can review?

Tag maintainers/contributors who might be interested:

@eyurtsev

---------

Co-authored-by: techcenary <127699216+techcenary@users.noreply.github.com>
2023-07-05 16:53:57 -07:00
William FH
f891f7d69f Skip evaluation of unfinished runs (#7235)
Cut down on errors logged

Co-authored-by: Ankush Gola <9536492+agola11@users.noreply.github.com>
2023-07-05 16:35:20 -07:00
William FH
83cf01683e Add 'eval' tag (#7209)
Add an "eval" tag to traced evaluation runs

Most of this PR is actually
https://github.com/hwchase17/langchain/pull/7207 but I can't diff off
two separate PRs

---------

Co-authored-by: Ankush Gola <9536492+agola11@users.noreply.github.com>
2023-07-05 16:28:34 -07:00
William FH
607708a411 Add tags support for langchaintracer (#7207) 2023-07-05 16:19:04 -07:00
William FH
75aa408f10 Send evaluator logs to new session (#7206)
Also stop specifying "eval" mode since explicit project modes are
deprecated
2023-07-05 16:15:29 -07:00
Harrison Chase
0dc700eebf Harrison/scene xplain (#7228)
Co-authored-by: Kevin Pham <37129444+deoxykev@users.noreply.github.com>
2023-07-05 18:34:50 -04:00
Harrison Chase
d6541da161 remove arize nb (#7238)
was causing some issues with docs build
2023-07-05 18:34:20 -04:00
Mike Nitsenko
d669b9ece9 Document loader for Cube Semantic Layer (#6882)
### Description

This pull request introduces the "Cube Semantic Layer" document loader,
which demonstrates the retrieval of Cube's data model metadata in a
format suitable for passing to LLMs as embeddings. This enhancement aims
to provide contextual information and improve the understanding of data.

Twitter handle:
@the_cube_dev

---------

Co-authored-by: rlm <pexpresss31@gmail.com>
2023-07-05 15:18:12 -07:00
Tom
e533da8bf2 Adding Marqo to vectorstore ecosystem (#7068)
This PR brings in a vectorstore interface for
[Marqo](https://www.marqo.ai/).

The Marqo vectorstore exposes some of Marqo's functionality in addition
to the VectorStore base class. The Marqo vectorstore also makes the
embedding parameter optional because inference for embeddings is an
inherent part of Marqo.

Docs, notebook examples and integration tests included.

Related PR:
https://github.com/hwchase17/langchain/pull/2807

---------

Co-authored-by: Tom Hamer <tom@marqo.ai>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 14:44:12 -07:00
Filip Haltmayer
836d2009cb Update milvus and zilliz docstring (#7216)
Description:

Updating the docstrings for Milvus and Zilliz so that they appear
correctly on https://integrations.langchain.com/vectorstores. No changes
done to code.

Maintainer: 

@baskaryan

Signed-off-by: Filip Haltmayer <filip.haltmayer@zilliz.com>
2023-07-05 17:03:51 -04:00
Matt Robinson
d65b1951bd docs: update docs strings for base unstructured loaders (#7222)
### Summary

Updates the docstrings for the unstructured base loaders so more useful
information appears on the integrations page. If these look good, will
add similar docstrings to the other loaders.

### Reviewers
  - @rlancemartin
  - @eyurtsev
  - @hwchase17
2023-07-05 17:02:26 -04:00
Mike Salvatore
265f05b10e Enable InMemoryDocstore to be constructed without providing a dict (#6976)
- Description: Allow `InMemoryDocstore` to be created without passing a
dict to the constructor; the constructor can create a dict at runtime if
one isn't provided.
- Tag maintainer: @dev2049
2023-07-05 16:56:31 -04:00
Harrison Chase
47e7d09dff fix arize nb (#7227) 2023-07-05 16:55:48 -04:00
Feras Almannaa
79b59a8e06 optimize pgvector add_texts (#7185)
- Description: At the moment, inserting new embeddings to pgvector is
querying all embeddings every time as the defined `embeddings`
relationship is using the default params, which sets `lazy="select"`.
This change drastically improves the performance and adds a few
additional cleanups:
* remove `collection.embeddings.append` as it was querying all
embeddings on insert, replace with `collection_id` param
* centralize storing logic in add_embeddings function to reduce
duplication
  * remove boilerplate

- Issue: No issue was opened.
- Dependencies: None.
- Tag maintainer: this is a vectorstore update, so I think
@rlancemartin, @eyurtsev
- Twitter handle: @falmannaa
2023-07-05 13:19:42 -07:00
Harrison Chase
6711854e30 Harrison/dataforseo (#7214)
Co-authored-by: Alexander <sune357@gmail.com>
2023-07-05 16:02:02 -04:00
Richy Wang
cab7d86f23 Implement delete interface of vector store on AnalyticDB (#7170)
Hi, there
  This pull request contains two commits:
**1. Implement delete interface with optional ids parameter on
AnalyticDB.**
**2. Allow customization of database connection behavior by exposing
engine_args parameter in interfaces.**
- This commit adds the `engine_args` parameter to the interfaces,
allowing users to customize the behavior of the database connection. The
`engine_args` parameter accepts a dictionary of additional arguments
that will be passed to the create_engine function. Users can now modify
various aspects of the database connection, such as connection pool size
and recycle time. This enhancement provides more flexibility and control
to users when interacting with the database through the exposed
interfaces.

This commit is related to VectorStores @rlancemartin @eyurtsev 

Thank you for your attention and consideration.
2023-07-05 13:01:00 -07:00
Mike Salvatore
3ae11b7582 Handle kwargs in FAISS.load_local() (#6987)
- Description: This allows parameters such as `relevance_score_fn` to be
passed to the `FAISS` constructor via the `load_local()` class method.
-  Tag maintainer: @rlancemartin @eyurtsev
2023-07-05 15:56:40 -04:00
Jamal
a2f191a322 Replace JIRA Arbitrary Code Execution vulnerability with finer grain API wrapper (#6992)
This fixes #4833 and the critical vulnerability
https://nvd.nist.gov/vuln/detail/CVE-2023-34540

Previously, the JIRA API Wrapper had a mode that simply pipelined user
input into an `exec()` function.
[The intended use of the 'other' mode is to cover any of Atlassian's API
that don't have an existing
interface](cc33bde74f/langchain/tools/jira/prompt.py (L24))

Fortunately all of the [Atlassian JIRA API methods are subfunctions of
their `Jira`
class](https://atlassian-python-api.readthedocs.io/jira.html), so this
implementation calls these subfunctions directly.

As well as passing a string representation of the function to call, the
implementation flexibly allows for optionally passing args and/or
keyword-args. These are given as part of the dictionary input. Example:
```
    {
        "function": "update_issue_field",   #function to execute
        "args": [                           #list of ordered args similar to other examples in this JiraAPIWrapper
            "key",
            {"summary": "New summary"}
        ],
        "kwargs": {}                        #dict of key value keyword-args pairs
    }
```

the above is equivalent to `self.jira.update_issue_field("key",
{"summary": "New summary"})`

Alternate query schema designs are welcome to make querying easier
without passing and evaluating arbitrary python code. I considered
parsing (without evaluating) input python code and extracting the
function, args, and kwargs from there and then pipelining them into the
callable function via `*f(args, **kwargs)` - but this seemed more
direct.

@vowelparrot @dev2049

---------

Co-authored-by: Jamal Rahman <jamal.rahman@builder.ai>
2023-07-05 15:56:01 -04:00
Hakan Tekgul
61938a02a1 Create arize_llm_observability.ipynb (#7000)
Adding documentation and notebook for Arize callback handler. 

  - @dev2049
  - Agents / Tools / Toolkits: @vowelparrot
  - Tracing / Callbacks: @agola11
2023-07-05 15:55:47 -04:00
Leonid Ganeline
ecee4d6e92 docs: update youtube videos and tutorials (#6515)
added tutorials.mdx; updated youtube.mdx

Rationale: the Tutorials section in the documentation is top-priority.
(for example, https://pytorch.org/docs/stable/index.html) Not every
project has resources to make tutorials. We have such a privilege.
Community experts created several tutorials on YouTube. But the tutorial
links are now hidden on the YouTube page and not easily discovered by
first-time visitors.

- Added new videos and tutorials that were created since the last
update.
- Reprioritized some of the videos on the basis of their view
counts.

#### Who can review?

  - @hwchase17
    - @dev2049
2023-07-05 12:50:31 -07:00
Santiago Delgado
fa55c5a16b Fixed Office365 tool __init__.py files, tests, and get_tools() function (#7046)
## Description
Added Office365 tool modules to `__init__.py` files
## Issue
As described in Issue
https://github.com/hwchase17/langchain/issues/6936, the Office365
toolkit can't be loaded easily because it is not included in the
`__init__.py` files.
## Reviewer
@dev2049
2023-07-05 15:46:21 -04:00
wewebber-merlin
8a7c95e555 Retryable exception for empty OpenAI embedding. (#7070)
Description:

The OpenAI "embeddings" API intermittently falls into a failure state
where an embedding is returned as [ Nan ], rather than the expected 1536
floats. This patch checks for that state (specifically, for an embedding
of length 1) and if it occurs, throws an ApiError, which will cause the
chunk to be retried.

Issue:

I have been unable to find an official langchain issue for this problem,
but it is discussed (by another user) at
https://stackoverflow.com/questions/76469415/getting-embeddings-of-length-1-from-langchain-openaiembeddings

Maintainer: @dev2049

Testing: 

Since this is an intermittent OpenAI issue, I have not provided a unit
or integration test. The provided code has, though, been run
successfully over several million tokens.

---------

Co-authored-by: William Webber <william@williamwebber.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 15:23:45 -04:00
Nuno Campos
e4459e423b Mark some output parsers as serializable (cross-checked w/ JS) (#7083)
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @dev2049
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @dev2049
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @vowelparrot
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
2023-07-05 14:53:56 -04:00
Ankush Gola
4c1c05c2c7 support adding custom metadata to runs (#7120)
- [x] wire up tools
- [x] wire up retrievers
- [x] add integration test

<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
2023-07-05 11:11:38 -07:00
Josh Reini
30d8d1d3d0 add trulens integration (#7096)
Description: Add TruLens integration.

Twitter: @trulensml

For review:
  - Tracing: @agola11
  - Tools: @hinthornw
2023-07-05 14:04:55 -04:00
Hyoseung Kim
9abf1847f4 Fix steamship import error (#7133)
Description: Fix steamship import error

When running multi_modal_output_agent:
field "steamship" not yet prepared so type is still a ForwardRef, you
might need to call SteamshipImageGenerationTool.update_forward_refs().

Tag maintainer: @hinthornw

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 14:04:38 -04:00
Mohammad Mohtashim
7d92e9407b Jinja2 validation changed to issue warnings rather than issuing exceptions. (#7161)
- Description: If there are missing or extra variables when validating
Jinja 2 template then a warning is issued rather than raising an
exception. This allows for better flexibility for the developer as
described in #7044. Also changed the relevant test so pytest is checking
for raised warnings rather than exceptions.
  - Issue: #7044 
  - Tag maintainer: @hwchase17, @baskaryan

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 14:04:29 -04:00
whying
e288410e72 fix: Chroma filter symbols not supporting LIKE and CONTAIN (#7169)
Fixing issue with SelfQueryRetriever due to unsupported LIKE and CONTAIN
comparators in Chroma's WHERE filter statements. This pull request
introduces a redefined set of comparators in Chroma to address the
problem and make it compatible with SelfQueryRetriever. For information
on the comparators supported by Chroma's filter, please refer to
https://docs.trychroma.com/usage-guide#using-where-filters.
<img width="495" alt="image"
src="https://github.com/hwchase17/langchain/assets/22267652/34789191-0293-4f63-9bdf-ad1e1f2567c4">

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 14:04:18 -04:00
Nuno Campos
26409b01bd Remove extra base model (#7213)
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
2023-07-05 14:02:27 -04:00
Samhita Alla
6f358bb04a make textstat optional in the flyte callback handler (#7186)
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->

This PR makes the `textstat` library optional in the Flyte callback
handler.

@hinthornw, would you mind reviewing this PR since you merged the flyte
callback handler code previously?

---------

Signed-off-by: Samhita Alla <aallasamhita@gmail.com>
2023-07-05 13:15:56 -04:00
Conrad Fernandez
6eff0fa2ca Added documentation for add_texts function for Pinecone integration (#7134)
- Description: added some documentation to the Pinecone vector store
docs page.
- Issue: #7126 
- Dependencies: None
- Tag maintainer: @baskaryan 

I can add more documentation on the Pinecone integration functions as I
am going to go in great depth into this area. Just wanted to check with
the maintainers if this is all good.
2023-07-05 13:11:37 -04:00
Nuno Campos
81e5b1ad36 Add serialized object to retriever start callback (#7074)
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @dev2049
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @dev2049
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @vowelparrot
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
2023-07-05 18:04:43 +01:00
Efkan S. Goktepe
baf48d3583 Replace stop clause with shorter, pythonic alternative (#7159)
Replace this comment with:
- Description: Replace `if var is not None:` with `if var:`, a concise
and pythonic alternative
  - Issue: N/A
  - Dependencies: None
  - Tag maintainer: Unsure
  - Twitter handle: N/A

Signed-off-by: serhatgktp <efkan@ibm.com>
2023-07-05 13:03:22 -04:00
Shuqian
8045870a0f fix: prevent adding an empty string to the result queue in AsyncIteratorCallbackHandler (#7180)
- Description: Modify the code for
AsyncIteratorCallbackHandler.on_llm_new_token to ensure that it does not
add an empty string to the result queue.
- Tag maintainer: @agola11

When using AsyncIteratorCallbackHandler with OpenAIFunctionsAgent, if
the LLM responds with a function_call instead of a direct answer, the
AsyncIteratorCallbackHandler.on_llm_new_token would be called with an empty
string.
see also: langchain.chat_models.openai.ChatOpenAI._generate

An alternative solution is to modify the
langchain.chat_models.openai.ChatOpenAI._generate and do not call the
run_manager.on_llm_new_token when the token is empty string.
I am not sure which solution is better.

@hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 13:00:35 -04:00
felixocker
db98c44f8f Support for SPARQL (#7165)
# [SPARQL](https://www.w3.org/TR/rdf-sparql-query/) for
[LangChain](https://github.com/hwchase17/langchain)

## Description
LangChain support for knowledge graphs relying on W3C standards using
RDFlib: SPARQL/ RDF(S)/ OWL with special focus on RDF \
* Works with local files, files from the web, and SPARQL endpoints
* Supports both SELECT and UPDATE queries
* Includes both a Jupyter notebook with an example and integration tests

## Contribution compared to related PRs and discussions
* [Wikibase agent](https://github.com/hwchase17/langchain/pull/2690) -
uses SPARQL, but specifically for wikibase querying
* [Cypher qa](https://github.com/hwchase17/langchain/pull/5078) - graph
DB question answering for Neo4J via Cypher
* [PR 6050](https://github.com/hwchase17/langchain/pull/6050) - tries
something similar, but does not cover UPDATE queries and supports only
RDF
* Discussions on [w3c mailing list](mailto:semantic-web@w3.org) related
to the combination of LLMs (specifically ChatGPT) and knowledge graphs

## Dependencies
* [RDFlib](https://github.com/RDFLib/rdflib)

## Tag maintainer
Graph database related to memory -> @hwchase17
2023-07-05 13:00:16 -04:00
Paul Cook
7cd0936b1c Update in_memory.py to fix "TypeError: keywords must be strings" (#7202)
Update in_memory.py to fix "TypeError: keywords must be strings" on
certain dictionaries

Simple fix to prevent a "TypeError: keywords must be strings" error I
encountered in my use case.

@baskaryan 

Thanks! Hope useful!

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 12:48:38 -04:00
Prakul Agarwal
38f853dfa3 Fixed typos in MongoDB Atlas Vector Search documentation (#7174)
Fix for typos in MongoDB Atlas Vector Search documentation
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
2023-07-05 12:48:00 -04:00
Shuqian
ee1d488c03 fix: rename the invalid function name of GoogleSerperResults Tool for OpenAIFunctionCall (#7176)
- Description: rename the invalid function name of GoogleSerperResults
Tool for OpenAIFunctionCall
- Tag maintainer: @hinthornw

When I use the GoogleSerperResults in OpenAIFunctionCall agent, the
following error occurs:
```shell
openai.error.InvalidRequestError: 'Google Serrper Results JSON' does not match '^[a-zA-Z0-9_-]{1,64}$' - 'functions.0.name'
```

So I rename the GoogleSerperResults's property "name" from "Google
Serrper Results JSON" to "google_serrper_results_json" just like
GoogleSerperRun's name: "google_serper", and it works.
I guess this should be reasonable.
2023-07-05 12:47:50 -04:00
Nir Gazit
6666e422c6 fix: missing parameter in POST/PUT/PATCH HTTP requests (#7194)
<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
@hinthornw

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-05 12:47:30 -04:00
Harrison Chase
8410c6a747 add token max parameter (#7204) 2023-07-05 12:09:25 -04:00
Harrison Chase
7b585c7585 add tqdm to embeddings (#7205)
for longer running embeddings, can be helpful to visualize
2023-07-05 12:04:22 -04:00
Raouf Chebri
6fc24743b7 Add pg_hnsw vectorstore integration (#6893)
Hi @rlancemartin, @eyurtsev!

- Description: Adding HNSW extension support for Postgres. Similar to
pgvector vectorstore, with 3 differences
      1. it uses HNSW extension for exact and ANN searches, 
      2. Vectors are of type array of real
      3. Only supports L2
      
- Dependencies: [HNSW](https://github.com/knizhnik/hnsw) extension for
Postgres
  
  - Example:
  ```python
    db = HNSWVectoreStore.from_documents(
      embedding=embeddings,
      documents=docs,
      collection_name=collection_name,
      connection_string=connection_string
  )
  
  query = "What did the president say about Ketanji Brown Jackson"
docs_with_score: List[Tuple[Document, float]] =
db.similarity_search_with_score(query)
  ```

The example notebook is in the PR too.
2023-07-05 08:10:10 -07:00
Harrison Chase
79fb90aafd bump version to 224 (#7203) 2023-07-05 10:41:26 -04:00
Harrison Chase
1415966d64 propogate token max (#7201) 2023-07-05 10:25:48 -04:00
Harrison Chase
a94c4cca68 more formatting (#7200) 2023-07-05 10:03:02 -04:00
Harrison Chase
e18e838aae fix weird bold issues in docs (#7198) 2023-07-05 09:52:49 -04:00
Baichuan Sun
e27ba9d92b fix AmazonAPIGateway _identifying_params (#7167)
- correct `endpoint_name` to `api_url`
- add `headers`

<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: a description of the change, 
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
2023-07-04 23:14:51 -04:00
Harrison Chase
39e685b80f Harrison/conv retrieval docs (#7080)
Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-07-04 20:17:43 -04:00
Shuqian
bf9e4ef35f feat: implement python repl tool arun (#7125)
Description: implement python repl tool arun
Tag maintainer: @agola11
2023-07-04 20:15:49 -04:00
Alex Iribarren
9cfb311ecb Remove duplicate lines (#7138)
I believe these two lines are unnecessary, the variable `function_call`
is already defined.
2023-07-04 20:13:27 -04:00
volodymyr-memsql
405865c91a feat(SingleStoreVectorStore): change connection attributes in the database connection (#7142)
Minor change to the SingleStoreVectorStore:

Updated connection attributes names according to the SingleStoreDB
recommendations

@rlancemartin, @eyurtsev

---------

Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com>
2023-07-04 20:12:56 -04:00
348 changed files with 14299 additions and 2245 deletions

View File

@@ -165,28 +165,35 @@ Classes
callbacks.aim_callback.AimCallbackHandler
callbacks.argilla_callback.ArgillaCallbackHandler
callbacks.arize_callback.ArizeCallbackHandler
callbacks.arthur_callback.ArthurCallbackHandler
callbacks.base.AsyncCallbackHandler
callbacks.base.BaseCallbackHandler
callbacks.base.BaseCallbackManager
callbacks.clearml_callback.ClearMLCallbackHandler
callbacks.comet_ml_callback.CometCallbackHandler
callbacks.file.FileCallbackHandler
callbacks.flyte_callback.FlyteCallbackHandler
callbacks.human.HumanApprovalCallbackHandler
callbacks.human.HumanRejectedException
callbacks.infino_callback.InfinoCallbackHandler
callbacks.manager.AsyncCallbackManager
callbacks.manager.AsyncCallbackManagerForChainRun
callbacks.manager.AsyncCallbackManagerForLLMRun
callbacks.manager.AsyncCallbackManagerForRetrieverRun
callbacks.manager.AsyncCallbackManagerForToolRun
callbacks.manager.AsyncParentRunManager
callbacks.manager.AsyncRunManager
callbacks.manager.BaseRunManager
callbacks.manager.CallbackManager
callbacks.manager.CallbackManagerForChainRun
callbacks.manager.CallbackManagerForLLMRun
callbacks.manager.CallbackManagerForRetrieverRun
callbacks.manager.CallbackManagerForToolRun
callbacks.manager.ParentRunManager
callbacks.manager.RunManager
callbacks.mlflow_callback.MlflowCallbackHandler
callbacks.openai_info.OpenAICallbackHandler
callbacks.promptlayer_callback.PromptLayerCallbackHandler
callbacks.stdout.StdOutCallbackHandler
callbacks.streaming_aiter.AsyncIteratorCallbackHandler
callbacks.streaming_aiter_final_only.AsyncFinalIteratorCallbackHandler
@@ -229,6 +236,8 @@ Functions
callbacks.aim_callback.import_aim
callbacks.clearml_callback.import_clearml
callbacks.comet_ml_callback.import_comet_ml
callbacks.flyte_callback.analyze_text
callbacks.flyte_callback.import_flytekit
callbacks.infino_callback.import_infino
callbacks.manager.env_var_is_set
callbacks.manager.get_openai_callback
@@ -283,9 +292,11 @@ Classes
chains.base.Chain
chains.combine_documents.base.AnalyzeDocumentChain
chains.combine_documents.base.BaseCombineDocumentsChain
chains.combine_documents.map_reduce.CombineDocsProtocol
chains.combine_documents.map_reduce.MapReduceDocumentsChain
chains.combine_documents.map_rerank.MapRerankDocumentsChain
chains.combine_documents.reduce.AsyncCombineDocsProtocol
chains.combine_documents.reduce.CombineDocsProtocol
chains.combine_documents.reduce.ReduceDocumentsChain
chains.combine_documents.refine.RefineDocumentsChain
chains.combine_documents.stuff.StuffDocumentsChain
chains.constitutional_ai.base.ConstitutionalChain
@@ -299,8 +310,10 @@ Classes
chains.flare.prompts.FinishedOutputParser
chains.graph_qa.base.GraphQAChain
chains.graph_qa.cypher.GraphCypherQAChain
chains.graph_qa.hugegraph.HugeGraphQAChain
chains.graph_qa.kuzu.KuzuQAChain
chains.graph_qa.nebulagraph.NebulaGraphQAChain
chains.graph_qa.sparql.GraphSparqlQAChain
chains.hyde.base.HypotheticalDocumentEmbedder
chains.llm.LLMChain
chains.llm_bash.base.LLMBashChain
@@ -363,7 +376,6 @@ Functions
.. autosummary::
:toctree: chains
chains.combine_documents.base.format_document
chains.graph_qa.cypher.extract_cypher
chains.loading.load_chain
chains.loading.load_chain_from_config
@@ -415,6 +427,7 @@ Classes
chat_models.fake.FakeListChatModel
chat_models.google_palm.ChatGooglePalm
chat_models.google_palm.ChatGooglePalmError
chat_models.human.HumanInputChatModel
chat_models.openai.ChatOpenAI
chat_models.promptlayer_openai.PromptLayerChatOpenAI
chat_models.vertexai.ChatVertexAI
@@ -513,6 +526,7 @@ Classes
document_loaders.blob_loaders.youtube_audio.YoutubeAudioLoader
document_loaders.blockchain.BlockchainDocumentLoader
document_loaders.blockchain.BlockchainType
document_loaders.brave_search.BraveSearchLoader
document_loaders.chatgpt.ChatGPTLoader
document_loaders.college_confidential.CollegeConfidentialLoader
document_loaders.confluence.ConfluenceLoader
@@ -520,6 +534,7 @@ Classes
document_loaders.conllu.CoNLLULoader
document_loaders.csv_loader.CSVLoader
document_loaders.csv_loader.UnstructuredCSVLoader
document_loaders.cube_semantic.CubeSemanticLoader
document_loaders.dataframe.DataFrameLoader
document_loaders.diffbot.DiffbotLoader
document_loaders.directory.DirectoryLoader
@@ -736,6 +751,7 @@ Classes
embeddings.self_hosted.SelfHostedEmbeddings
embeddings.self_hosted_hugging_face.SelfHostedHuggingFaceEmbeddings
embeddings.self_hosted_hugging_face.SelfHostedHuggingFaceInstructEmbeddings
embeddings.spacy_embeddings.SpacyEmbeddings
embeddings.tensorflow_hub.TensorflowHubEmbeddings
embeddings.vertexai.VertexAIEmbeddings
@@ -790,6 +806,9 @@ Classes
evaluation.comparison.eval_chain.PairwiseStringResultOutputParser
evaluation.criteria.eval_chain.CriteriaEvalChain
evaluation.criteria.eval_chain.CriteriaResultOutputParser
evaluation.embedding_distance.base.EmbeddingDistance
evaluation.embedding_distance.base.EmbeddingDistanceEvalChain
evaluation.embedding_distance.base.PairwiseEmbeddingDistanceEvalChain
evaluation.qa.eval_chain.ContextQAEvalChain
evaluation.qa.eval_chain.CotQAEvalChain
evaluation.qa.eval_chain.QAEvalChain
@@ -799,10 +818,16 @@ Classes
evaluation.run_evaluators.implementations.ChoicesOutputParser
evaluation.run_evaluators.implementations.CriteriaOutputParser
evaluation.run_evaluators.implementations.StringRunEvaluatorInputMapper
evaluation.run_evaluators.implementations.TrajectoryEvalOutputParser
evaluation.run_evaluators.implementations.TrajectoryInputMapper
evaluation.run_evaluators.implementations.TrajectoryRunEvalOutputParser
evaluation.schema.AgentTrajectoryEvaluator
evaluation.schema.EvaluatorType
evaluation.schema.LLMEvalChain
evaluation.schema.PairwiseStringEvaluator
evaluation.schema.StringEvaluator
evaluation.string_distance.base.PairwiseStringDistanceEvalChain
evaluation.string_distance.base.StringDistance
evaluation.string_distance.base.StringDistanceEvalChain
Functions
--------------
@@ -812,6 +837,8 @@ Functions
:toctree: evaluation
evaluation.loading.load_dataset
evaluation.loading.load_evaluator
evaluation.loading.load_evaluators
evaluation.run_evaluators.implementations.get_criteria_evaluator
evaluation.run_evaluators.implementations.get_qa_evaluator
evaluation.run_evaluators.implementations.get_trajectory_evaluator
@@ -1057,6 +1084,7 @@ Functions
llms.aviary.get_completions
llms.aviary.get_models
llms.base.create_base_retry_decorator
llms.base.get_prompts
llms.base.update_cache
llms.cohere.completion_with_retry
@@ -1069,6 +1097,7 @@ Functions
llms.openai.completion_with_retry
llms.openai.update_token_usage
llms.utils.enforce_stop_tokens
llms.vertexai.completion_with_retry
llms.vertexai.is_codey_model
:mod:`langchain.load`: Load
@@ -1241,7 +1270,6 @@ Classes
:toctree: prompts
:template: class.rst
prompts.base.BasePromptTemplate
prompts.base.StringPromptTemplate
prompts.base.StringPromptValue
prompts.chat.AIMessagePromptTemplate
@@ -1316,7 +1344,7 @@ Classes
retrievers.azure_cognitive_search.AzureCognitiveSearchRetriever
retrievers.chatgpt_plugin_retriever.ChatGPTPluginRetriever
retrievers.contextual_compression.ContextualCompressionRetriever
retrievers.databerry.DataberryRetriever
retrievers.chaindesk.ChaindeskRetriever
retrievers.docarray.DocArrayRetriever
retrievers.docarray.SearchType
retrievers.document_compressors.base.BaseDocumentCompressor
@@ -1348,7 +1376,7 @@ Classes
retrievers.multi_query.LineListOutputParser
retrievers.multi_query.MultiQueryRetriever
retrievers.pinecone_hybrid_search.PineconeHybridSearchRetriever
retrievers.pupmed.PubMedRetriever
retrievers.pubmed.PubMedRetriever
retrievers.remote_retriever.RemoteLangChainRetriever
retrievers.self_query.base.SelfQueryRetriever
retrievers.self_query.chroma.ChromaTranslator
@@ -1400,28 +1428,29 @@ Classes
:toctree: schema
:template: class.rst
schema.AIMessage
schema.AgentFinish
schema.BaseChatMessageHistory
schema.BaseDocumentTransformer
schema.BaseLLMOutputParser
schema.BaseMemory
schema.BaseMessage
schema.BaseOutputParser
schema.BaseRetriever
schema.ChatGeneration
schema.ChatMessage
schema.ChatResult
schema.Document
schema.FunctionMessage
schema.Generation
schema.HumanMessage
schema.LLMResult
schema.NoOpOutputParser
schema.OutputParserException
schema.PromptValue
schema.RunInfo
schema.SystemMessage
schema.agent.AgentFinish
schema.document.BaseDocumentTransformer
schema.document.Document
schema.memory.BaseChatMessageHistory
schema.memory.BaseMemory
schema.messages.AIMessage
schema.messages.BaseMessage
schema.messages.ChatMessage
schema.messages.FunctionMessage
schema.messages.HumanMessage
schema.messages.SystemMessage
schema.output.ChatGeneration
schema.output.ChatResult
schema.output.Generation
schema.output.LLMResult
schema.output.RunInfo
schema.output_parser.BaseLLMOutputParser
schema.output_parser.BaseOutputParser
schema.output_parser.NoOpOutputParser
schema.output_parser.OutputParserException
schema.prompt.PromptValue
schema.prompt_template.BasePromptTemplate
schema.retriever.BaseRetriever
Functions
--------------
@@ -1430,9 +1459,10 @@ Functions
.. autosummary::
:toctree: schema
schema.get_buffer_string
schema.messages_from_dict
schema.messages_to_dict
schema.messages.get_buffer_string
schema.messages.messages_from_dict
schema.messages.messages_to_dict
schema.prompt_template.format_document
:mod:`langchain.server`: Server
================================
@@ -1535,6 +1565,8 @@ Classes
tools.bing_search.tool.BingSearchRun
tools.brave_search.tool.BraveSearch
tools.convert_to_openai.FunctionDescription
tools.dataforseo_api_search.tool.DataForSeoAPISearchResults
tools.dataforseo_api_search.tool.DataForSeoAPISearchRun
tools.ddg_search.tool.DuckDuckGoSearchResults
tools.ddg_search.tool.DuckDuckGoSearchRun
tools.file_management.copy.CopyFileTool
@@ -1708,6 +1740,7 @@ Classes
utilities.bibtex.BibtexparserWrapper
utilities.bing_search.BingSearchAPIWrapper
utilities.brave_search.BraveSearchWrapper
utilities.dataforseo_api_search.DataForSeoAPIWrapper
utilities.duckduckgo_search.DuckDuckGoSearchAPIWrapper
utilities.google_places_api.GooglePlacesAPIWrapper
utilities.google_search.GoogleSearchAPIWrapper
@@ -1805,12 +1838,17 @@ Classes
vectorstores.faiss.FAISS
vectorstores.hologres.Hologres
vectorstores.lancedb.LanceDB
vectorstores.marqo.Marqo
vectorstores.matching_engine.MatchingEngine
vectorstores.milvus.Milvus
vectorstores.mongodb_atlas.MongoDBAtlasVectorSearch
vectorstores.myscale.MyScale
vectorstores.myscale.MyScaleSettings
vectorstores.opensearch_vector_search.OpenSearchVectorSearch
vectorstores.pgembedding.BaseModel
vectorstores.pgembedding.CollectionStore
vectorstores.pgembedding.EmbeddingStore
vectorstores.pgembedding.PGEmbedding
vectorstores.pgvector.BaseModel
vectorstores.pgvector.CollectionStore
vectorstores.pgvector.DistanceStrategy

View File

@@ -16,22 +16,6 @@
{%- set development_attrs = '' %}
{%- endif %}
{# title, link, link_attrs #}
{%- set drop_down_navigation = [
('Getting Started', pathto('getting_started'), ''),
('Tutorial', pathto('tutorial/index'), ''),
("What's new", pathto('whats_new/v' + version), ''),
('Glossary', pathto('glossary'), ''),
('Development', development_link, development_attrs),
('FAQ', pathto('faq'), ''),
('Support', pathto('support'), ''),
('Related packages', pathto('related_projects'), ''),
('Roadmap', pathto('roadmap'), ''),
('Governance', pathto('governance'), ''),
('About us', pathto('about'), ''),
('GitHub', 'https://github.com/scikit-learn/scikit-learn', ''),
('Other Versions and Download', 'https://scikit-learn.org/dev/versions.html', '')]
-%}
<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
<div class="container-fluid {{ top_container_cls }} px-0">

View File

Before

Width:  |  Height:  |  Size: 157 KiB

After

Width:  |  Height:  |  Size: 157 KiB

View File

@@ -0,0 +1,124 @@
# Tutorials
⛓ icon marks a new addition [last update 2023-07-05]
---------------------
### DeepLearning.AI courses
by [Harrison Chase](https://github.com/hwchase17) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
- ⛓ [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
### Handbook
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
### Short Tutorials
[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
## Tutorials
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs)
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
- [LangChain Data Loaders, Tokenizers, Chunking, and Datasets - Data Prep 101](https://youtu.be/eqOfr4AGLk8)
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
- #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
- #7 [LangChain Agents Deep Dive with `GPT 3.5`](https://youtu.be/jSP-gSEyVeI)
- #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
- #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
- [Using NEW `MPT-7B` in Hugging Face and LangChain](https://youtu.be/DXpk9K7DgMo)
- ⛓ [`MPT-30B` Chatbot with LangChain](https://youtu.be/pnem-EhT6VI)
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Greg Kamradt (Data Indy)](https://www.youtube.com/@DataIndependent)
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
- [Beginner Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
- [Agents Overview + Google Searches](https://youtu.be/Jq9Sf68ozk0)
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
- [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
- [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
- [Control Tone & Writing Style Of Your LLM Output](https://youtu.be/miBG-a3FuhU)
- [Build Your Own `AI Twitter Bot` Using LLMs](https://youtu.be/yLWLDjT01q8)
- [ChatGPT made my interview questions for me (`Streamlit` + LangChain)](https://youtu.be/zvoAMx0WKkw)
- [Function Calling via ChatGPT API - First Look With LangChain](https://youtu.be/0-zlUy7VUjg)
- ⛓ [Extract Topics From Video/Audio With LLMs (Topic Modeling w/ LangChain)](https://youtu.be/pEkxRQFNAs4)
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai)
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
- [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
- [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
- [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
- [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
- [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
- [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
- [`Camel` + LangChain for Synthetic Data & Market Research](https://youtu.be/GldMMK6-_-g)
- [Information Extraction with LangChain & `Kor`](https://youtu.be/SW1ZdqH0rRQ)
- [Converting a LangChain App from OpenAI to OpenSource](https://youtu.be/KUDn7bVyIfc)
- [Using LangChain `Output Parsers` to get what you want out of LLMs](https://youtu.be/UVn2NroKQCw)
- [Building a LangChain Custom Medical Agent with Memory](https://youtu.be/6UFtRwWnHws)
- [Understanding `ReACT` with LangChain](https://youtu.be/Eug2clsLtFs)
- [`OpenAI Functions` + LangChain : Building a Multi Tool Agent](https://youtu.be/4KXK6c6TVXQ)
- [What can you do with 16K tokens in LangChain?](https://youtu.be/z2aCZBAtWXs)
- [Tagging and Extraction - Classification using `OpenAI Functions`](https://youtu.be/a8hMgIcUEnE)
- ⛓ [HOW to Make Conversational Form with LangChain](https://youtu.be/IT93On2LB5k)
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
- [Langchain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
- [LangFlow: Build Chatbots without Writing Code](https://youtu.be/KJ-ux3hre4s)
- [LangChain: Giving Memory to LLMs](https://youtu.be/dxO6pzlgJiY)
- [BEST OPEN Alternative to `OPENAI's EMBEDDINGs` for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY)
### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
- [LangChain & Supabase Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI)
---------------------
⛓ icon marks a new addition [last update 2023-07-05]

View File

@@ -138,7 +138,11 @@
},
{
"source": "/en/latest/integrations/databerry.html",
"destination": "/docs/ecosystem/integrations/databerry"
"destination": "/docs/ecosystem/integrations/chaindesk"
},
{
"source": "/docs/ecosystem/integrations/databerry",
"destination": "/docs/ecosystem/integrations/chaindesk"
},
{
"source": "/en/latest/integrations/databricks/databricks.html",
@@ -1330,7 +1334,11 @@
},
{
"source": "/en/latest/modules/indexes/retrievers/examples/databerry.html",
"destination": "/docs/modules/data_connection/retrievers/integrations/databerry"
"destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
},
{
"source": "/docs/modules/data_connection/retrievers/integrations/databerry",
"destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
},
{
"source": "/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html",
@@ -1864,6 +1872,14 @@
"source": "/en/latest/modules/models/llms/integrations/writer.html",
"destination": "/docs/modules/model_io/models/llms/integrations/writer"
},
{
"source": "/en/latest/modules/prompts/output_parsers.html",
"destination": "/docs/modules/model_io/output_parsers/"
},
{
"source": "/docs/modules/prompts/output_parsers.html",
"destination": "/docs/modules/model_io/output_parsers/"
},
{
"source": "/en/latest/modules/prompts/output_parsers/examples/datetime.html",
"destination": "/docs/modules/model_io/output_parsers/datetime"
@@ -2117,4 +2133,4 @@
"destination": "/docs/:path*"
}
]
}
}

View File

@@ -1,6 +1,6 @@
# YouTube tutorials
# YouTube videos
This is a collection of `LangChain` videos on `YouTube`.
⛓ icon marks a new addition [last update 2023-06-20]
### [Official LangChain YouTube channel](https://www.youtube.com/@LangChain)
@@ -9,7 +9,6 @@ This is a collection of `LangChain` videos on `YouTube`.
- [LangChain and Weaviate with Harrison Chase and Bob van Luijt - Weaviate Podcast #36](https://youtu.be/lhby7Ql7hbk) by [Weaviate • Vector Database](https://www.youtube.com/@Weaviate)
- [LangChain Demo + Q&A with Harrison Chase](https://youtu.be/zaYTXQFR0_s?t=788) by [Full Stack Deep Learning](https://www.youtube.com/@FullStackDeepLearning)
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI) by [Chat with data](https://www.youtube.com/@chatwithdata)
- ⛓️ [LangChain "Agents in Production" Webinar](https://youtu.be/k8GNCCs16F4) by [LangChain](https://www.youtube.com/@LangChain)
## Videos (sorted by views)
@@ -31,6 +30,9 @@ This is a collection of `LangChain` videos on `YouTube`.
- [`Weaviate` + LangChain for LLM apps presented by Erika Cardenas](https://youtu.be/7AGj4Td5Lgw) by [`Weaviate` • Vector Database](https://www.youtube.com/@Weaviate)
- [Langchain Overview — How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
- [Langchain Overview - How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
- [LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
- [LangChain 101: The Complete Beginner's Guide](https://youtu.be/P3MAbZ2eMUI)
- [Custom langchain Agent & Tools with memory. Turn any `Python function` into langchain tool with Gpt 3](https://youtu.be/NIG8lXk0ULg) by [echohive](https://www.youtube.com/@echohive)
- [LangChain: Run Language Models Locally - `Hugging Face Models`](https://youtu.be/Xxxuw4_iCzw) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
- [`ChatGPT` with any `YouTube` video using langchain and `chromadb`](https://youtu.be/TQZfB2bzVwU) by [echohive](https://www.youtube.com/@echohive)
@@ -46,154 +48,68 @@ This is a collection of `LangChain` videos on `YouTube`.
- [Langchain + `Zapier` Agent](https://youtu.be/yribLAb-pxA) by [Merk](https://www.youtube.com/@merksworld)
- [Connecting the Internet with `ChatGPT` (LLMs) using Langchain And Answers Your Questions](https://youtu.be/9Y0TBC63yZg) by [Kamalraj M M](https://www.youtube.com/@insightbuilder)
- [Build More Powerful LLM Applications for Businesss with LangChain (Beginners Guide)](https://youtu.be/sp3-WLKEcBg) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
- ⛓️ [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
- ⛓️ [Chatbot Factory: Streamline Python Chatbot Creation with LLMs and Langchain](https://youtu.be/eYer3uzrcuM) by [Finxter](https://www.youtube.com/@CobusGreylingZA)
- ⛓️ [LangChain Tutorial - ChatGPT mit eigenen Daten](https://youtu.be/0XDLyY90E2c) by [Coding Crashkurse](https://www.youtube.com/@codingcrashkurse6429)
- ⛓️ [Chat with a `CSV` | LangChain Agents Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [GoDataProf](https://www.youtube.com/@godataprof)
- ⛓️ [Introdução ao Langchain - #Cortes - Live DataHackers](https://youtu.be/fw8y5VRei5Y) by [Prof. João Gabriel Lima](https://www.youtube.com/@profjoaogabriellima)
- ⛓️ [LangChain: Level up `ChatGPT` !? | LangChain Tutorial Part 1](https://youtu.be/vxUGx8aZpDE) by [Code Affinity](https://www.youtube.com/@codeaffinitydev)
- ⛓️ [KI schreibt krasses Youtube Skript 😲😳 | LangChain Tutorial Deutsch](https://youtu.be/QpTiXyK1jus) by [SimpleKI](https://www.youtube.com/@simpleki)
- ⛓️ [Chat with Audio: Langchain, `Chroma DB`, OpenAI, and `Assembly AI`](https://youtu.be/Kjy7cx1r75g) by [AI Anytime](https://www.youtube.com/@AIAnytime)
- ⛓️ [QA over documents with Auto vector index selection with Langchain router chains](https://youtu.be/9G05qybShv8) by [echohive](https://www.youtube.com/@echohive)
- ⛓️ [Build your own custom LLM application with `Bubble.io` & Langchain (No Code & Beginner friendly)](https://youtu.be/O7NhQGu1m6c) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
- ⛓️ [Simple App to Question Your Docs: Leveraging `Streamlit`, `Hugging Face Spaces`, LangChain, and `Claude`!](https://youtu.be/X4YbNECRr7o) by [Chris Alexiuk](https://www.youtube.com/@chrisalexiuk)
- ⛓️ [LANGCHAIN AI- `ConstitutionalChainAI` + Databutton AI ASSISTANT Web App](https://youtu.be/5zIU6_rdJCU) by [Avra](https://www.youtube.com/@Avra_b)
- ⛓️ [LANGCHAIN AI AUTONOMOUS AGENT WEB APP - 👶 `BABY AGI` 🤖 with EMAIL AUTOMATION using `DATABUTTON`](https://youtu.be/cvAwOGfeHgw) by [Avra](https://www.youtube.com/@Avra_b)
- ⛓️ [The Future of Data Analysis: Using A.I. Models in Data Analysis (LangChain)](https://youtu.be/v_LIcVyg5dk) by [Absent Data](https://www.youtube.com/@absentdata)
- ⛓️ [Memory in LangChain | Deep dive (python)](https://youtu.be/70lqvTFh_Yg) by [Eden Marco](https://www.youtube.com/@EdenMarco)
- ⛓️ [9 LangChain UseCases | Beginner's Guide | 2023](https://youtu.be/zS8_qosHNMw) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- ⛓️ [Use Large Language Models in Jupyter Notebook | LangChain | Agents & Indexes](https://youtu.be/JSe11L1a_QQ) by [Abhinaw Tiwari](https://www.youtube.com/@AbhinawTiwariAT)
- ⛓️ [How to Talk to Your Langchain Agent | `11 Labs` + `Whisper`](https://youtu.be/N4k459Zw2PU) by [VRSEN](https://www.youtube.com/@vrsen)
- ⛓️ [LangChain Deep Dive: 5 FUN AI App Ideas To Build Quickly and Easily](https://youtu.be/mPYEPzLkeks) by [James NoCode](https://www.youtube.com/@jamesnocode)
- ⛓️ [BEST OPEN Alternative to OPENAI's EMBEDDINGs for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
- ⛓️ [LangChain 101: Models](https://youtu.be/T6c_XsyaNSQ) by [Mckay Wrigley](https://www.youtube.com/@realmckaywrigley)
- ⛓️ [LangChain with JavaScript Tutorial #1 | Setup & Using LLMs](https://youtu.be/W3AoeMrg27o) by [Leon van Zyl](https://www.youtube.com/@leonvanzyl)
- ⛓️ [LangChain Overview & Tutorial for Beginners: Build Powerful AI Apps Quickly & Easily (ZERO CODE)](https://youtu.be/iI84yym473Q) by [James NoCode](https://www.youtube.com/@jamesnocode)
- ⛓️ [LangChain In Action: Real-World Use Case With Step-by-Step Tutorial](https://youtu.be/UO699Szp82M) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
- ⛓️ [Summarizing and Querying Multiple Papers with LangChain](https://youtu.be/p_MQRWH5Y6k) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
- ⛓️ [Using Langchain (and `Replit`) through `Tana`, ask `Google`/`Wikipedia`/`Wolfram Alpha` to fill out a table](https://youtu.be/Webau9lEzoI) by [Stian Håklev](https://www.youtube.com/@StianHaklev)
- ⛓️ [Langchain PDF App (GUI) | Create a ChatGPT For Your `PDF` in Python](https://youtu.be/wUAUdEw5oxM) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- ⛓️ [Auto-GPT with LangChain 🔥 | Create Your Own Personal AI Assistant](https://youtu.be/imDfPmMKEjM) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- ⛓️ [Create Your OWN Slack AI Assistant with Python & LangChain](https://youtu.be/3jFXRNn2Bu8) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
- ⛓️ [How to Create LOCAL Chatbots with GPT4All and LangChain [Full Guide]](https://youtu.be/4p1Fojur8Zw) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
- ⛓️ [Build a `Multilingual PDF` Search App with LangChain, `Cohere` and `Bubble`](https://youtu.be/hOrtuumOrv8) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- ⛓️ [Building a LangChain Agent (code-free!) Using `Bubble` and `Flowise`](https://youtu.be/jDJIIVWTZDE) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- ⛓️ [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- ⛓️ [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- ⛓️ [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- ⛓️ [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
- ⛓️ [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
- ⛓️ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0) by [Edrick](https://www.youtube.com/@edrickdch)
- [LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
- [LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
- [LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
- [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
- [Chatbot Factory: Streamline Python Chatbot Creation with LLMs and Langchain](https://youtu.be/eYer3uzrcuM) by [Finxter](https://www.youtube.com/@CobusGreylingZA)
- [LangChain Tutorial - ChatGPT mit eigenen Daten](https://youtu.be/0XDLyY90E2c) by [Coding Crashkurse](https://www.youtube.com/@codingcrashkurse6429)
- [Chat with a `CSV` | LangChain Agents Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [GoDataProf](https://www.youtube.com/@godataprof)
- [Introdução ao Langchain - #Cortes - Live DataHackers](https://youtu.be/fw8y5VRei5Y) by [Prof. João Gabriel Lima](https://www.youtube.com/@profjoaogabriellima)
- [LangChain: Level up `ChatGPT` !? | LangChain Tutorial Part 1](https://youtu.be/vxUGx8aZpDE) by [Code Affinity](https://www.youtube.com/@codeaffinitydev)
- [KI schreibt krasses Youtube Skript 😲😳 | LangChain Tutorial Deutsch](https://youtu.be/QpTiXyK1jus) by [SimpleKI](https://www.youtube.com/@simpleki)
- [Chat with Audio: Langchain, `Chroma DB`, OpenAI, and `Assembly AI`](https://youtu.be/Kjy7cx1r75g) by [AI Anytime](https://www.youtube.com/@AIAnytime)
- [QA over documents with Auto vector index selection with Langchain router chains](https://youtu.be/9G05qybShv8) by [echohive](https://www.youtube.com/@echohive)
- [Build your own custom LLM application with `Bubble.io` & Langchain (No Code & Beginner friendly)](https://youtu.be/O7NhQGu1m6c) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
- [Simple App to Question Your Docs: Leveraging `Streamlit`, `Hugging Face Spaces`, LangChain, and `Claude`!](https://youtu.be/X4YbNECRr7o) by [Chris Alexiuk](https://www.youtube.com/@chrisalexiuk)
- [LANGCHAIN AI- `ConstitutionalChainAI` + Databutton AI ASSISTANT Web App](https://youtu.be/5zIU6_rdJCU) by [Avra](https://www.youtube.com/@Avra_b)
- [LANGCHAIN AI AUTONOMOUS AGENT WEB APP - 👶 `BABY AGI` 🤖 with EMAIL AUTOMATION using `DATABUTTON`](https://youtu.be/cvAwOGfeHgw) by [Avra](https://www.youtube.com/@Avra_b)
- [The Future of Data Analysis: Using A.I. Models in Data Analysis (LangChain)](https://youtu.be/v_LIcVyg5dk) by [Absent Data](https://www.youtube.com/@absentdata)
- [Memory in LangChain | Deep dive (python)](https://youtu.be/70lqvTFh_Yg) by [Eden Marco](https://www.youtube.com/@EdenMarco)
- [9 LangChain UseCases | Beginner's Guide | 2023](https://youtu.be/zS8_qosHNMw) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- [Use Large Language Models in Jupyter Notebook | LangChain | Agents & Indexes](https://youtu.be/JSe11L1a_QQ) by [Abhinaw Tiwari](https://www.youtube.com/@AbhinawTiwariAT)
- [How to Talk to Your Langchain Agent | `11 Labs` + `Whisper`](https://youtu.be/N4k459Zw2PU) by [VRSEN](https://www.youtube.com/@vrsen)
- [LangChain Deep Dive: 5 FUN AI App Ideas To Build Quickly and Easily](https://youtu.be/mPYEPzLkeks) by [James NoCode](https://www.youtube.com/@jamesnocode)
- [BEST OPEN Alternative to OPENAI's EMBEDDINGs for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
- [LangChain 101: Models](https://youtu.be/T6c_XsyaNSQ) by [Mckay Wrigley](https://www.youtube.com/@realmckaywrigley)
- [LangChain with JavaScript Tutorial #1 | Setup & Using LLMs](https://youtu.be/W3AoeMrg27o) by [Leon van Zyl](https://www.youtube.com/@leonvanzyl)
- [LangChain Overview & Tutorial for Beginners: Build Powerful AI Apps Quickly & Easily (ZERO CODE)](https://youtu.be/iI84yym473Q) by [James NoCode](https://www.youtube.com/@jamesnocode)
- [LangChain In Action: Real-World Use Case With Step-by-Step Tutorial](https://youtu.be/UO699Szp82M) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
- [Summarizing and Querying Multiple Papers with LangChain](https://youtu.be/p_MQRWH5Y6k) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
- [Using Langchain (and `Replit`) through `Tana`, ask `Google`/`Wikipedia`/`Wolfram Alpha` to fill out a table](https://youtu.be/Webau9lEzoI) by [Stian Håklev](https://www.youtube.com/@StianHaklev)
- [Langchain PDF App (GUI) | Create a ChatGPT For Your `PDF` in Python](https://youtu.be/wUAUdEw5oxM) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- [Auto-GPT with LangChain 🔥 | Create Your Own Personal AI Assistant](https://youtu.be/imDfPmMKEjM) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- [Create Your OWN Slack AI Assistant with Python & LangChain](https://youtu.be/3jFXRNn2Bu8) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
- [How to Create LOCAL Chatbots with GPT4All and LangChain [Full Guide]](https://youtu.be/4p1Fojur8Zw) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
- [Build a `Multilingual PDF` Search App with LangChain, `Cohere` and `Bubble`](https://youtu.be/hOrtuumOrv8) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- [Building a LangChain Agent (code-free!) Using `Bubble` and `Flowise`](https://youtu.be/jDJIIVWTZDE) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
- [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0) by [Edrick](https://www.youtube.com/@edrickdch)
- ⛓ [Build AI chatbot with custom knowledge base using OpenAI API and GPT Index](https://youtu.be/vDZAZuaXf48) by [Irina Nik](https://www.youtube.com/@irina_nik)
- ⛓ [Build Your Own Auto-GPT Apps with LangChain (Python Tutorial)](https://youtu.be/NYSWn1ipbgg) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
- ⛓ [Chat with Multiple `PDFs` | LangChain App Tutorial in Python (Free LLMs and Embeddings)](https://youtu.be/dXxQ0LR-3Hg) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- ⛓ [Chat with a `CSV` | `LangChain Agents` Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- ⛓ [Create Your Own ChatGPT with `PDF` Data in 5 Minutes (LangChain Tutorial)](https://youtu.be/au2WVVGUvc8) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
- ⛓ [Using ChatGPT with YOUR OWN Data. This is magical. (LangChain OpenAI API)](https://youtu.be/9AXP7tCI9PI) by [TechLead](https://www.youtube.com/@TechLead)
- ⛓ [Build a Custom Chatbot with OpenAI: `GPT-Index` & LangChain | Step-by-Step Tutorial](https://youtu.be/FIDv6nc4CgU) by [Fabrikod](https://www.youtube.com/@fabrikod)
- ⛓ [`Flowise` is an open source no-code UI visual tool to build 🦜🔗LangChain applications](https://youtu.be/CovAPtQPU0k) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
- ⛓ [LangChain & GPT 4 For Data Analysis: The `Pandas` Dataframe Agent](https://youtu.be/rFQ5Kmkd4jc) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
- ⛓ [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Toolfinder AI](https://www.youtube.com/@toolfinderai)
- ⛓ [`PrivateGPT`: Chat to your FILES OFFLINE and FREE [Installation and Tutorial]](https://youtu.be/G7iLllmx4qc) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
- ⛓ [How to build with Langchain 10x easier | ⛓️ LangFlow & `Flowise`](https://youtu.be/Ya1oGL7ZTvU) by [AI Jason](https://www.youtube.com/@AIJasonZ)
- ⛓ [Getting Started With LangChain In 20 Minutes- Build Celebrity Search Application](https://youtu.be/_FpT1cwcSLg) by [Krish Naik](https://www.youtube.com/@krishnaik06)
## Tutorial Series
⛓ icon marks a new addition [last update 2023-05-15]
### DeepLearning.AI course
⛓ [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain) by Harrison Chase presented by [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
### Handbook
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) by **James Briggs** and **Francisco Ingham**
### Tutorials
[LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
- ⛓ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
- ⛓ [LangChain 101: The Complete Beginner's Guide](https://youtu.be/P3MAbZ2eMUI)
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs):
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
- ⛓ #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
- ⛓ #7 [LangChain Agents Deep Dive with GPT 3.5](https://youtu.be/jSP-gSEyVeI)
- ⛓ #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
- ⛓ #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Data Independent](https://www.youtube.com/@DataIndependent):
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
- ⛓ [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
- ⛓ [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai):
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
- ⛓ [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
- ⛓ [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
- ⛓ [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
- ⛓ [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
- ⛓ [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
- ⛓ [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt):
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
- ⛓️ [CHATGPT For WEBSITES: Custom ChatBOT](https://youtu.be/RBnuhhmD21U)
### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata):
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
- ⛓ [LangChain & Supabase Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
### [Get SH\*T Done with Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
### [Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
- [Getting Started with LangChain: Load Custom Data, Run OpenAI Models, Embeddings and `ChatGPT`](https://www.youtube.com/watch?v=muXbPpG_ys4)
- [Loaders, Indexes & Vectorstores in LangChain: Question Answering on `PDF` files with `ChatGPT`](https://www.youtube.com/watch?v=FQnvfR8Dmr0)
- [LangChain Models: `ChatGPT`, `Flan Alpaca`, `OpenAI Embeddings`, Prompt Templates & Streaming](https://www.youtube.com/watch?v=zy6LiK5F5-s)
- [LangChain Chains: Use `ChatGPT` to Build Conversational Agents, Summaries and Q&A on Text With LLMs](https://www.youtube.com/watch?v=h1tJZQPcimM)
- [Analyze Custom CSV Data with `GPT-4` using Langchain](https://www.youtube.com/watch?v=Ew3sGdX8at4)
- [Build ChatGPT Chatbots with LangChain Memory: Understanding and Implementing Memory in Conversations](https://youtu.be/CyuUlf54wTs)
- [Build ChatGPT Chatbots with LangChain Memory: Understanding and Implementing Memory in Conversations](https://youtu.be/CyuUlf54wTs)
---------------------
⛓ icon marks a new addition [last update 2023-05-15]
⛓ icon marks a new addition [last update 2023-06-20]

View File

@@ -2,188 +2,261 @@
Dependents stats for `hwchase17/langchain`
[![](https://img.shields.io/static/v1?label=Used%20by&message=5152&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by%20(public)&message=172&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by%20(private)&message=4980&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by%20(stars)&message=17239&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by&message=9941&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by%20(public)&message=244&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by%20(private)&message=9697&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[![](https://img.shields.io/static/v1?label=Used%20by%20(stars)&message=19827&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
[update: 2023-05-17; only dependent repositories with Stars > 100]
[update: 2023-07-07; only dependent repositories with Stars > 100]
| Repository | Stars |
| :-------- | -----: |
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 35401 |
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 32861 |
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 32766 |
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 29560 |
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 22315 |
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 17474 |
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 16923 |
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 16112 |
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 15407 |
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 14345 |
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 10372 |
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 9919 |
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 8177 |
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 6807 |
|[imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | 6087 |
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 5292 |
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 4622 |
|[nsarrazin/serge](https://github.com/nsarrazin/serge) | 4076 |
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 3952 |
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 3952 |
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 3762 |
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 3388 |
|[mmabrouk/chatgpt-wrapper](https://github.com/mmabrouk/chatgpt-wrapper) | 3243 |
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 3189 |
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 3050 |
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 2930 |
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 2710 |
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 2545 |
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2479 |
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2399 |
|[langgenius/dify](https://github.com/langgenius/dify) | 2344 |
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2283 |
|[hwchase17/chat-langchain](https://github.com/hwchase17/chat-langchain) | 2266 |
|[guangzhengli/ChatFiles](https://github.com/guangzhengli/ChatFiles) | 1903 |
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 1884 |
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 1860 |
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1813 |
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 1571 |
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 1480 |
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1464 |
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1419 |
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 1410 |
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1363 |
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1344 |
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 1330 |
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1318 |
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1286 |
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1156 |
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 1141 |
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1106 |
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 1072 |
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1064 |
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1057 |
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1003 |
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1002 |
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 957 |
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 918 |
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 886 |
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 867 |
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 850 |
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 837 |
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 826 |
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 782 |
|[hashintel/hash](https://github.com/hashintel/hash) | 778 |
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 773 |
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 738 |
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 737 |
|[ai-sidekick/sidekick](https://github.com/ai-sidekick/sidekick) | 717 |
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 703 |
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 689 |
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 666 |
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 608 |
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 559 |
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 544 |
|[pieroit/cheshire-cat](https://github.com/pieroit/cheshire-cat) | 520 |
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 514 |
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 481 |
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 462 |
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 452 |
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 439 |
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 437 |
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 433 |
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 427 |
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 425 |
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 422 |
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 421 |
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 407 |
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 395 |
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 383 |
|[akshata29/chatpdf](https://github.com/akshata29/chatpdf) | 374 |
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 368 |
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 358 |
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 357 |
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 354 |
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 343 |
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 334 |
|[showlab/VLog](https://github.com/showlab/VLog) | 330 |
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 324 |
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 323 |
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 320 |
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 308 |
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 301 |
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 300 |
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 299 |
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 287 |
|[itamargol/openai](https://github.com/itamargol/openai) | 273 |
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 267 |
|[momegas/megabots](https://github.com/momegas/megabots) | 259 |
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 238 |
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 232 |
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 227 |
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 227 |
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 226 |
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 218 |
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 218 |
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 215 |
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 213 |
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 209 |
|[JohnSnowLabs/nlptest](https://github.com/JohnSnowLabs/nlptest) | 208 |
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 197 |
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 195 |
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 195 |
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 192 |
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 189 |
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 187 |
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 184 |
|[Anil-matcha/Website-to-Chatbot](https://github.com/Anil-matcha/Website-to-Chatbot) | 183 |
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 180 |
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 166 |
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 166 |
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 161 |
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 160 |
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 153 |
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 153 |
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 152 |
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 149 |
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 149 |
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 147 |
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 144 |
|[homanp/superagent](https://github.com/homanp/superagent) | 143 |
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 141 |
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 141 |
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 139 |
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 138 |
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 136 |
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 135 |
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 134 |
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 130 |
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 130 |
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 128 |
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 128 |
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 127 |
|[steamship-core/vercel-examples](https://github.com/steamship-core/vercel-examples) | 127 |
|[yasyf/summ](https://github.com/yasyf/summ) | 127 |
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 126 |
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 125 |
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 124 |
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 124 |
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 124 |
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 123 |
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 123 |
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 123 |
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 115 |
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 113 |
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 113 |
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 112 |
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 111 |
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 109 |
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 108 |
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 104 |
|[enhancedocs/enhancedocs](https://github.com/enhancedocs/enhancedocs) | 102 |
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 101 |
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 41047 |
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 33983 |
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 33375 |
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 31114 |
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 30369 |
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 24116 |
|[OpenBB-finance/OpenBBTerminal](https://github.com/OpenBB-finance/OpenBBTerminal) | 22565 |
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 18375 |
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 17723 |
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 16958 |
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 14632 |
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 11273 |
|[openai/evals](https://github.com/openai/evals) | 10745 |
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 10298 |
|[imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | 9838 |
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 9247 |
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 8768 |
|[PromtEngineer/localGPT](https://github.com/PromtEngineer/localGPT) | 8651 |
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 8119 |
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 7418 |
|[gventuri/pandas-ai](https://github.com/gventuri/pandas-ai) | 7301 |
|[PipedreamHQ/pipedream](https://github.com/PipedreamHQ/pipedream) | 6636 |
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 5849 |
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 5129 |
|[langgenius/dify](https://github.com/langgenius/dify) | 4804 |
|[serge-chat/serge](https://github.com/serge-chat/serge) | 4448 |
|[csunny/DB-GPT](https://github.com/csunny/DB-GPT) | 4350 |
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 4268 |
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 4244 |
|[intitni/CopilotForXcode](https://github.com/intitni/CopilotForXcode) | 4232 |
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 4154 |
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 4080 |
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 3949 |
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 3920 |
|[bentoml/OpenLLM](https://github.com/bentoml/OpenLLM) | 3481 |
|[MineDojo/Voyager](https://github.com/MineDojo/Voyager) | 3453 |
|[mmabrouk/chatgpt-wrapper](https://github.com/mmabrouk/chatgpt-wrapper) | 3355 |
|[postgresml/postgresml](https://github.com/postgresml/postgresml) | 3328 |
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 3100 |
|[kyegomez/tree-of-thoughts](https://github.com/kyegomez/tree-of-thoughts) | 3049 |
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 2844 |
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2833 |
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 2809 |
|[hwchase17/chat-langchain](https://github.com/hwchase17/chat-langchain) | 2809 |
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2664 |
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 2650 |
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 2525 |
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2372 |
|[ParisNeo/lollms-webui](https://github.com/ParisNeo/lollms-webui) | 2287 |
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 2265 |
|[SamurAIGPT/privateGPT](https://github.com/SamurAIGPT/privateGPT) | 2084 |
|[Chainlit/chainlit](https://github.com/Chainlit/chainlit) | 1912 |
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1869 |
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 1864 |
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 1849 |
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 1766 |
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 1745 |
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1732 |
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1716 |
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1619 |
|[pinterest/querybook](https://github.com/pinterest/querybook) | 1468 |
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1446 |
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 1430 |
|[Mintplex-Labs/anything-llm](https://github.com/Mintplex-Labs/anything-llm) | 1419 |
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1416 |
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1327 |
|[psychic-api/psychic](https://github.com/psychic-api/psychic) | 1307 |
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1242 |
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1239 |
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1203 |
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1179 |
|[keephq/keep](https://github.com/keephq/keep) | 1169 |
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1156 |
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 1090 |
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 1088 |
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 1074 |
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1057 |
|[noahshinn024/reflexion](https://github.com/noahshinn024/reflexion) | 1045 |
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 1036 |
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 999 |
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 989 |
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 974 |
|[homanp/superagent](https://github.com/homanp/superagent) | 970 |
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 941 |
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 896 |
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 856 |
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 840 |
|[chatarena/chatarena](https://github.com/chatarena/chatarena) | 829 |
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 816 |
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 816 |
|[hashintel/hash](https://github.com/hashintel/hash) | 806 |
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 790 |
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 752 |
|[cheshire-cat-ai/core](https://github.com/cheshire-cat-ai/core) | 713 |
|[e-johnstonn/BriefGPT](https://github.com/e-johnstonn/BriefGPT) | 686 |
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 685 |
|[refuel-ai/autolabel](https://github.com/refuel-ai/autolabel) | 673 |
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 617 |
|[billxbf/ReWOO](https://github.com/billxbf/ReWOO) | 616 |
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 609 |
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 592 |
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 581 |
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 574 |
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 572 |
|[kreneskyp/ix](https://github.com/kreneskyp/ix) | 564 |
|[akshata29/chatpdf](https://github.com/akshata29/chatpdf) | 540 |
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 540 |
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 537 |
|[khoj-ai/khoj](https://github.com/khoj-ai/khoj) | 531 |
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 528 |
|[microsoft/PodcastCopilot](https://github.com/microsoft/PodcastCopilot) | 526 |
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 515 |
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 494 |
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 483 |
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 472 |
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 465 |
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 464 |
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 464 |
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 455 |
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 455 |
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 450 |
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 446 |
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 445 |
|[plastic-labs/tutor-gpt](https://github.com/plastic-labs/tutor-gpt) | 426 |
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 426 |
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 418 |
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 416 |
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 401 |
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 400 |
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 386 |
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 382 |
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 368 |
|[showlab/VLog](https://github.com/showlab/VLog) | 363 |
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 363 |
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 361 |
|[opentensor/bittensor](https://github.com/opentensor/bittensor) | 360 |
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 355 |
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 351 |
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 348 |
|[alejandro-ao/ask-multiple-pdfs](https://github.com/alejandro-ao/ask-multiple-pdfs) | 321 |
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 314 |
|[mosaicml/examples](https://github.com/mosaicml/examples) | 313 |
|[personoids/personoids-lite](https://github.com/personoids/personoids-lite) | 306 |
|[itamargol/openai](https://github.com/itamargol/openai) | 304 |
|[Anil-matcha/Website-to-Chatbot](https://github.com/Anil-matcha/Website-to-Chatbot) | 299 |
|[momegas/megabots](https://github.com/momegas/megabots) | 299 |
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 289 |
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 283 |
|[wandb/weave](https://github.com/wandb/weave) | 279 |
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 273 |
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 271 |
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 270 |
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 269 |
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 259 |
|[Azure-Samples/openai](https://github.com/Azure-Samples/openai) | 252 |
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 248 |
|[hnawaz007/pythondataanalysis](https://github.com/hnawaz007/pythondataanalysis) | 247 |
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 243 |
|[truera/trulens](https://github.com/truera/trulens) | 239 |
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 238 |
|[intel/intel-extension-for-transformers](https://github.com/intel/intel-extension-for-transformers) | 237 |
|[monarch-initiative/ontogpt](https://github.com/monarch-initiative/ontogpt) | 236 |
|[wandb/edu](https://github.com/wandb/edu) | 231 |
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 229 |
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 223 |
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 221 |
|[JohnSnowLabs/nlptest](https://github.com/JohnSnowLabs/nlptest) | 220 |
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 219 |
|[Safiullah-Rahu/CSV-AI](https://github.com/Safiullah-Rahu/CSV-AI) | 215 |
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 215 |
|[steamship-packages/langchain-agent-production-starter](https://github.com/steamship-packages/langchain-agent-production-starter) | 214 |
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 213 |
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 211 |
|[marella/chatdocs](https://github.com/marella/chatdocs) | 207 |
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 200 |
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 195 |
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 189 |
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 186 |
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 185 |
|[huchenxucs/ChatDB](https://github.com/huchenxucs/ChatDB) | 179 |
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 178 |
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 178 |
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 177 |
|[jiran214/GPT-vup](https://github.com/jiran214/GPT-vup) | 176 |
|[rsaryev/talk-codebase](https://github.com/rsaryev/talk-codebase) | 174 |
|[edreisMD/plugnplai](https://github.com/edreisMD/plugnplai) | 174 |
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 172 |
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 171 |
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 165 |
|[gustavz/DataChad](https://github.com/gustavz/DataChad) | 164 |
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 163 |
|[SamPink/dev-gpt](https://github.com/SamPink/dev-gpt) | 161 |
|[yuanjie-ai/ChatLLM](https://github.com/yuanjie-ai/ChatLLM) | 161 |
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 160 |
|[jondurbin/airoboros](https://github.com/jondurbin/airoboros) | 157 |
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 157 |
|[PradipNichite/Youtube-Tutorials](https://github.com/PradipNichite/Youtube-Tutorials) | 156 |
|[nicknochnack/LangchainDocuments](https://github.com/nicknochnack/LangchainDocuments) | 155 |
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 155 |
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 154 |
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 153 |
|[preset-io/promptimize](https://github.com/preset-io/promptimize) | 150 |
|[onlyphantom/llm-python](https://github.com/onlyphantom/llm-python) | 148 |
|[Azure-Samples/azure-search-power-skills](https://github.com/Azure-Samples/azure-search-power-skills) | 146 |
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 144 |
|[microsoft/azure-openai-in-a-day-workshop](https://github.com/microsoft/azure-openai-in-a-day-workshop) | 144 |
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 143 |
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 142 |
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 141 |
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 140 |
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 140 |
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 139 |
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 139 |
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 138 |
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 137 |
|[steamship-core/vercel-examples](https://github.com/steamship-core/vercel-examples) | 137 |
|[deeppavlov/dream](https://github.com/deeppavlov/dream) | 136 |
|[miaoshouai/miaoshouai-assistant](https://github.com/miaoshouai/miaoshouai-assistant) | 135 |
|[sugarforever/LangChain-Tutorials](https://github.com/sugarforever/LangChain-Tutorials) | 135 |
|[yasyf/summ](https://github.com/yasyf/summ) | 135 |
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 134 |
|[vaibkumr/prompt-optimizer](https://github.com/vaibkumr/prompt-optimizer) | 132 |
|[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators) | 130 |
|[homanp/vercel-langchain](https://github.com/homanp/vercel-langchain) | 128 |
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 127 |
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 125 |
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 122 |
|[fixie-ai/fixie-examples](https://github.com/fixie-ai/fixie-examples) | 122 |
|[Aggregate-Intellect/practical-llms](https://github.com/Aggregate-Intellect/practical-llms) | 120 |
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 120 |
|[Azure-Samples/azure-search-openai-demo-csharp](https://github.com/Azure-Samples/azure-search-openai-demo-csharp) | 119 |
|[prof-frink-lab/slangchain](https://github.com/prof-frink-lab/slangchain) | 117 |
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 117 |
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 116 |
|[flurb18/AgentOoba](https://github.com/flurb18/AgentOoba) | 114 |
|[kaarthik108/snowChat](https://github.com/kaarthik108/snowChat) | 112 |
|[RedisVentures/redis-openai-qna](https://github.com/RedisVentures/redis-openai-qna) | 111 |
|[solana-labs/chatgpt-plugin](https://github.com/solana-labs/chatgpt-plugin) | 111 |
|[kulltc/chatgpt-sql](https://github.com/kulltc/chatgpt-sql) | 109 |
|[summarizepaper/summarizepaper](https://github.com/summarizepaper/summarizepaper) | 109 |
|[Azure-Samples/miyagi](https://github.com/Azure-Samples/miyagi) | 106 |
|[ssheng/BentoChain](https://github.com/ssheng/BentoChain) | 106 |
|[voxel51/voxelgpt](https://github.com/voxel51/voxelgpt) | 105 |
|[mallahyari/drqa](https://github.com/mallahyari/drqa) | 103 |

View File

@@ -1,17 +1,17 @@
# Databerry
# Chaindesk
>[Databerry](https://databerry.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
>[Chaindesk](https://chaindesk.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
## Installation and Setup
We need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url.
We need the [API Key](https://docs.databerry.ai/api-reference/authentication).
We need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url.
We need the [API Key](https://docs.chaindesk.ai/api-reference/authentication).
## Retriever
See a [usage example](/docs/modules/data_connection/retrievers/integrations/databerry.html).
See a [usage example](/docs/modules/data_connection/retrievers/integrations/chaindesk.html).
```python
from langchain.retrievers import DataberryRetriever
from langchain.retrievers import ChaindeskRetriever
```

View File

@@ -0,0 +1,52 @@
# Clarifai
>[Clarifai](https://clarifai.com) is one of the first deep learning platforms, having been founded in 2013. Clarifai provides an AI platform with the full AI lifecycle for data exploration, data labeling, model training, evaluation and inference around images, video, text and audio data. In the LangChain ecosystem, as far as we're aware, Clarifai is the only provider that supports LLMs, embeddings and a vector store in one production scale platform, making it an excellent choice to operationalize your LangChain implementations.
## Installation and Setup
- Install the Python SDK:
```bash
pip install clarifai
```
[Sign-up](https://clarifai.com/signup) for a Clarifai account, then get a personal access token to access the Clarifai API from your [security settings](https://clarifai.com/settings/security) and set it as an environment variable (`CLARIFAI_PAT`).
## Models
Clarifai provides 1,000s of AI models for many different use cases. You can [explore them here](https://clarifai.com/explore) to find the one most suited for your use case. These models include those created by other providers such as OpenAI, Anthropic, Cohere, AI21, etc. as well as state of the art from open source such as Falcon, InstructorXL, etc. so that you build the best in AI into your products. You'll find these organized by the creator's user_id and into projects we call applications denoted by their app_id. Those IDs will be needed in addition to the model_id and optionally the version_id, so make note of all these IDs once you've found the best model for your use case!
Also note that given there are many models for images, video, text and audio understanding, you can build some interesting AI agents that utilize the variety of AI models as experts to understand those data types.
### LLMs
To find the selection of LLMs in the Clarifai platform you can select the text to text model type [here](https://clarifai.com/explore/models?filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-to-text%22%5D%7D%5D&page=1&perPage=24).
```python
from langchain.llms import Clarifai
llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
```
For more details, the docs on the Clarifai LLM wrapper provide a [detailed walkthrough](/docs/modules/model_io/models/llms/integrations/clarifai.html).
### Text Embedding Models
To find the selection of text embeddings models in the Clarifai platform you can select the text to embedding model type [here](https://clarifai.com/explore/models?page=1&perPage=24&filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-embedder%22%5D%7D%5D).
There is a Clarifai Embedding model in LangChain, which you can access with:
```python
from langchain.embeddings import ClarifaiEmbeddings
embeddings = ClarifaiEmbeddings(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
```
For more details, the docs on the Clarifai Embeddings wrapper provide a [detailed walkthrough](/docs/modules/data_connection/text_embedding/integrations/clarifai.html).
## Vectorstore
Clarifai's vector DB was launched in 2016 and has been optimized to support live search queries. With workflows in the Clarifai platform, your data is automatically indexed by an embedding model and optionally other models as well to index that information in the DB for search. You can query the DB not only via the vectors but also filter by metadata matches, other AI predicted concepts, and even do geo-coordinate search. Simply create an application, select the appropriate base workflow for your type of data, and upload it (through the API as [documented here](https://docs.clarifai.com/api-guide/data/create-get-update-delete) or the UIs at clarifai.com).
You can also add data directly from LangChain, and the auto-indexing will take place for you. You'll notice this is a little different than other vectorstores where you need to provide an embedding model in their constructor and have LangChain coordinate getting the embeddings from text and writing those to the index. Not only is it more convenient, but it's much more scalable to use Clarifai's distributed cloud to do all the indexing in the background.
```python
from langchain.vectorstores import Clarifai
clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)
```
For more details, the docs on the Clarifai vector store provide a [detailed walkthrough](/docs/modules/data_connection/vectorstores/integrations/clarifai.html).

View File

@@ -0,0 +1,108 @@
# CnosDB
> [CnosDB](https://github.com/cnosdb/cnosdb) is an open source distributed time series database with high performance, high compression rate and high ease of use.
## Installation and Setup
```shell
pip install cnos-connector
```
## Connecting to CnosDB
You can connect to CnosDB using the SQLDatabase.from_cnosdb() method.
### Syntax
```python
def SQLDatabase.from_cnosdb(url: str = "127.0.0.1:8902",
user: str = "root",
password: str = "",
tenant: str = "cnosdb",
database: str = "public")
```
Args:
1. url (str): The HTTP connection host name and port number of the CnosDB
service, excluding "http://" or "https://", with a default value
of "127.0.0.1:8902".
2. user (str): The username used to connect to the CnosDB service, with a
default value of "root".
3. password (str): The password of the user connecting to the CnosDB service,
with a default value of "".
4. tenant (str): The name of the tenant used to connect to the CnosDB service,
with a default value of "cnosdb".
5. database (str): The name of the database in the CnosDB tenant.
## Examples
```python
# Connecting to CnosDB with SQLDatabase Wrapper
from cnosdb_connector import make_cnosdb_langchain_uri
from langchain import SQLDatabase
db = SQLDatabase.from_cnosdb()
```
```python
# Creating a OpenAI Chat LLM Wrapper
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
```
### SQL Chain
This example demonstrates the use of the SQL Chain for answering a question over a CnosDB.
```python
from langchain import SQLDatabaseChain
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
db_chain.run(
"What is the average fa of test table that time between November 3,2022 and November 4, 2022?"
)
```
```shell
> Entering new chain...
What is the average fa of test table that time between November 3, 2022 and November 4, 2022?
SQLQuery:SELECT AVG(fa) FROM test WHERE time >= '2022-11-03' AND time < '2022-11-04'
SQLResult: [(2.0,)]
Answer:The average fa of the test table between November 3, 2022, and November 4, 2022, is 2.0.
> Finished chain.
```
### SQL Database Agent
This example demonstrates the use of the SQL Database Agent for answering questions over a CnosDB.
```python
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
```
```python
agent.run(
"What is the average fa of test table that time between November 3, 2022 and November 4, 2022?"
)
```
```shell
> Entering new chain...
Action: sql_db_list_tables
Action Input: ""
Observation: test
Thought:The relevant table is "test". I should query the schema of this table to see the column names.
Action: sql_db_schema
Action Input: "test"
Observation:
CREATE TABLE test (
time TIMESTAMP,
fa BIGINT
)
/*
3 rows from test table:
fa time
1 2022-11-03T06:20:11
2 2022-11-03T06:20:11.000000001
3 2022-11-03T06:20:11.000000002
*/
Thought:The relevant column is "fa" in the "test" table. I can now construct the query to calculate the average "fa" between the specified time range.
Action: sql_db_query
Action Input: "SELECT AVG(fa) FROM test WHERE time >= '2022-11-03' AND time < '2022-11-04'"
Observation: [(2.0,)]
Thought:The average "fa" of the "test" table between November 3, 2022 and November 4, 2022 is 2.0.
Final Answer: 2.0
> Finished chain.
```

View File

@@ -0,0 +1,51 @@
# DataForSEO
This page provides instructions on how to use the DataForSEO search APIs within LangChain.
## Installation and Setup
- Get a DataForSEO API Access login and password, and set them as environment variables (`DATAFORSEO_LOGIN` and `DATAFORSEO_PASSWORD` respectively). You can find them in your dashboard.
## Wrappers
### Utility
The DataForSEO utility wraps the API. To import this utility, use:
```python
from langchain.utilities import DataForSeoAPIWrapper
```
For a detailed walkthrough of this wrapper, see [this notebook](/docs/modules/agents/tools/integrations/dataforseo.ipynb).
### Tool
You can also load this wrapper as a Tool to use with an Agent:
```python
from langchain.agents import load_tools
tools = load_tools(["dataforseo-api-search"])
```
## Example usage
```python
dataforseo = DataForSeoAPIWrapper(api_login="your_login", api_password="your_password")
result = dataforseo.run("Bill Gates")
print(result)
```
## Environment Variables
You can store your DataForSEO API Access login and password as environment variables. The wrapper will automatically check for these environment variables if no values are provided:
```python
import os
os.environ["DATAFORSEO_LOGIN"] = "your_login"
os.environ["DATAFORSEO_PASSWORD"] = "your_password"
dataforseo = DataForSeoAPIWrapper()
result = dataforseo.run("weather in Los Angeles")
print(result)
```

View File

@@ -0,0 +1,31 @@
# Marqo
This page covers how to use the Marqo ecosystem within LangChain.
### **What is Marqo?**
Marqo is a tensor search engine that uses embeddings stored in in-memory HNSW indexes to achieve cutting edge search speeds. Marqo can scale to hundred-million document indexes with horizontal index sharding and allows for async and non-blocking data upload and search. Marqo uses the latest machine learning models from PyTorch, Huggingface, OpenAI and more. You can start with a pre-configured model or bring your own. The built in ONNX support and conversion allows for faster inference and higher throughput on both CPU and GPU.
Because Marqo includes its own inference, your documents can have a mix of text and images, and you can bring Marqo indexes with data from your other systems into the langchain ecosystem without having to worry about your embeddings being compatible.
Deployment of Marqo is flexible, you can get started yourself with our docker image or [contact us about our managed cloud offering!](https://www.marqo.ai/pricing)
To run Marqo locally with our docker image, [see our getting started.](https://docs.marqo.ai/latest/)
## Installation and Setup
- Install the Python SDK with `pip install marqo`
## Wrappers
### VectorStore
There exists a wrapper around Marqo indexes, allowing you to use them within the vectorstore framework. Marqo lets you select from a range of models for generating embeddings and exposes some preprocessing configurations.
The Marqo vectorstore can also work with existing multimodal indexes where your documents have a mix of images and text; for more information refer to [our documentation](https://docs.marqo.ai/latest/#multi-modal-and-cross-modal-search). Note that instantiating the Marqo vectorstore with an existing multimodal index will disable the ability to add any new documents to it via the langchain vectorstore `add_texts` method.
To import this vectorstore:
```python
from langchain.vectorstores import Marqo
```
For a more detailed walkthrough of the Marqo wrapper and some of its unique features, see [this notebook](../modules/data_connection/vectorstores/integrations/marqo.ipynb)

View File

@@ -0,0 +1,56 @@
# TruLens
This page covers how to use [TruLens](https://trulens.org) to evaluate and track LLM apps built on langchain.
## What is TruLens?
TruLens is an [open source](https://github.com/truera/trulens) package that provides instrumentation and evaluation tools for large language model (LLM) based applications.
## Quick start
Once you've created your LLM chain, you can use TruLens for evaluation and tracking. TruLens has a number of [out-of-the-box Feedback Functions](https://www.trulens.org/trulens_eval/feedback_functions/), and is also an extensible framework for LLM evaluation.
```python
# create a feedback function
from trulens_eval.feedback import Feedback, Huggingface, OpenAI
# Initialize HuggingFace-based feedback function collection class:
hugs = Huggingface()
openai = OpenAI()
# Define a language match feedback function using HuggingFace.
lang_match = Feedback(hugs.language_match).on_input_output()
# By default this will check language match on the main app input and main app
# output.
# Question/answer relevance between overall question and answer.
qa_relevance = Feedback(openai.relevance).on_input_output()
# By default this will evaluate feedback on main app input and main app output.
# Toxicity of input
toxicity = Feedback(openai.toxicity).on_input()
```
After you've set up Feedback Function(s) for evaluating your LLM, you can wrap your application with TruChain to get detailed tracing, logging and evaluation of your LLM app.
```python
# wrap your chain with TruChain
truchain = TruChain(
chain,
app_id='Chain1_ChatApplication',
feedbacks=[lang_match, qa_relevance, toxicity]
)
# Note: any `feedbacks` specified here will be evaluated and logged whenever the chain is used.
truchain("que hora es?")
```
Now you can explore your LLM-based application!
Doing so will help you understand how your LLM application is performing at a glance. As you iterate new versions of your LLM application, you can compare their performance across all of the different quality metrics you've set up. You'll also be able to view evaluations at a record level, and explore the chain metadata for each record.
```python
tru.run_dashboard() # open a Streamlit app to explore
```
For more information on TruLens, visit [trulens.org](https://www.trulens.org/)

View File

@@ -51,6 +51,10 @@ A minimal example of how to deploy LangChain to [Fly.io](https://fly.io/) using
A minimal example on how to deploy LangChain to DigitalOcean App Platform.
## [CI/CD Google Cloud Build + Dockerfile + Serverless Google Cloud Run](https://github.com/g-emarco/github-assistant)
Boilerplate LangChain project on how to deploy to Google Cloud Run using Docker with Cloud Build CI/CD pipeline
## [Google Cloud Run](https://github.com/homanp/gcp-langchain)
A minimal example on how to deploy LangChain to Google Cloud Run.

View File

@@ -7,7 +7,7 @@
"source": [
"# SQL Database Agent\n",
"\n",
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://langchain.readthedocs.io/en/latest/modules/chains/examples/sqlite.html) and is designed to answer more general questions about a database, as well as recover from errors.\n",
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://python.langchain.com/docs/modules/chains/popular/sqlite) and is designed to answer more general questions about a database, as well as recover from errors.\n",
"\n",
"Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your database given certain questions. Be careful running it on sensitive data!\n",
"\n",

View File

@@ -0,0 +1,226 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# DataForSeo API Wrapper\n",
"This notebook demonstrates how to use the DataForSeo API wrapper to obtain search engine results. The DataForSeo API allows users to retrieve SERP from most popular search engines like Google, Bing, Yahoo. It also allows to get SERPs from different search engine types like Maps, News, Events, etc.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities import DataForSeoAPIWrapper"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting up the API wrapper with your credentials\n",
"You can obtain your API credentials by registering on the DataForSeo website."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"DATAFORSEO_LOGIN\"] = \"your_api_access_username\"\n",
"os.environ[\"DATAFORSEO_PASSWORD\"] = \"your_api_access_password\"\n",
"\n",
"wrapper = DataForSeoAPIWrapper()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"The run method will return the first result snippet from one of the following elements: answer_box, knowledge_graph, featured_snippet, shopping, organic."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wrapper.run(\"Weather in Los Angeles\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## The Difference Between `run` and `results`\n",
"`run` and `results` are two methods provided by the `DataForSeoAPIWrapper` class.\n",
"\n",
"The `run` method executes the search and returns the first result snippet from the answer box, knowledge graph, featured snippet, shopping, or organic results. These elements are sorted by priority from highest to lowest.\n",
"\n",
"The `results` method returns a JSON response configured according to the parameters set in the wrapper. This allows for more flexibility in terms of what data you want to return from the API."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting Results as JSON\n",
"You can customize the result types and fields you want to return in the JSON response. You can also set a maximum count for the number of top results to return."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_wrapper = DataForSeoAPIWrapper(\n",
" json_result_types=[\"organic\", \"knowledge_graph\", \"answer_box\"],\n",
" json_result_fields=[\"type\", \"title\", \"description\", \"text\"],\n",
" top_count=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_wrapper.results(\"Bill Gates\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Customizing Location and Language\n",
"You can specify the location and language of your search results by passing additional parameters to the API wrapper."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"customized_wrapper = DataForSeoAPIWrapper(\n",
" top_count=10,\n",
" json_result_types=[\"organic\", \"local_pack\"],\n",
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
" params={\"location_name\": \"Germany\", \"language_code\": \"en\"})\n",
"customized_wrapper.results(\"coffee near me\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Customizing the Search Engine\n",
"You can also specify the search engine you want to use."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"customized_wrapper = DataForSeoAPIWrapper(\n",
" top_count=10,\n",
" json_result_types=[\"organic\", \"local_pack\"],\n",
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
" params={\"location_name\": \"Germany\", \"language_code\": \"en\", \"se_name\": \"bing\"})\n",
"customized_wrapper.results(\"coffee near me\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Customizing the Search Type\n",
"The API wrapper also allows you to specify the type of search you want to perform. For example, you can perform a maps search."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"maps_search = DataForSeoAPIWrapper(\n",
" top_count=10,\n",
" json_result_fields=[\"title\", \"value\", \"address\", \"rating\", \"type\"],\n",
" params={\"location_coordinate\": \"52.512,13.36,12z\", \"language_code\": \"en\", \"se_type\": \"maps\"})\n",
"maps_search.results(\"coffee near me\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Integration with Langchain Agents\n",
"You can use the `Tool` class from the `langchain.agents` module to integrate the `DataForSeoAPIWrapper` with a langchain agent. The `Tool` class encapsulates a function that the agent can call."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import Tool\n",
"search = DataForSeoAPIWrapper(\n",
" top_count=3,\n",
" json_result_types=[\"organic\"],\n",
" json_result_fields=[\"title\", \"description\", \"type\"])\n",
"tool = Tool(\n",
" name=\"google-search-answer\",\n",
" description=\"My new answer tool\",\n",
" func=search.run,\n",
")\n",
"json_tool = Tool(\n",
" name=\"google-search-json\",\n",
" description=\"My new json tool\",\n",
" func=search.results,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,220 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Context\n",
"\n",
"![Context - Product Analytics for AI Chatbots](https://go.getcontext.ai/langchain.png)\n",
"\n",
"[Context](https://getcontext.ai/) provides product analytics for AI chatbots.\n",
"\n",
"Context helps you understand how users are interacting with your AI chat products.\n",
"Gain critical insights, optimise poor experiences, and minimise brand risks.\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"In this guide we will show you how to integrate with Context."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## Installation and Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"$ pip install context-python --upgrade"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Getting API Credentials\n",
"\n",
"To get your Context API token:\n",
"\n",
"1. Go to the settings page within your Context account (https://go.getcontext.ai/settings).\n",
"2. Generate a new API Token.\n",
"3. Store this token somewhere secure."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Setup Context\n",
"\n",
"To use the `ContextCallbackHandler`, import the handler from Langchain and instantiate it with your Context API token.\n",
"\n",
"Ensure you have installed the `context-python` package before using the handler."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain.callbacks import ContextCallbackHandler\n",
"\n",
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
"\n",
"context_callback = ContextCallbackHandler(token)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage\n",
"### Using the Context callback within a Chat Model\n",
"\n",
"The Context callback handler can be used to directly record transcripts between users and AI assistants.\n",
"\n",
"#### Example"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.schema import (\n",
" SystemMessage,\n",
" HumanMessage,\n",
")\n",
"from langchain.callbacks import ContextCallbackHandler\n",
"\n",
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
"\n",
"chat = ChatOpenAI(\n",
" headers={\"user_id\": \"123\"}, temperature=0, callbacks=[ContextCallbackHandler(token)]\n",
")\n",
"\n",
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that translates English to French.\"\n",
" ),\n",
" HumanMessage(content=\"I love programming.\"),\n",
"]\n",
"\n",
"print(chat(messages))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using the Context callback within Chains\n",
"\n",
"The Context callback handler can also be used to record the inputs and outputs of chains. Note that intermediate steps of the chain are not recorded - only the starting inputs and final outputs.\n",
"\n",
"__Note:__ Ensure that you pass the same context object to the chat model and the chain.\n",
"\n",
"Wrong:\n",
"> ```python\n",
"> chat = ChatOpenAI(temperature=0.9, callbacks=[ContextCallbackHandler(token)])\n",
"> chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[ContextCallbackHandler(token)])\n",
"> ```\n",
"\n",
"Correct:\n",
">```python\n",
">handler = ContextCallbackHandler(token)\n",
">chat = ChatOpenAI(temperature=0.9, callbacks=[callback])\n",
">chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[callback])\n",
">```\n",
"\n",
"#### Example"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain import LLMChain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.callbacks import ContextCallbackHandler\n",
"\n",
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
"\n",
"human_message_prompt = HumanMessagePromptTemplate(\n",
" prompt=PromptTemplate(\n",
" template=\"What is a good name for a company that makes {product}?\",\n",
" input_variables=[\"product\"],\n",
" )\n",
")\n",
"chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])\n",
"callback = ContextCallbackHandler(token)\n",
"chat = ChatOpenAI(temperature=0.9, callbacks=[callback])\n",
"chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[callback])\n",
"print(chain.run(\"colorful socks\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"vscode": {
"interpreter": {
"hash": "a53ebf4a859167383b364e7e7521d0add3c2dbbdecce4edf676e8c4634ff3fbb"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -9,7 +9,7 @@
In this guide we will demonstrate how to use `StreamlitCallbackHandler` to display the thoughts and actions of an agent in an
interactive Streamlit app. Try it out with the running app below using the [MRKL agent](/docs/modules/agents/how_to/mrkl/):
<iframe loading="lazy" src="https://mrkl-minimal.streamlit.app/?embed=true&embed_options=light_theme"
<iframe loading="lazy" src="https://langchain-mrkl.streamlit.app/?embed=true&embed_options=light_theme"
style={{ width: 100 + '%', border: 'none', marginBottom: 1 + 'rem', height: 600 }}
allow="camera;clipboard-read;clipboard-write;"
></iframe>
@@ -35,7 +35,7 @@ st_callback = StreamlitCallbackHandler(st.container())
```
Additional keyword arguments to customize the display behavior are described in the
[API reference](https://api.python.langchain.com/en/latest/modules/callbacks.html#langchain.callbacks.StreamlitCallbackHandler).
[API reference](https://api.python.langchain.com/en/latest/callbacks/langchain.callbacks.streamlit.streamlit_callback_handler.StreamlitCallbackHandler.html).
### Scenario 1: Using an Agent with Tools

View File

@@ -56,7 +56,8 @@
"source": [
"import os\n",
"\n",
"os.environ[\"SERPER_API_KEY\"] = \"\""
"os.environ[\"SERPER_API_KEY\"] = \"\"",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
@@ -77,7 +78,7 @@
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.llms import OpenAI\n",
"from langchain.schema import Document\n",
"from typing import Any"
"from typing import Any, List"
]
},
{
@@ -96,8 +97,8 @@
"outputs": [],
"source": [
"class SerperSearchRetriever(BaseRetriever):\n",
" def __init__(self, search):\n",
" self.search = search\n",
"\n",
" search: GoogleSerperAPIWrapper = None\n",
"\n",
" def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any) -> List[Document]:\n",
" return [Document(page_content=self.search.run(query))]\n",
@@ -111,7 +112,7 @@
" raise NotImplementedError()\n",
"\n",
"\n",
"retriever = SerperSearchRetriever(GoogleSerperAPIWrapper())"
"retriever = SerperSearchRetriever(search=GoogleSerperAPIWrapper())"
]
},
{

View File

@@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c94240f5",
"metadata": {},
"source": [
"# GraphSparqlQAChain\n",
"\n",
"Graph databases are an excellent choice for applications based on network-like models. To standardize the syntax and semantics of such graphs, the W3C recommends Semantic Web Technologies, cp. [Semantic Web](https://www.w3.org/standards/semanticweb/). [SPARQL](https://www.w3.org/TR/sparql11-query/) serves as a query language analogously to SQL or Cypher for these graphs. This notebook demonstrates the application of LLMs as a natural language interface to a graph database by generating SPARQL.\\\n",
"Disclaimer: To date, SPARQL query generation via LLMs is still a bit unstable. Be especially careful with UPDATE queries, which alter the graph."
]
},
{
"cell_type": "markdown",
"id": "dbc0ee68",
"metadata": {},
"source": [
"There are several sources you can run queries against, including files on the web, files you have available locally, SPARQL endpoints, e.g., [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page), and [triple stores](https://www.w3.org/wiki/LargeTripleStores)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "62812aad",
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains import GraphSparqlQAChain\n",
"from langchain.graphs import RdfGraph"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0928915d",
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"graph = RdfGraph(\n",
" source_file=\"http://www.w3.org/People/Berners-Lee/card\",\n",
" standard=\"rdf\",\n",
" local_copy=\"test.ttl\",\n",
")"
]
},
{
"cell_type": "markdown",
"source": [
"Note that providing a `local_copy` is necessary for storing changes locally if the source is read-only."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"id": "58c1a8ea",
"metadata": {},
"source": [
"## Refresh graph schema information\n",
"If the schema of the database changes, you can refresh the schema information needed to generate SPARQL queries."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4e3de44f",
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"graph.load_schema()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1fe76ccd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In the following, each IRI is followed by the local name and optionally its description in parentheses. \n",
"The RDF graph supports the following node types:\n",
"<http://xmlns.com/foaf/0.1/PersonalProfileDocument> (PersonalProfileDocument, None), <http://www.w3.org/ns/auth/cert#RSAPublicKey> (RSAPublicKey, None), <http://www.w3.org/2000/10/swap/pim/contact#Male> (Male, None), <http://xmlns.com/foaf/0.1/Person> (Person, None), <http://www.w3.org/2006/vcard/ns#Work> (Work, None)\n",
"The RDF graph supports the following relationships:\n",
"<http://www.w3.org/2000/01/rdf-schema#seeAlso> (seeAlso, None), <http://purl.org/dc/elements/1.1/title> (title, None), <http://xmlns.com/foaf/0.1/mbox_sha1sum> (mbox_sha1sum, None), <http://xmlns.com/foaf/0.1/maker> (maker, None), <http://www.w3.org/ns/solid/terms#oidcIssuer> (oidcIssuer, None), <http://www.w3.org/2000/10/swap/pim/contact#publicHomePage> (publicHomePage, None), <http://xmlns.com/foaf/0.1/openid> (openid, None), <http://www.w3.org/ns/pim/space#storage> (storage, None), <http://xmlns.com/foaf/0.1/name> (name, None), <http://www.w3.org/2000/10/swap/pim/contact#country> (country, None), <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> (type, None), <http://www.w3.org/ns/solid/terms#profileHighlightColor> (profileHighlightColor, None), <http://www.w3.org/ns/pim/space#preferencesFile> (preferencesFile, None), <http://www.w3.org/2000/01/rdf-schema#label> (label, None), <http://www.w3.org/ns/auth/cert#modulus> (modulus, None), <http://www.w3.org/2000/10/swap/pim/contact#participant> (participant, None), <http://www.w3.org/2000/10/swap/pim/contact#street2> (street2, None), <http://www.w3.org/2006/vcard/ns#locality> (locality, None), <http://xmlns.com/foaf/0.1/nick> (nick, None), <http://xmlns.com/foaf/0.1/homepage> (homepage, None), <http://creativecommons.org/ns#license> (license, None), <http://xmlns.com/foaf/0.1/givenname> (givenname, None), <http://www.w3.org/2006/vcard/ns#street-address> (street-address, None), <http://www.w3.org/2006/vcard/ns#postal-code> (postal-code, None), <http://www.w3.org/2000/10/swap/pim/contact#street> (street, None), <http://www.w3.org/2003/01/geo/wgs84_pos#lat> (lat, None), <http://xmlns.com/foaf/0.1/primaryTopic> (primaryTopic, None), <http://www.w3.org/2006/vcard/ns#fn> (fn, None), <http://www.w3.org/2003/01/geo/wgs84_pos#location> (location, None), <http://usefulinc.com/ns/doap#developer> (developer, None), <http://www.w3.org/2000/10/swap/pim/contact#city> (city, None), <http://www.w3.org/2006/vcard/ns#region> (region, 
None), <http://xmlns.com/foaf/0.1/member> (member, None), <http://www.w3.org/2003/01/geo/wgs84_pos#long> (long, None), <http://www.w3.org/2000/10/swap/pim/contact#address> (address, None), <http://xmlns.com/foaf/0.1/family_name> (family_name, None), <http://xmlns.com/foaf/0.1/account> (account, None), <http://xmlns.com/foaf/0.1/workplaceHomepage> (workplaceHomepage, None), <http://purl.org/dc/terms/title> (title, None), <http://www.w3.org/ns/solid/terms#publicTypeIndex> (publicTypeIndex, None), <http://www.w3.org/2000/10/swap/pim/contact#office> (office, None), <http://www.w3.org/2000/10/swap/pim/contact#homePage> (homePage, None), <http://xmlns.com/foaf/0.1/mbox> (mbox, None), <http://www.w3.org/2000/10/swap/pim/contact#preferredURI> (preferredURI, None), <http://www.w3.org/ns/solid/terms#profileBackgroundColor> (profileBackgroundColor, None), <http://schema.org/owns> (owns, None), <http://xmlns.com/foaf/0.1/based_near> (based_near, None), <http://www.w3.org/2006/vcard/ns#hasAddress> (hasAddress, None), <http://xmlns.com/foaf/0.1/img> (img, None), <http://www.w3.org/2000/10/swap/pim/contact#assistant> (assistant, None), <http://xmlns.com/foaf/0.1/title> (title, None), <http://www.w3.org/ns/auth/cert#key> (key, None), <http://www.w3.org/ns/ldp#inbox> (inbox, None), <http://www.w3.org/ns/solid/terms#editableProfile> (editableProfile, None), <http://www.w3.org/2000/10/swap/pim/contact#postalCode> (postalCode, None), <http://xmlns.com/foaf/0.1/weblog> (weblog, None), <http://www.w3.org/ns/auth/cert#exponent> (exponent, None), <http://rdfs.org/sioc/ns#avatar> (avatar, None)\n",
"\n"
]
}
],
"source": [
"graph.get_schema"
]
},
{
"cell_type": "markdown",
"id": "68a3c677",
"metadata": {},
"source": [
"## Querying the graph\n",
"\n",
"Now, you can use the graph SPARQL QA chain to ask questions about the graph."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7476ce98",
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"chain = GraphSparqlQAChain.from_llm(\n",
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ef8ee27b",
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001B[1m> Entering new GraphSparqlQAChain chain...\u001B[0m\n",
"Identified intent:\n",
"\u001B[32;1m\u001B[1;3mSELECT\u001B[0m\n",
"Generated SPARQL:\n",
"\u001B[32;1m\u001B[1;3mPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
"SELECT ?homepage\n",
"WHERE {\n",
" ?person foaf:name \"Tim Berners-Lee\" .\n",
" ?person foaf:workplaceHomepage ?homepage .\n",
"}\u001B[0m\n",
"Full Context:\n",
"\u001B[32;1m\u001B[1;3m[]\u001B[0m\n",
"\n",
"\u001B[1m> Finished chain.\u001B[0m\n"
]
},
{
"data": {
"text/plain": [
"\"Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/.\""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"What is Tim Berners-Lee's work homepage?\")"
]
},
{
"cell_type": "markdown",
"id": "af4b3294",
"metadata": {},
"source": [
"## Updating the graph\n",
"\n",
"Analogously, you can update the graph, i.e., insert triples, using natural language."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "fdf38841",
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001B[1m> Entering new GraphSparqlQAChain chain...\u001B[0m\n",
"Identified intent:\n",
"\u001B[32;1m\u001B[1;3mUPDATE\u001B[0m\n",
"Generated SPARQL:\n",
"\u001B[32;1m\u001B[1;3mPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
"INSERT {\n",
" ?person foaf:workplaceHomepage <http://www.w3.org/foo/bar/> .\n",
"}\n",
"WHERE {\n",
" ?person foaf:name \"Timothy Berners-Lee\" .\n",
"}\u001B[0m\n",
"\n",
"\u001B[1m> Finished chain.\u001B[0m\n"
]
},
{
"data": {
"text/plain": [
"'Successfully inserted triples into the graph.'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\")"
]
},
{
"cell_type": "markdown",
"id": "5e0f7fc1",
"metadata": {},
"source": [
"Let's verify the results:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f874171b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(rdflib.term.URIRef('https://www.w3.org/'),),\n",
" (rdflib.term.URIRef('http://www.w3.org/foo/bar/'),)]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = (\n",
" \"\"\"PREFIX foaf: <http://xmlns.com/foaf/0.1/>\\n\"\"\"\n",
" \"\"\"SELECT ?hp\\n\"\"\"\n",
" \"\"\"WHERE {\\n\"\"\"\n",
" \"\"\" ?person foaf:name \"Timothy Berners-Lee\" . \\n\"\"\"\n",
" \"\"\" ?person foaf:workplaceHomepage ?hp .\\n\"\"\"\n",
" \"\"\"}\"\"\"\n",
")\n",
"graph.query(query)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lc",
"language": "python",
"name": "lc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -28,7 +28,7 @@
"\n",
"from pydantic import Extra\n",
"\n",
"from langchain.base_language import BaseLanguageModel\n",
"from langchain.schema import BaseLanguageModel\n",
"from langchain.callbacks.manager import (\n",
" AsyncCallbackManagerForChainRun,\n",
" CallbackManagerForChainRun,\n",

View File

@@ -0,0 +1,518 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "54ccb772",
"metadata": {},
"source": [
"# Using OpenAI functions\n",
"This walkthrough demonstrates how to incorporate OpenAI function-calling APIs in a chain. We'll go over: \n",
"1. How to use functions to get structured outputs from ChatOpenAI\n",
"2. How to create a generic chain that uses (multiple) functions\n",
"3. How to create a chain that actually executes the chosen function"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "767ac575",
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional\n",
"\n",
"from langchain.chains.openai_functions import (\n",
" create_openai_fn_chain, create_structured_output_chain\n",
")\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate\n",
"from langchain.schema import HumanMessage, SystemMessage"
]
},
{
"cell_type": "markdown",
"id": "976b6496",
"metadata": {},
"source": [
"## Getting structured outputs\n",
"We can take advantage of OpenAI functions to try and force the model to return a particular kind of structured output. We'll use the `create_structured_output_chain` to create our chain, which takes the desired structured output either as a Pydantic class or as JsonSchema.\n",
"\n",
"See here for relevant [reference docs](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.base.create_structured_output_chain.html)."
]
},
{
"cell_type": "markdown",
"id": "e052faae",
"metadata": {},
"source": [
"### Using Pydantic classes\n",
"When passing in Pydantic classes to structure our text, we need to make sure to have a docstring description for the class. It also helps to have descriptions for each of the class's attributes."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0e085c99",
"metadata": {},
"outputs": [],
"source": [
"from pydantic import BaseModel, Field\n",
"\n",
"class Person(BaseModel):\n",
" \"\"\"Identifying information about a person.\"\"\"\n",
" name: str = Field(..., description=\"The person's name\")\n",
" age: int = Field(..., description=\"The person's age\")\n",
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b459a33e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
"Human: Use the given format to extract information from the following input:\n",
"Human: Sally is 13\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'name': 'Sally', 'age': 13}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
"\n",
"prompt_msgs = [\n",
" SystemMessage(\n",
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
" ),\n",
" HumanMessage(content=\"Use the given format to extract information from the following input:\"),\n",
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
" ]\n",
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
"\n",
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
"chain.run(\"Sally is 13\")"
]
},
{
"cell_type": "markdown",
"id": "e3539936",
"metadata": {},
"source": [
"To extract arbitrarily many structured outputs of a given format, we can just create a wrapper Pydantic class that takes a sequence of the original class."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4d8ea815",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
"Human: Use the given format to extract information from the following input:\n",
"Human: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'people': [{'name': 'Sally', 'age': 13, 'fav_food': ''},\n",
" {'name': 'Joey', 'age': 12, 'fav_food': 'spinach'},\n",
" {'name': 'Caroline', 'age': 23, 'fav_food': ''}]}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from typing import Sequence\n",
"\n",
"class People(BaseModel):\n",
" \"\"\"Identifying information about all people in a text.\"\"\"\n",
" people: Sequence[Person] = Field(..., description=\"The people in the text\")\n",
" \n",
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
"chain.run(\"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\")"
]
},
{
"cell_type": "markdown",
"id": "ea66e10e",
"metadata": {},
"source": [
"### Using JsonSchema\n",
"\n",
"We can also pass in JsonSchema instead of Pydantic classes to specify the desired structure. When we do this, our chain will output json corresponding to the properties described in the JsonSchema, instead of a Pydantic class."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3484415e",
"metadata": {},
"outputs": [],
"source": [
"json_schema = {\n",
" \"title\": \"Person\",\n",
" \"description\": \"Identifying information about a person.\",\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"name\": {\n",
" \"title\": \"Name\",\n",
" \"description\": \"The person's name\",\n",
" \"type\": \"string\"\n",
" },\n",
" \"age\": {\n",
" \"title\": \"Age\",\n",
" \"description\": \"The person's age\",\n",
" \"type\": \"integer\"\n",
" },\n",
" \"fav_food\": {\n",
" \"title\": \"Fav Food\",\n",
" \"description\": \"The person's favorite food\",\n",
" \"type\": \"string\"\n",
" }\n",
" },\n",
" \"required\": [\n",
" \"name\",\n",
" \"age\"\n",
" ]\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "be9b76b3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
"Human: Use the given format to extract information from the following input:\n",
"Human: Sally is 13\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'name': 'Sally', 'age': 13}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain = create_structured_output_chain(json_schema, llm, prompt, verbose=True)\n",
"chain.run(\"Sally is 13\")"
]
},
{
"cell_type": "markdown",
"id": "12394696",
"metadata": {},
"source": [
"## Creating a generic OpenAI functions chain\n",
"To create a generic OpenAI functions chain, we can use the `create_openai_fn_chain` method. This is the same as `create_structured_output_chain` except that instead of taking a single output schema, it takes a sequence of function definitions.\n",
"\n",
"Functions can be passed in as:\n",
"- dicts conforming to OpenAI functions spec,\n",
"- Pydantic classes, in which case they should have docstring descriptions of the function they represent and descriptions for each of the parameters,\n",
"- Python functions, in which case they should have docstring descriptions of the function and args, along with type hints.\n",
"\n",
"See here for relevant [reference docs](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.base.create_openai_fn_chain.html)."
]
},
{
"cell_type": "markdown",
"id": "ff19be25",
"metadata": {},
"source": [
"### Using Pydantic classes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "17f52508",
"metadata": {},
"outputs": [],
"source": [
"class RecordPerson(BaseModel):\n",
" \"\"\"Record some identifying information about a person.\"\"\"\n",
" name: str = Field(..., description=\"The person's name\")\n",
" age: int = Field(..., description=\"The person's age\")\n",
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")\n",
"\n",
" \n",
"class RecordDog(BaseModel):\n",
" \"\"\"Record some identifying information about a dog.\"\"\"\n",
" name: str = Field(..., description=\"The dog's name\")\n",
" color: str = Field(..., description=\"The dog's color\")\n",
" fav_food: Optional[str] = Field(None, description=\"The dog's favorite food\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a4658ad8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
"Human: Make calls to the relevant function to record the entities in the following input:\n",
"Human: Harry was a chubby brown beagle who loved chicken\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"RecordDog(name='Harry', color='brown', fav_food='chicken')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt_msgs = [\n",
" SystemMessage(\n",
" content=\"You are a world class algorithm for recording entities\"\n",
" ),\n",
" HumanMessage(content=\"Make calls to the relevant function to record the entities in the following input:\"),\n",
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
"]\n",
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
"\n",
"chain = create_openai_fn_chain([RecordPerson, RecordDog], llm, prompt, verbose=True)\n",
"chain.run(\"Harry was a chubby brown beagle who loved chicken\")"
]
},
{
"cell_type": "markdown",
"id": "df6d9147",
"metadata": {},
"source": [
"### Using Python functions\n",
"We can pass in functions as Pydantic classes, directly as OpenAI function dicts, or Python functions. To pass a Python function in directly, we'll want to make sure our parameters have type hints, we have a docstring, and we use [Google Python style docstrings](https://google.github.io/styleguide/pyguide.html#doc-function-args) to describe the parameters.\n",
"\n",
"**NOTE**: To use Python functions, make sure the function arguments are of primitive types (str, float, int, bool) or that they are Pydantic objects."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "95ac5825",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
"Human: Make calls to the relevant function to record the entities in the following input:\n",
"Human: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'name': 'Tommy', 'age': 12, 'fav_food': {'food': 'apple pie'}}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class OptionalFavFood(BaseModel):\n",
" \"\"\"Either a food or null.\"\"\"\n",
" food: Optional[str] = Field(None, description=\"Either the name of a food or null. Should be null if the food isn't known.\")\n",
"\n",
"def record_person(name: str, age: int, fav_food: OptionalFavFood) -> str:\n",
" \"\"\"Record some basic identifying information about a person.\n",
" \n",
" Args:\n",
" name: The person's name.\n",
" age: The person's age in years.\n",
" fav_food: An OptionalFavFood object that either contains the person's favorite food or a null value. Food should be null if it's not known.\n",
" \"\"\"\n",
" return f\"Recording person {name} of age {age} with favorite food {fav_food.food}!\"\n",
"\n",
" \n",
"chain = create_openai_fn_chain([record_person], llm, prompt, verbose=True)\n",
"chain.run(\"The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\")"
]
},
{
"cell_type": "markdown",
"id": "403ea5dd",
"metadata": {},
"source": [
"If we pass in multiple Python functions or OpenAI functions, then the returned output will be of the form\n",
"```python\n",
"{\"name\": \"<<function_name>>\", \"arguments\": {<<function_arguments>>}}\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8b0d11de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
"Human: Make calls to the relevant function to record the entities in the following input:\n",
"Human: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'name': 'record_dog',\n",
" 'arguments': {'name': 'Henry', 'color': 'brown', 'fav_food': {'food': None}}}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def record_dog(name: str, color: str, fav_food: OptionalFavFood) -> str:\n",
" \"\"\"Record some basic identifying information about a dog.\n",
" \n",
" Args:\n",
" name: The dog's name.\n",
" color: The dog's color.\n",
" fav_food: An OptionalFavFood object that either contains the dog's favorite food or a null value. Food should be null if it's not known.\n",
" \"\"\"\n",
" return f\"Recording dog {name} of color {color} with favorite food {fav_food}!\"\n",
"\n",
"\n",
"chain = create_openai_fn_chain([record_person, record_dog], llm, prompt, verbose=True)\n",
"chain.run(\"I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\")"
]
},
{
"cell_type": "markdown",
"id": "5f93686b",
"metadata": {},
"source": [
"## Other Chains using OpenAI functions\n",
"\n",
"There are a number of more specific chains that use OpenAI functions.\n",
"- [Extraction](/docs/modules/chains/additional/extraction): very similar to structured output chain, intended for information/entity extraction specifically.\n",
"- [Tagging](/docs/modules/chains/additional/tagging): tag inputs.\n",
"- [OpenAPI](/docs/modules/chains/additional/openapi_openai): take an OpenAPI spec and create + execute valid requests against the API, using OpenAI functions under the hood.\n",
"- [QA with citations](/docs/modules/chains/additional/qa_citations): use OpenAI functions' ability to extract citations from text."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93425c66",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,118 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cube Semantic Layer"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook demonstrates the process of retrieving Cube's data model metadata in a format suitable for passing to LLMs as embeddings, thereby enhancing contextual information."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### About Cube"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"[Cube](https://cube.dev/) is the Semantic Layer for building data apps. It helps data engineers and application developers access data from modern data stores, organize it into consistent definitions, and deliver it to every application."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Cube's data model provides structure and definitions that are used as a context for LLM to understand data and generate correct queries. LLM doesn't need to navigate complex joins and metrics calculations because Cube abstracts those and provides a simple interface that operates on the business-level terminology, instead of SQL table and column names. This simplification helps LLM to be less error-prone and avoid hallucinations."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`Cube Semantic Loader` requires 2 arguments:\n",
"| Input Parameter | Description |\n",
"| --- | --- |\n",
"| `cube_api_url` | The URL of your Cube's deployment REST API. Please refer to the [Cube documentation](https://cube.dev/docs/http-api/rest#configuration-base-path) for more information on configuring the base path. |\n",
"| `cube_api_token` | The authentication token generated based on your Cube's API secret. Please refer to the [Cube documentation](https://cube.dev/docs/security#generating-json-web-tokens-jwt) for instructions on generating JSON Web Tokens (JWT). |\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import jwt\n",
"from langchain.document_loaders import CubeSemanticLoader\n",
"\n",
"api_url = \"https://api-example.gcp-us-central1.cubecloudapp.dev/cubejs-api/v1/meta\"\n",
"cubejs_api_secret = \"api-secret-here\"\n",
"security_context = {}\n",
"# Read more about security context here: https://cube.dev/docs/security\n",
"api_token = jwt.encode(security_context, cubejs_api_secret, algorithm=\"HS256\")\n",
"\n",
"loader = CubeSemanticLoader(api_url, api_token)\n",
"\n",
"documents = loader.load()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Returns:\n",
"\n",
"A list of documents with the following attributes:\n",
"\n",
"- `page_content`\n",
"- `metadata`\n",
" - `table_name`\n",
" - `column_name`\n",
" - `column_data_type`\n",
" - `column_title`\n",
" - `column_description`"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"> page_content='table name: orders_view, column name: orders_view.total_amount, column data type: number, column title: Orders View Total Amount, column description: None' metadata={'table_name': 'orders_view', 'column_name': 'orders_view.total_amount', 'column_data_type': 'number', 'column_title': 'Orders View Total Amount', 'column_description': 'None'}"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -18,7 +18,7 @@
"## Creating a Pinecone index\n",
"First we'll want to create a `Pinecone` VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
"\n",
"To use Pinecone, you to have `pinecone` package installed and you must have an API key and an Environment. Here are the [installation instructions](https://docs.pinecone.io/docs/quickstart).\n",
"To use Pinecone, you have to have `pinecone` package installed and you must have an API key and an Environment. Here are the [installation instructions](https://docs.pinecone.io/docs/quickstart).\n",
"\n",
"NOTE: The self-query retriever requires you to have `lark` package installed."
]

View File

@@ -5,9 +5,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# AWS Kendra\n",
"# Amazon Kendra\n",
"\n",
"> AWS Kendra is an intelligent search service provided by Amazon Web Services (AWS). It utilizes advanced natural language processing (NLP) and machine learning algorithms to enable powerful search capabilities across various data sources within an organization. Kendra is designed to help users find the information they need quickly and accurately, improving productivity and decision-making.\n",
"> Amazon Kendra is an intelligent search service provided by Amazon Web Services (AWS). It utilizes advanced natural language processing (NLP) and machine learning algorithms to enable powerful search capabilities across various data sources within an organization. Kendra is designed to help users find the information they need quickly and accurately, improving productivity and decision-making.\n",
"\n",
"> With Kendra, users can search across a wide range of content types, including documents, FAQs, knowledge bases, manuals, and websites. It supports multiple languages and can understand complex queries, synonyms, and contextual meanings to provide highly relevant search results."
]
@@ -17,7 +17,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using the AWS Kendra Index Retriever"
"## Using the Amazon Kendra Index Retriever"
]
},
{

View File

@@ -1,21 +1,31 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "9fc6205b",
"metadata": {},
"source": [
"# Databerry\n",
"# Chaindesk\n",
"\n",
">[Databerry platform](https://docs.databerry.ai/introduction) brings data from anywhere (Datsources: Text, PDF, Word, PowerPpoint, Excel, Notion, Airtable, Google Sheets, etc..) into Datastores (container of multiple Datasources).\n",
"Then your Datastores can be connected to ChatGPT via Plugins or any other Large Langue Model (LLM) via the `Databerry API`.\n",
">[Chaindesk platform](https://docs.chaindesk.ai/introduction) brings data from anywhere (Datasources: Text, PDF, Word, PowerPoint, Excel, Notion, Airtable, Google Sheets, etc.) into Datastores (container of multiple Datasources).\n",
"Then your Datastores can be connected to ChatGPT via Plugins or any other Large Language Model (LLM) via the `Chaindesk API`.\n",
"\n",
"This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
"This notebook shows how to use [Chaindesk's](https://www.chaindesk.ai/) retriever.\n",
"\n",
"First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.databerry.ai/api-reference/authentication)."
"First, you will need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.chaindesk.ai/api-reference/authentication)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3697b9fd",
"metadata": {},
"outputs": [],
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
"id": "944e172b",
"metadata": {},
@@ -34,7 +44,7 @@
},
"outputs": [],
"source": [
"from langchain.retrievers import DataberryRetriever"
"from langchain.retrievers import ChaindeskRetriever"
]
},
{
@@ -46,9 +56,9 @@
},
"outputs": [],
"source": [
"retriever = DataberryRetriever(\n",
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
" # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
"retriever = ChaindeskRetriever(\n",
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.chaindesk.ai/query\",\n",
" # api_key=\"CHAINDESK_API_KEY\", # optional if datastore is public\n",
" # top_k=10 # optional\n",
")"
]

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "fc0db1bc",
"metadata": {},
@@ -25,7 +26,7 @@
"from langchain.vectorstores import Chroma\n",
"from langchain.embeddings import HuggingFaceEmbeddings\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.document_transformers import EmbeddingsRedundantFilter\n",
"from langchain.document_transformers import EmbeddingsRedundantFilter,EmbeddingsClusteringFilter\n",
"from langchain.retrievers.document_compressors import DocumentCompressorPipeline\n",
"from langchain.retrievers import ContextualCompressionRetriever\n",
"\n",
@@ -70,6 +71,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c152339d",
"metadata": {},
@@ -92,6 +94,46 @@
" base_compressor=pipeline, base_retriever=lotr\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c10022fa",
"metadata": {},
"source": [
"## Pick a representative sample of documents from the merged retrievers."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3885482",
"metadata": {},
"outputs": [],
"source": [
"# This filter will divide the documents' vectors into clusters or \"centers\" of meaning.\n",
"# Then it will pick the closest document to that center for the final results.\n",
"# By default the result document will be ordered/grouped by clusters.\n",
"filter_ordered_cluster = EmbeddingsClusteringFilter(\n",
" embeddings=filter_embeddings,\n",
" num_clusters=10,\n",
" num_closest=1,\n",
" )\n",
"\n",
"# If you want the final document to be ordered by the original retriever scores\n",
"# you need to add the \"sorted\" parameter.\n",
"filter_ordered_by_retriever = EmbeddingsClusteringFilter(\n",
" embeddings=filter_embeddings,\n",
" num_clusters=10,\n",
" num_closest=1,\n",
" sorted = True,\n",
" )\n",
"\n",
"pipeline = DocumentCompressorPipeline(transformers=[filter_ordered_by_retriever])\n",
"compression_retriever = ContextualCompressionRetriever(\n",
" base_compressor=pipeline, base_retriever=lotr\n",
")\n"
]
}
],
"metadata": {

View File

@@ -0,0 +1,208 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "9597802c",
"metadata": {},
"source": [
"# Clarifai\n",
"\n",
">[Clarifai](https://www.clarifai.com/) is an AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model training, evaluation, and inference.\n",
"\n",
"This example goes over how to use LangChain to interact with `Clarifai` [models](https://clarifai.com/explore/models). Text embedding models in particular can be found [here](https://clarifai.com/explore/models?page=1&perPage=24&filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-embedder%22%5D%7D%5D).\n",
"\n",
"To use Clarifai, you must have an account and a Personal Access Token (PAT) key. \n",
"[Check here](https://clarifai.com/settings/security) to get or create a PAT."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "2a773d8d",
"metadata": {},
"source": [
"# Dependencies"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "91ea14ce-831d-409a-a88f-30353acdabd1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Install required dependencies\n",
"!pip install clarifai"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "426f1156",
"metadata": {},
"source": [
"# Imports\n",
"Here we will be setting the personal access token. You can find your PAT under [settings/security](https://clarifai.com/settings/security) in your Clarifai account."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3f5dc9d7-65e3-4b5b-9086-3327d016cfe0",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"# Please login and get your API key from https://clarifai.com/settings/security \n",
"from getpass import getpass\n",
"\n",
"CLARIFAI_PAT = getpass()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6fb585dd",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Import the required modules\n",
"from langchain.embeddings import ClarifaiEmbeddings\n",
"from langchain import PromptTemplate, LLMChain"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "16521ed2",
"metadata": {},
"source": [
"# Input\n",
"Create a prompt template to be used with the LLM Chain:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "035dea0f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"template = \"\"\"Question: {question}\n",
"\n",
"Answer: Let's think step by step.\"\"\"\n",
"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c8905eac",
"metadata": {},
"source": [
"# Setup\n",
"Set the user id and app id to the application in which the model resides. You can find a list of public models on https://clarifai.com/explore/models\n",
"\n",
"You will also have to initialize the model id and, if needed, the model version id. Some models have many versions; you can choose the one appropriate for your task."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1fe9bf15",
"metadata": {},
"outputs": [],
"source": [
"USER_ID = 'openai'\n",
"APP_ID = 'embed'\n",
"MODEL_ID = 'text-embedding-ada'\n",
"\n",
"# You can provide a specific model version as the model_version_id arg.\n",
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3f3458d9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Initialize a Clarifai embedding model\n",
"embeddings = ClarifaiEmbeddings(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a641dbd9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "32b4d5f4-2b8e-4681-856f-19a3dd141ae4",
"metadata": {},
"outputs": [],
"source": [
"query_result = embeddings.embed_query(text)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "47076457-1880-48ac-970f-872ead6f0d94",
"metadata": {},
"outputs": [],
"source": [
"doc_result = embeddings.embed_documents([text])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -9,7 +9,7 @@
"# Elasticsearch\n",
"Walkthrough of how to generate embeddings using a hosted embedding model in Elasticsearch\n",
"\n",
"The easiest way to instantiate the `ElasticsearchEmebddings` class it either\n",
"The easiest way to instantiate the `ElasticsearchEmbeddings` class is either\n",
"- using the `from_credentials` constructor if you are using Elastic Cloud\n",
"- or using the `from_es_connection` constructor with any Elasticsearch cluster"
],

View File

@@ -10,14 +10,32 @@
"\n",
">`OpenSearch` helps you develop high quality, maintenance-free, and high performance intelligent search services to provide your users with high search efficiency and accuracy.\n",
"\n",
">`OpenSearch` provides the vector search feature. In specific scenarios, especially test question search and image search scenarios, you can use the vector search feature together with the multimodal search feature to improve the accuracy of search results. This topic describes the syntax and usage notes of vector indexes.\n",
">`OpenSearch` provides the vector search feature. In specific scenarios, especially test question search and image search scenarios, you can use the vector search feature together with the multimodal search feature to improve the accuracy of search results.\n",
"\n",
"This notebook shows how to use functionality related to the `Alibaba Cloud OpenSearch Vector Search Edition`.\n",
"To run, you should have an [OpenSearch Vector Search Edition](https://opensearch.console.aliyun.com) instance up and running:\n",
"\n",
"Read the [help document](https://www.alibabacloud.com/help/en/opensearch/latest/vector-search) to quickly familiarize and configure OpenSearch Vector Search Edition instance.\n"
"Read the [help document](https://www.alibabacloud.com/help/en/opensearch/latest/vector-search) to quickly familiarize yourself with and configure an OpenSearch Vector Search Edition instance."
]
},
{
"cell_type": "markdown",
"source": [
"After the instance is up and running, follow these steps to split documents, get embeddings, connect to the Alibaba Cloud OpenSearch instance, index documents, and perform vector retrieval."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"We need to install the following Python packages first."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 1,
@@ -29,10 +47,29 @@
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After completing the configuration, follow these steps to connect to the instance, index documents, and perform vector retrieval."
]
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
@@ -60,7 +97,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Split documents and get embeddings by call OpenAI API"
"Split documents and get embeddings."
]
},
{
@@ -123,7 +160,7 @@
" \"id\": \"id\", # The id field name mapping of index document.\n",
" \"document\": \"document\", # The text field name mapping of index document.\n",
" \"embedding\": \"embedding\", # The embedding field name mapping of index document.\n",
" \"metadata_x\": \"metadata_x,=\", # The metadata field name mapping of index document, could specify multiple, The value field contains mapping name and operator, the operator would be used when executing metadata filter query.\n",
" \"name_of_the_metadata_specified_during_search\": \"opensearch_metadata_field_name,=\", # The metadata field name mapping of index document, could specify multiple, The value field contains mapping name and operator, the operator would be used when executing metadata filter query.\n",
" },\n",
")\n",
"\n",
@@ -139,7 +176,10 @@
"# \"id\": \"id\",\n",
"# \"document\": \"document\",\n",
"# \"embedding\": \"embedding\",\n",
"# \"metadata\": \"metadata,=\" #The value field contains mapping name and operator, the operator would be used when executing metadata filter query\n",
"# \"metadata_a\": \"metadata_a,=\" #The value field contains mapping name and operator, the operator would be used when executing metadata filter query\n",
"# \"metadata_b\": \"metadata_b,>\"\n",
"# \"metadata_c\": \"metadata_c,<\"\n",
"# \"metadata_else\": \"metadata_else,=\"\n",
"# })"
]
},
@@ -251,7 +291,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Query and retrieve data with metadata\n"
"Query and retrieve data with metadata.\n"
]
},
{
@@ -307,4 +347,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

View File

@@ -8,12 +8,12 @@
"source": [
"# Clarifai\n",
"\n",
">[Clarifai](https://www.clarifai.com/) is a AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model building and inference. A Clarifai application can be used as a vector database after uploading inputs. \n",
">[Clarifai](https://www.clarifai.com/) is an AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model training, evaluation, and inference. A Clarifai application can be used as a vector database after uploading inputs. \n",
"\n",
"This notebook shows how to use functionality related to the `Clarifai` vector database.\n",
"\n",
"To use Clarifai, you must have an account and a Personal Access Token key. \n",
"Here are the [installation instructions](https://clarifai.com/settings/security )."
"To use Clarifai, you must have an account and a Personal Access Token (PAT) key. \n",
"[Check here](https://clarifai.com/settings/security) to get or create a PAT."
]
},
{
@@ -58,7 +58,7 @@
"# Please login and get your API key from https://clarifai.com/settings/security \n",
"from getpass import getpass\n",
"\n",
"CLARIFAI_PAT_KEY = getpass()"
"CLARIFAI_PAT = getpass()"
]
},
{
@@ -92,7 +92,7 @@
"metadata": {},
"source": [
"# Setup\n",
"Setup the user id and app id where the text data will be uploaded. \n",
"Set up the user id and app id where the text data will be uploaded. Note: when creating that application, please select an appropriate base workflow for indexing your text documents, such as the Language-Understanding workflow.\n",
"\n",
"You will have to first create an account on [Clarifai](https://clarifai.com/login) and then create an application."
]
@@ -139,7 +139,7 @@
"metadata": {},
"outputs": [],
"source": [
"clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT_KEY, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)"
"clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)"
]
},
{

View File

@@ -0,0 +1,575 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "683953b3",
"metadata": {},
"source": [
"# Marqo\n",
"\n",
"This notebook shows how to use functionality related to the Marqo vectorstore.\n",
"\n",
">[Marqo](https://www.marqo.ai/) is an open-source vector search engine. Marqo allows you to store and query multimodal data such as text and images. Marqo creates the vectors for you using a huge selection of open-source models. You can also provide your own fine-tuned models, and Marqo will handle the loading and inference for you.\n",
"\n",
"To run this notebook with our docker image please run the following commands first to get Marqo:\n",
"\n",
"```\n",
"docker pull marqoai/marqo:latest\n",
"docker rm -f marqo\n",
"docker run --name marqo -it --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "aac9563e",
"metadata": {},
"outputs": [],
"source": [
"!pip install marqo"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5d1489ec",
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import Marqo\n",
"from langchain.document_loaders import TextLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a3c3999a",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"loader = TextLoader('../../../state_of_the_union.txt')\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6e104aee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index langchain-demo exists.\n"
]
}
],
"source": [
"import marqo \n",
"\n",
"# initialize marqo\n",
"marqo_url = \"http://localhost:8882\" # if using marqo cloud replace with your endpoint (console.marqo.ai)\n",
"marqo_api_key = \"\" # if using marqo cloud replace with your api key (console.marqo.ai)\n",
"\n",
"client = marqo.Client(url=marqo_url, api_key=marqo_api_key)\n",
"\n",
"index_name = \"langchain-demo\"\n",
"\n",
"docsearch = Marqo.from_documents(docs, index_name=index_name)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"result_docs = docsearch.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9c608226",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [
"print(result_docs[0].page_content)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "98704b27",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"0.68647254\n"
]
}
],
"source": [
"result_docs = docsearch.similarity_search_with_score(query)\n",
"print(result_docs[0][0].page_content, result_docs[0][1], sep=\"\\n\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "eb3395b6",
"metadata": {},
"source": [
"## Additional features\n",
"\n",
"One of the powerful features of Marqo as a vectorstore is that you can use indexes created externally. For example:\n",
"\n",
"+ If you had a database of image and text pairs from another application, you can simply just use it in langchain with the Marqo vectorstore. Note that bringing your own multimodal indexes will disable the `add_texts` method.\n",
"\n",
"+ If you had a database of text documents, you can bring it into the langchain framework and add more texts through `add_texts`.\n",
"\n",
"The documents that are returned are customised by passing your own function to the `page_content_builder` callback in the search methods."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "35b99fef",
"metadata": {},
"source": [
"#### Multimodal Example"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a359ed74",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'errors': False,\n",
" 'processingTimeMs': 2090.2822139996715,\n",
" 'index_name': 'langchain-multimodal-demo',\n",
" 'items': [{'_id': 'aa92fc1c-1fb2-4d86-b027-feb507c419f7',\n",
" 'result': 'created',\n",
" 'status': 201},\n",
" {'_id': '5142c258-ef9f-4bf2-a1a6-2307280173a0',\n",
" 'result': 'created',\n",
" 'status': 201}]}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# use a new index\n",
"index_name = \"langchain-multimodal-demo\"\n",
"\n",
"# in case the demo is re-run\n",
"try:\n",
" client.delete_index(index_name)\n",
"except Exception:\n",
" print(f\"Creating {index_name}\")\n",
" \n",
"# This index could have been created by another system\n",
"settings = {\"treat_urls_and_pointers_as_images\": True, \"model\": \"ViT-L/14\"}\n",
"client.create_index(index_name, **settings)\n",
"client.index(index_name).add_documents(\n",
" [ \n",
" # image of a bus\n",
" {\n",
" \"caption\": \"Bus\",\n",
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg\"\n",
" },\n",
" # image of a plane\n",
" { \n",
" \"caption\": \"Plane\", \n",
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg\"\n",
" }\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "368d1fab",
"metadata": {},
"outputs": [],
"source": [
"def get_content(res):\n",
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
" return f\"{res['caption']}: {res['image']}\"\n",
"\n",
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
"\n",
"\n",
"query = \"vehicles that fly\"\n",
"doc_results = docsearch.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "eef4edf9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Plane: https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg\n",
"Bus: https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg\n"
]
}
],
"source": [
"for doc in doc_results:\n",
" print(doc.page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c255f603",
"metadata": {},
"source": [
"#### Text only example"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9e9a2b20",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'errors': False,\n",
" 'processingTimeMs': 139.2144540004665,\n",
" 'index_name': 'langchain-byo-index-demo',\n",
" 'items': [{'_id': '27c05a1c-b8a9-49a5-ae73-fbf1eb51dc3f',\n",
" 'result': 'created',\n",
" 'status': 201},\n",
" {'_id': '6889afe0-e600-43c1-aa3b-1d91bf6db274',\n",
" 'result': 'created',\n",
" 'status': 201}]}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# use a new index\n",
"index_name = \"langchain-byo-index-demo\"\n",
"\n",
"# in case the demo is re-run\n",
"try:\n",
" client.delete_index(index_name)\n",
"except Exception:\n",
" print(f\"Creating {index_name}\")\n",
"\n",
"# This index could have been created by another system\n",
"client.create_index(index_name)\n",
"client.index(index_name).add_documents(\n",
" [ \n",
" {\n",
" \"Title\": \"Smartphone\",\n",
" \"Description\": \"A smartphone is a portable computer device that combines mobile telephone \"\n",
" \"functions and computing functions into one unit.\",\n",
" },\n",
" { \n",
" \"Title\": \"Telephone\",\n",
" \"Description\": \"A telephone is a telecommunications device that permits two or more users to\"\n",
" \"conduct a conversation when they are too far apart to be easily heard directly.\",\n",
" }\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "b2943ea9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['9986cc72-adcd-4080-9d74-265c173a9ec3']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Note text indexes retain the ability to use add_texts despite different field names in documents\n",
"# this is because the page_content_builder callback lets you handle these document fields as required\n",
"\n",
"def get_content(res):\n",
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
" if 'text' in res:\n",
" return res['text']\n",
" return res['Description']\n",
"\n",
"\n",
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
"\n",
"docsearch.add_texts([\"This is a document that is about elephants\"])\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "851450e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A smartphone is a portable computer device that combines mobile telephone functions and computing functions into one unit.\n"
]
}
],
"source": [
"query = \"modern communications devices\"\n",
"doc_results = docsearch.similarity_search(query)\n",
"\n",
"print(doc_results[0].page_content)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9a438fec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This is a document that is about elephants\n"
]
}
],
"source": [
"query = \"elephants\"\n",
"doc_results = docsearch.similarity_search(query, page_content_builder=get_content)\n",
"\n",
"print(doc_results[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "0d04c9d4",
"metadata": {},
"source": [
"## Weighted Queries\n",
"\n",
"We also expose Marqo's weighted queries, which are a powerful way to compose complex semantic searches."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d42ba0d6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A smartphone is a portable computer device that combines mobile telephone functions and computing functions into one unit.\n"
]
}
],
"source": [
"query = {\"communications devices\" : 1.0}\n",
"doc_results = docsearch.similarity_search(query)\n",
"print(doc_results[0].page_content)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "b5918a16",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A telephone is a telecommunications device that permits two or more users toconduct a conversation when they are too far apart to be easily heard directly.\n"
]
}
],
"source": [
"query = {\"communications devices\" : 1.0, \"technology post 2000\": -1.0}\n",
"doc_results = docsearch.similarity_search(query)\n",
"print(doc_results[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "2d026aa0",
"metadata": {},
"source": [
"# Question Answering with Sources\n",
"\n",
"This section shows how to use Marqo as part of a `RetrievalQAWithSourcesChain`. Marqo will perform the searches for information in the sources."
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e4ca223c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI API Key:········\n"
]
}
],
"source": [
"from langchain.chains import RetrievalQAWithSourcesChain\n",
"from langchain import OpenAI\n",
"\n",
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "5c6e45f9",
"metadata": {},
"outputs": [],
"source": [
"with open(\"../../../state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_text(state_of_the_union)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "70a7f320",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index langchain-qa-with-retrieval exists.\n"
]
}
],
"source": [
"index_name = \"langchain-qa-with-retrieval\"\n",
"docsearch = Marqo.from_documents(docs, index_name=index_name)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b3b008a4",
"metadata": {},
"outputs": [],
"source": [
"chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
" OpenAI(temperature=0), chain_type=\"stuff\", retriever=docsearch.as_retriever()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "e1457716",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'answer': ' The president honored Justice Breyer, thanking him for his service and noting that he is a retiring Justice of the United States Supreme Court.\\n',\n",
" 'sources': '../../../state_of_the_union.txt'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain(\n",
" {\"question\": \"What did the president say about Justice Breyer\"},\n",
" return_only_outputs=True,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "683953b3",
"metadata": {},
@@ -44,6 +45,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
"metadata": {},
@@ -63,6 +65,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1f3ecc42",
"metadata": {},
@@ -130,7 +133,7 @@
"# initialize MongoDB python client\n",
"client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)\n",
"\n",
"db_name = \"lanchain_db\"\n",
"db_name = \"langchain_db\"\n",
"collection_name = \"langchain_col\"\n",
"collection = client[db_name][collection_name]\n",
"index_name = \"langchain_demo\"\n",
@@ -156,6 +159,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
"metadata": {},
@@ -183,14 +187,14 @@
"db_name = \"langchain_db\"\n",
"collection_name = \"langchain_col\"\n",
"collection = client[db_name][collection_name]\n",
"index_name = \"langchain_index\"\n",
"index_name = \"langchain_demo\"\n",
"\n",
"# initialize vector store\n",
"vectorStore = MongoDBAtlasVectorSearch(\n",
" collection, OpenAIEmbeddings(), index_name=index_name\n",
")\n",
"\n",
"# perform a similarity search between the embedding of the query and the embeddings of the documents\n",
"# perform a similarity search between a query and the ingested documents\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = vectorStore.similarity_search(query)\n",
"\n",

View File

@@ -0,0 +1,338 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "1292f057",
"metadata": {},
"source": [
"# pg_hnsw\n",
"\n",
"> [pg_embedding](https://github.com/knizhnik/hnsw) is an open-source vector similarity search for `Postgres` that uses Hierarchical Navigable Small Worlds for approximate nearest neighbor search.\n",
"\n",
"It supports:\n",
"- exact and approximate nearest neighbor search using HNSW\n",
"- L2 distance\n",
"\n",
"This notebook shows how to use the Postgres vector database (`PGEmbedding`).\n",
"\n",
"> The PGEmbedding integration creates the pg_embedding extension for you, but you can also run the following Postgres query to add it manually:\n",
"```sql\n",
"CREATE EXTENSION embedding;\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6214221",
"metadata": {},
"outputs": [],
"source": [
"# Pip install necessary package\n",
"!pip install openai\n",
"!pip install psycopg2-binary\n",
"!pip install tiktoken"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b2e49694",
"metadata": {},
"source": [
"Add the OpenAI API Key to the environment variables to use `OpenAIEmbeddings`."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1dcc8d99",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI API Key:········\n"
]
}
],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9719ea68",
"metadata": {},
"outputs": [],
"source": [
"## Loading Environment Variables\n",
"from typing import List, Tuple"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfd1f38d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import PGEmbedding\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.docstore.document import Document"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8fab8cc2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Database Url:········\n"
]
}
],
"source": [
"os.environ[\"DATABASE_URL\"] = getpass.getpass(\"Database Url:\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bef17115",
"metadata": {},
"outputs": [],
"source": [
"loader = TextLoader(\"state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()\n",
"connection_string = os.environ.get(\"DATABASE_URL\")\n",
"collection_name = \"state_of_the_union\""
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "743abfaa",
"metadata": {},
"outputs": [],
"source": [
"db = PGEmbedding.from_documents(\n",
" embedding=embeddings,\n",
" documents=docs,\n",
" collection_name=collection_name,\n",
" connection_string=connection_string,\n",
")\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs_with_score: List[Tuple[Document, float]] = db.similarity_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41ce4c4e",
"metadata": {},
"outputs": [],
"source": [
"for doc, score in docs_with_score:\n",
" print(\"-\" * 80)\n",
" print(\"Score: \", score)\n",
" print(doc.page_content)\n",
" print(\"-\" * 80)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7ef7b052",
"metadata": {},
"source": [
"## Working with vectorstore in Postgres"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "939151f7",
"metadata": {},
"source": [
"### Uploading a vectorstore in PG "
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "595ac511",
"metadata": {},
"outputs": [],
"source": [
"db = PGEmbedding.from_documents(\n",
" embedding=embeddings,\n",
" documents=docs,\n",
" collection_name=collection_name,\n",
" connection_string=connection_string,\n",
" pre_delete_collection=False,\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f9510e6b",
"metadata": {},
"source": [
"### Create HNSW Index\n",
"By default, the extension performs a sequential scan search, with 100% recall. You might consider creating an HNSW index for approximate nearest neighbor (ANN) search to speed up `similarity_search_with_score` execution time. To create the HNSW index on your vector column, use the `create_hnsw_index` function:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d1981fa",
"metadata": {},
"outputs": [],
"source": [
"PGEmbedding.create_hnsw_index(\n",
" max_elements=10000, dims=1536, m=8, ef_construction=16, ef_search=16\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7adacf29",
"metadata": {},
"source": [
"The function above is equivalent to running the below SQL query:\n",
"```sql\n",
"CREATE INDEX ON vectors USING hnsw(vec) WITH (maxelements=10000, dims=1536, m=8, efconstruction=16, efsearch=16);\n",
"```\n",
"The HNSW index options used in the statement above include:\n",
"\n",
"- maxelements: Defines the maximum number of elements indexed. This is a required parameter. The example shown above has a value of 10000. A real-world example would have a much larger value, such as 1000000. An \"element\" refers to a data point (a vector) in the dataset, which is represented as a node in the HNSW graph. Typically, you would set this option to a value able to accommodate the number of rows in your dataset.\n",
"- dims: Defines the number of dimensions in your vector data. This is a required parameter. The example above uses 1536, which is the value you would define if you are storing data generated using OpenAI's text-embedding-ada-002 model, which supports 1536 dimensions.\n",
"- m: Defines the maximum number of bi-directional links (also referred to as \"edges\") created for each node during graph construction.\n",
"\n",
"The following additional index options are supported:\n",
"\n",
"- efconstruction: Defines the number of nearest neighbors considered during index construction. The default value is 32.\n",
"- efsearch: Defines the number of nearest neighbors considered during index search. The default value is 32.\n",
"\n",
"For information about how you can configure these options to influence the HNSW algorithm, refer to [Tuning the HNSW algorithm](https://neon-next-git-dprice-hnsw-extension-neondatabase.vercel.app/docs/extensions/hnsw#tuning-the-hnsw-algorithm)."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "528893fb",
"metadata": {},
"source": [
"### Retrieving a vectorstore in PG"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b6162b1c",
"metadata": {},
"outputs": [],
"source": [
"store = PGEmbedding(\n",
" connection_string=connection_string,\n",
" embedding_function=embeddings,\n",
" collection_name=collection_name,\n",
")\n",
"\n",
"retriever = store.as_retriever()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "1a5fedb1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"VectorStoreRetriever(vectorstore=<langchain.vectorstores.pghnsw.HNSWVectoreStore object at 0x121d3c8b0>, search_type='similarity', search_kwargs={})"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "0cefc938",
"metadata": {},
"outputs": [],
"source": [
"db1 = PGEmbedding.from_existing_index(\n",
" embedding=embeddings,\n",
" collection_name=collection_name,\n",
" pre_delete_collection=False,\n",
" connection_string=connection_string,\n",
")\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs_with_score: List[Tuple[Document, float]] = db1.similarity_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85cde495",
"metadata": {},
"outputs": [],
"source": [
"for doc, score in docs_with_score:\n",
" print(\"-\" * 80)\n",
" print(\"Score: \", score)\n",
" print(doc.page_content)\n",
" print(\"-\" * 80)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -123,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 1,
"metadata": {
"tags": []
},
@@ -138,7 +138,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -152,49 +152,25 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"## PGVector needs the connection string to the database.\n",
"## We will load it from the environment variables.\n",
"import os\n",
"# PGVector needs the connection string to the database.\n",
"CONNECTION_STRING = \"postgresql+psycopg2://harrisonchase@localhost:5432/test3\"\n",
"\n",
"CONNECTION_STRING = PGVector.connection_string_from_db_params(\n",
" driver=os.environ.get(\"PGVECTOR_DRIVER\", \"psycopg2\"),\n",
" host=os.environ.get(\"PGVECTOR_HOST\", \"localhost\"),\n",
" port=int(os.environ.get(\"PGVECTOR_PORT\", \"5432\")),\n",
" database=os.environ.get(\"PGVECTOR_DATABASE\", \"postgres\"),\n",
" user=os.environ.get(\"PGVECTOR_USER\", \"postgres\"),\n",
" password=os.environ.get(\"PGVECTOR_PASSWORD\", \"postgres\"),\n",
")\n",
"\n",
"\n",
"## Example\n",
"# postgresql+psycopg2://username:password@localhost:5432/database_name"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"# ## PGVector needs the connection string to the database.\n",
"# ## We will load it from the environment variables.\n",
"# # Alternatively, you can create it from environment variables.\n",
"# import os\n",
"\n",
"# CONNECTION_STRING = PGVector.connection_string_from_db_params(\n",
"# driver=os.environ.get(\"PGVECTOR_DRIVER\", \"psycopg2\"),\n",
"# host=os.environ.get(\"PGVECTOR_HOST\", \"localhost\"),\n",
"# port=int(os.environ.get(\"PGVECTOR_PORT\", \"5432\")),\n",
"# database=os.environ.get(\"PGVECTOR_DATABASE\", \"rd-embeddings\"),\n",
"# user=os.environ.get(\"PGVECTOR_USER\", \"admin\"),\n",
"# password=os.environ.get(\"PGVECTOR_PASSWORD\", \"password\"),\n",
"# database=os.environ.get(\"PGVECTOR_DATABASE\", \"postgres\"),\n",
"# user=os.environ.get(\"PGVECTOR_USER\", \"postgres\"),\n",
"# password=os.environ.get(\"PGVECTOR_PASSWORD\", \"postgres\"),\n",
"# )\n",
"\n",
"\n",
"# ## Example\n",
"# # postgresql+psycopg2://username:password@localhost:5432/database_name"
"\n"
]
},
{
@@ -206,27 +182,36 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# The PGVector Module will try to create a table with the name of the collection. So, make sure that the collection name is unique and the user has the\n",
"# permission to create a table.\n",
"# The PGVector Module will try to create a table with the name of the collection. \n",
"# So, make sure that the collection name is unique and the user has the permission to create a table.\n",
"\n",
"COLLECTION_NAME = \"state_of_the_union_test\"\n",
"\n",
"db = PGVector.from_documents(\n",
" embedding=embeddings,\n",
" documents=docs,\n",
" collection_name=\"state_of_the_union\",\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
")\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs_with_score: List[Tuple[Document, float]] = db.similarity_search_with_score(query)"
")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs_with_score = db.similarity_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -234,7 +219,7 @@
"output_type": "stream",
"text": [
"--------------------------------------------------------------------------------\n",
"Score: 0.6076804864602984\n",
"Score: 0.18460171628856903\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
@@ -244,7 +229,7 @@
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6076804864602984\n",
"Score: 0.18460171628856903\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
@@ -254,21 +239,17 @@
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.659062774389974\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"Score: 0.18470284560586236\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
"\n",
"Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
"\n",
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.659062774389974\n",
"Score: 0.21730864082247825\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
@@ -296,183 +277,189 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Working with vectorstore"
"## Working with vectorstore\n",
"\n",
"Above, we created a vectorstore from scratch. However, often times we want to work with an existing vectorstore.\n",
"In order to do that, we can initialize it directly."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"store = PGVector(\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" embedding_function=embeddings,\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Uploading a vectorstore"
"### Add documents\n",
"We can add documents to the existing vectorstore."
]
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['048c2e14-1cf3-11ee-8777-e65801318980']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store.add_documents([Document(page_content=\"foo\")])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"docs_with_score = db.similarity_search_with_score(\"foo\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='foo', metadata={}), 3.3203430005457335e-09)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs_with_score[0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
" 0.2404395365581814)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs_with_score[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Overriding a vectorstore\n",
"\n",
"If you have an existing collection, you can override it by calling `from_documents` with `pre_delete_collection=True`."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"data = docs\n",
"api_key = os.environ[\"OPENAI_API_KEY\"]\n",
"db = PGVector.from_documents(\n",
" documents=docs,\n",
" embedding=embeddings,\n",
" collection_name=collection_name,\n",
" connection_string=connection_string,\n",
" distance_strategy=DistanceStrategy.COSINE,\n",
" openai_api_key=api_key,\n",
" pre_delete_collection=False,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" pre_delete_collection=True,\n",
")"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"### Retrieving a vectorstore"
"docs_with_score = db.similarity_search_with_score(\"foo\")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
" 0.2404115088144465)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs_with_score[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using a VectorStore as a Retriever"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"connection_string = CONNECTION_STRING\n",
"embedding = embeddings\n",
"collection_name = \"state_of_the_union\"\n",
"from langchain.vectorstores.pgvector import DistanceStrategy\n",
"\n",
"store = PGVector(\n",
" connection_string=connection_string,\n",
" embedding_function=embedding,\n",
" collection_name=collection_name,\n",
" distance_strategy=DistanceStrategy.COSINE,\n",
")\n",
"\n",
"retriever = store.as_retriever()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"vectorstore=<langchain.vectorstores.pgvector.PGVector object at 0x7fe9a1b1c670> search_type='similarity' search_kwargs={}\n"
"tags=None metadata=None vectorstore=<langchain.vectorstores.pgvector.PGVector object at 0x29f94f880> search_type='similarity' search_kwargs={}\n"
]
}
],
"source": [
"print(retriever)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.6075870262188066), (Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.6075870262188066), (Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. 
Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt'}), 0.6589478388546668), (Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt'}), 0.6589478388546668)]\n"
]
}
],
"source": [
"# When we have an existing PG VEctor\n",
"DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN\n",
"db1 = PGVector.from_existing_index(\n",
" embedding=embeddings,\n",
" collection_name=\"state_of_the_union\",\n",
" distance_strategy=DEFAULT_DISTANCE_STRATEGY,\n",
" pre_delete_collection=False,\n",
" connection_string=CONNECTION_STRING,\n",
")\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs_with_score: List[Tuple[Document, float]] = db1.similarity_search_with_score(query)\n",
"print(docs_with_score)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--------------------------------------------------------------------------------\n",
"Score: 0.6075870262188066\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6075870262188066\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6589478388546668\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
"\n",
"We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
"\n",
"Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
"\n",
"Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
"\n",
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6589478388546668\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
"\n",
"We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
"\n",
"Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
"\n",
"Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
"\n",
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"source": [
"for doc, score in docs_with_score:\n",
" print(\"-\" * 80)\n",
" print(\"Score: \", score)\n",
" print(doc.page_content)\n",
" print(\"-\" * 80)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -491,7 +478,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "683953b3",
"metadata": {},
@@ -51,6 +52,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "320af802-9271-46ee-948f-d2453933d44b",
"metadata": {},
@@ -136,6 +138,30 @@
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "86a4b96b",
"metadata": {},
"source": [
"### Adding More Text to an Existing Index\n",
"\n",
"More text can be embedded and upserted to an existing Pinecone index using the `add_texts` function.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38a7a60e",
"metadata": {},
"outputs": [],
"source": [
"index = pinecone.Index(\"langchain-demo\")\n",
"vectorstore = Pinecone(index, embeddings.embed_query, \"text\")\n",
"\n",
"vectorstore.add_texts(\"More text!\")"
]
},
{
"attachments": {},
"cell_type": "markdown",

View File

@@ -43,7 +43,7 @@
"\n",
" CREATE FUNCTION match_documents(query_embedding vector(1536), match_count int)\n",
" RETURNS TABLE(\n",
" id bigint,\n",
" id uuid,\n",
" content text,\n",
" metadata jsonb,\n",
" -- we return matched vectors to enable maximal marginal relevance searches\n",

View File

@@ -0,0 +1,212 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Human input Chat Model\n",
"\n",
"Along with HumanInputLLM, LangChain also provides a pseudo Chat Model class that can be used for testing, debugging, or educational purposes. This allows you to mock out calls to the Chat Model and simulate how a human would respond if they received the messages.\n",
"\n",
"In this notebook, we go over how to use this.\n",
"\n",
"We start by using the HumanInputChatModel in an agent."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models.human import HumanInputChatModel"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Since we will use the `WikipediaQueryRun` tool in this notebook, you might need to install the `wikipedia` package if you haven't done so already."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/mskim58/dev/research/chatbot/github/langchain/.venv/bin/python: No module named pip\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install wikipedia"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import load_tools\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"tools = load_tools([\"wikipedia\"])\n",
"llm = HumanInputChatModel()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"\n",
" ======= start of message ======= \n",
"\n",
"\n",
"type: system\n",
"data:\n",
" content: \"Answer the following questions as best you can. You have access to the following tools:\\n\\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\\n\\nThe way you use the tools is by specifying a json blob.\\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\\n\\nThe only values that should be in the \\\"action\\\" field are: Wikipedia\\n\\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\\n\\n```\\n{\\n \\\"action\\\": $TOOL_NAME,\\n \\\"action_input\\\": $INPUT\\n}\\n```\\n\\nALWAYS use the following format:\\n\\nQuestion: the input question you must answer\\nThought: you should always think about what to do\\nAction:\\n```\\n$JSON_BLOB\\n```\\nObservation: the result of the action\\n... (this Thought/Action/Observation can repeat N times)\\nThought: I now know the final answer\\nFinal Answer: the final answer to the original input question\\n\\nBegin! Reminder to always use the exact characters `Final Answer` when responding.\"\n",
" additional_kwargs: {}\n",
"\n",
"======= end of message ======= \n",
"\n",
"\n",
"\n",
" ======= start of message ======= \n",
"\n",
"\n",
"type: human\n",
"data:\n",
" content: 'What is Bocchi the Rock?\n",
"\n",
"\n",
" '\n",
" additional_kwargs: {}\n",
" example: false\n",
"\n",
"======= end of message ======= \n",
"\n",
"\n",
"\u001b[32;1m\u001b[1;3mAction:\n",
"```\n",
"{\n",
" \"action\": \"Wikipedia\",\n",
" \"action_input\": \"What is Bocchi the Rock?\"\n",
"}\n",
"```\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mPage: Bocchi the Rock!\n",
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Botchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\n",
"\n",
"Page: Hitori Bocchi no Marumaru Seikatsu\n",
"Summary: Hitori Bocchi no Marumaru Seikatsu (Japanese: ひとりぼっちの○○生活, lit. \"Bocchi Hitori's ____ Life\" or \"The ____ Life of Being Alone\") is a Japanese yonkoma manga series written and illustrated by Katsuwo. It was serialized in ASCII Media Works' Comic Dengeki Daioh \"g\" magazine from September 2013 to April 2021. Eight tankōbon volumes have been released. An anime television series adaptation by C2C aired from April to June 2019.\n",
"\n",
"Page: Kessoku Band (album)\n",
"Summary: Kessoku Band (Japanese: 結束バンド, Hepburn: Kessoku Bando) is the debut studio album by Kessoku Band, a fictional musical group from the anime television series Bocchi the Rock!, released digitally on December 25, 2022, and physically on CD on December 28 by Aniplex. Featuring vocals from voice actresses Yoshino Aoyama, Sayumi Suzushiro, Saku Mizuno, and Ikumi Hasegawa, the album consists of 14 tracks previously heard in the anime, including a cover of Asian Kung-Fu Generation's \"Rockn' Roll, Morning Light Falls on You\", as well as newly recorded songs; nine singles preceded the album's physical release. Commercially, Kessoku Band peaked at number one on the Billboard Japan Hot Albums Chart and Oricon Albums Chart, and was certified gold by the Recording Industry Association of Japan.\n",
"\n",
"\u001b[0m\n",
"Thought:\n",
" ======= start of message ======= \n",
"\n",
"\n",
"type: system\n",
"data:\n",
" content: \"Answer the following questions as best you can. You have access to the following tools:\\n\\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\\n\\nThe way you use the tools is by specifying a json blob.\\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\\n\\nThe only values that should be in the \\\"action\\\" field are: Wikipedia\\n\\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\\n\\n```\\n{\\n \\\"action\\\": $TOOL_NAME,\\n \\\"action_input\\\": $INPUT\\n}\\n```\\n\\nALWAYS use the following format:\\n\\nQuestion: the input question you must answer\\nThought: you should always think about what to do\\nAction:\\n```\\n$JSON_BLOB\\n```\\nObservation: the result of the action\\n... (this Thought/Action/Observation can repeat N times)\\nThought: I now know the final answer\\nFinal Answer: the final answer to the original input question\\n\\nBegin! Reminder to always use the exact characters `Final Answer` when responding.\"\n",
" additional_kwargs: {}\n",
"\n",
"======= end of message ======= \n",
"\n",
"\n",
"\n",
" ======= start of message ======= \n",
"\n",
"\n",
"type: human\n",
"data:\n",
" content: \"What is Bocchi the Rock?\\n\\nThis was your previous work (but I haven't seen any of it! I only see what you return as final answer):\\nAction:\\n```\\n{\\n \\\"action\\\": \\\"Wikipedia\\\",\\n \\\"action_input\\\": \\\"What is Bocchi the Rock?\\\"\\n}\\n```\\nObservation: Page: Bocchi the Rock!\\nSummary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Botchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\\nAn anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\\n\\nPage: Hitori Bocchi no Marumaru Seikatsu\\nSummary: Hitori Bocchi no Marumaru Seikatsu (Japanese: ひとりぼっちの○○生活, lit. \\\"Bocchi Hitori's ____ Life\\\" or \\\"The ____ Life of Being Alone\\\") is a Japanese yonkoma manga series written and illustrated by Katsuwo. It was serialized in ASCII Media Works' Comic Dengeki Daioh \\\"g\\\" magazine from September 2013 to April 2021. Eight tankōbon volumes have been released. An anime television series adaptation by C2C aired from April to June 2019.\\n\\nPage: Kessoku Band (album)\\nSummary: Kessoku Band (Japanese: 結束バンド, Hepburn: Kessoku Bando) is the debut studio album by Kessoku Band, a fictional musical group from the anime television series Bocchi the Rock!, released digitally on December 25, 2022, and physically on CD on December 28 by Aniplex. 
Featuring vocals from voice actresses Yoshino Aoyama, Sayumi Suzushiro, Saku Mizuno, and Ikumi Hasegawa, the album consists of 14 tracks previously heard in the anime, including a cover of Asian Kung-Fu Generation's \\\"Rockn' Roll, Morning Light Falls on You\\\", as well as newly recorded songs; nine singles preceded the album's physical release. Commercially, Kessoku Band peaked at number one on the Billboard Japan Hot Albums Chart and Oricon Albums Chart, and was certified gold by the Recording Industry Association of Japan.\\n\\n\\nThought:\"\n",
" additional_kwargs: {}\n",
" example: false\n",
"\n",
"======= end of message ======= \n",
"\n",
"\n",
"\u001b[32;1m\u001b[1;3mThis finally works.\n",
"Final Answer: Bocchi the Rock! is a four-panel manga series and anime television series. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'What is Bocchi the Rock?',\n",
" 'output': \"Bocchi the Rock! is a four-panel manga series and anime television series. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\"}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent(\"What is Bocchi the Rock?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -52,7 +52,7 @@
{
"data": {
"text/plain": [
"AIMessage(content=\" J'aime programmer. \", additional_kwargs={})"
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 3,
@@ -101,7 +101,7 @@
{
"data": {
"text/plain": [
"LLMResult(generations=[[ChatGeneration(text=\" J'aime la programmation.\", generation_info=None, message=AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}))]], llm_output={})"
"LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('8cc8fb68-1c35-439c-96a0-695036a93652'))])"
]
},
"execution_count": 5,
@@ -125,13 +125,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
" J'adore programmer."
" J'aime la programmation."
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\" J'adore programmer.\", additional_kwargs={})"
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 6,
@@ -151,7 +151,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "df45f59f",
"id": "c253883f",
"metadata": {},
"outputs": [],
"source": []
@@ -173,7 +173,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,162 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e49f1e0d",
"metadata": {},
"source": [
"# JinaChat\n",
"\n",
"This notebook covers how to get started with JinaChat chat models."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "522686de",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.chat_models import JinaChat\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "62e0dbc3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"chat = JinaChat(temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ce16ad78-8e6f-48cd-954e-98be75eb5836",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"J'aime programmer.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that translates English to French.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" ),\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "markdown",
"id": "778f912a-66ea-4a5d-b3de-6c7db4baba26",
"metadata": {},
"source": [
"You can make use of templating by using a `MessagePromptTemplate`. You can build a `ChatPromptTemplate` from one or more `MessagePromptTemplates`. You can use `ChatPromptTemplate`'s `format_prompt` -- this returns a `PromptValue`, which you can convert to a string or Message object, depending on whether you want to use the formatted value as input to an llm or chat model.\n",
"\n",
"For convenience, there is a `from_template` method exposed on the template. If you were to use this template, this is what it would look like:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "180c5cc8",
"metadata": {},
"outputs": [],
"source": [
"template = (\n",
" \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n",
")\n",
"system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
"human_template = \"{text}\"\n",
"human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fbb043e6",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"J'aime programmer.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat_prompt = ChatPromptTemplate.from_messages(\n",
" [system_message_prompt, human_message_prompt]\n",
")\n",
"\n",
"# get a chat completion from the formatted messages\n",
"chat(\n",
" chat_prompt.format_prompt(\n",
" input_language=\"English\", output_language=\"French\", text=\"I love programming.\"\n",
" ).to_messages()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c095285d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -8,9 +8,12 @@
"source": [
"# Clarifai\n",
"\n",
">[Clarifai](https://www.clarifai.com/) is a AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model building and inference.\n",
">[Clarifai](https://www.clarifai.com/) is an AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model training, evaluation, and inference.\n",
"\n",
"This example goes over how to use LangChain to interact with `Clarifai` [models](https://clarifai.com/explore/models)."
"This example goes over how to use LangChain to interact with `Clarifai` [models](https://clarifai.com/explore/models). \n",
"\n",
"To use Clarifai, you must have an account and a Personal Access Token (PAT) key. \n",
"[Check here](https://clarifai.com/settings/security) to get or create a PAT."
]
},
{
@@ -42,7 +45,7 @@
"metadata": {},
"source": [
"# Imports\n",
"Here we will be setting the personal access token. You can find your PAT under settings/security on the platform."
"Here we will be setting the personal access token. You can find your PAT under [settings/security](https://clarifai.com/settings/security) in your Clarifai account."
]
},
{
@@ -52,17 +55,25 @@
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"# Please login and get your API key from https://clarifai.com/settings/security \n",
"from getpass import getpass\n",
"\n",
"CLARIFAI_PAT_KEY = getpass()"
"CLARIFAI_PAT = getpass()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "6fb585dd",
"metadata": {
"tags": []
@@ -81,12 +92,12 @@
"metadata": {},
"source": [
"# Input\n",
"Create a prompt template to be used with the LLM Chain"
"Create a prompt template to be used with the LLM Chain:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "035dea0f",
"metadata": {
"tags": []
@@ -121,10 +132,10 @@
"source": [
"USER_ID = 'openai'\n",
"APP_ID = 'chat-completion'\n",
"MODEL_ID = 'chatgpt-3_5-turbo'\n",
"MODEL_ID = 'GPT-3_5-turbo'\n",
"\n",
"# You can provide a specific model version\n",
"# model_version_id = \"MODEL_VERSION_ID\""
"# You can provide a specific model version as the model_version_id arg.\n",
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
]
},
{
@@ -137,7 +148,7 @@
"outputs": [],
"source": [
"# Initialize a Clarifai LLM\n",
"clarifai_llm = Clarifai(clarifai_pat_key=CLARIFAI_PAT_KEY, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
"clarifai_llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
]
},
{
@@ -171,7 +182,7 @@
{
"data": {
"text/plain": [
"'Justin Bieber was born on March 1, 1994. So, we need to look at the Super Bowl that was played in the year 1994. \\n\\nThe Super Bowl in 1994 was Super Bowl XXVIII (28). It was played on January 30, 1994, between the Dallas Cowboys and the Buffalo Bills. \\n\\nThe Dallas Cowboys won the Super Bowl in 1994, defeating the Buffalo Bills by a score of 30-13. \\n\\nTherefore, the Dallas Cowboys are the NFL team that won the Super Bowl in the year Justin Bieber was born.'"
"'Justin Bieber was born on March 1, 1994. So, we need to figure out the Super Bowl winner for the 1994 season. The NFL season spans two calendar years, so the Super Bowl for the 1994 season would have taken place in early 1995. \\n\\nThe Super Bowl in question is Super Bowl XXIX, which was played on January 29, 1995. The game was won by the San Francisco 49ers, who defeated the San Diego Chargers by a score of 49-26. Therefore, the San Francisco 49ers won the Super Bowl in the year Justin Bieber was born.'"
]
},
"execution_count": 7,

View File

@@ -1,20 +1,26 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "959300d4",
"metadata": {},
"source": [
"# Hugging Face Hub\n",
"\n",
"The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
"\n",
"This example showcases how to connect to the Hugging Face Hub."
"This example showcases how to connect to the `Hugging Face Hub` and use different models."
]
},
{
"cell_type": "markdown",
"id": "1ddafc6d-7d7c-48fa-838f-0e7f50895ce3",
"metadata": {},
"source": [
"## Installation and Setup"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff",
"metadata": {
@@ -26,22 +32,30 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!pip install huggingface_hub > /dev/null"
"!pip install huggingface_hub"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "d597a792-354c-4ca5-b483-5965eec5d63d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n",
"\n",
@@ -52,7 +66,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "b8c5b88c-e4b8-4d0d-9a35-6e8f106452c2",
"metadata": {},
"outputs": [],
@@ -63,108 +77,101 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "84dd44c1-c428-41f3-a911-520281386c94",
"metadata": {},
"source": [
"**Select a Model**"
"## Prepare Examples"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39c7eeac-01c4-486b-9480-e828a9e73e78",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain import HuggingFaceHub\n",
"\n",
"repo_id = \"google/flan-t5-xl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options\n",
"\n",
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3acf0069",
"id": "3fe7d1d1-241d-426a-acff-e208f1088871",
"metadata": {},
"outputs": [],
"source": [
"from langchain import PromptTemplate, LLMChain\n",
"from langchain import HuggingFaceHub"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6620f39b-3d32-4840-8931-ff7d2c3e47e8",
"metadata": {},
"outputs": [],
"source": [
"from langchain import PromptTemplate, LLMChain"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "44adc1a0-9c0a-4f1e-af5a-fe04222e78d7",
"metadata": {},
"outputs": [],
"source": [
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
"\n",
"template = \"\"\"Question: {question}\n",
"\n",
"Answer: Let's think step by step.\"\"\"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"\n",
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
"\n",
"print(llm_chain.run(question))"
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693",
"metadata": {},
"source": [
"## Examples\n",
"\n",
"Below are some examples of models you can access through the Hugging Face Hub integration."
"Below are some examples of models you can access through the `Hugging Face Hub` integration."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4fa9337e-ccb5-4c52-9b7c-1653148bc256",
"id": "4c16fded-70d1-42af-8bfa-6ddda9f0bc63",
"metadata": {},
"source": [
"### StableLM, by Stability AI\n",
"\n",
"See [Stability AI's](https://huggingface.co/stabilityai) organization page for a list of available models."
"### Flan, by Google"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36a1ce01-bd46-451f-8ee6-61c8f4bd665a",
"metadata": {},
"execution_count": 5,
"id": "39c7eeac-01c4-486b-9480-e828a9e73e78",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"repo_id = \"stabilityai/stablelm-tuned-alpha-3b\"\n",
"# Others include stabilityai/stablelm-base-alpha-3b\n",
"# as well as 7B parameter versions"
"repo_id = \"google/flan-t5-xxl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5654cea-60b0-4f40-ab34-06ba1eca810d",
"execution_count": 8,
"id": "3acf0069",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The FIFA World Cup was held in the year 1994. West Germany won the FIFA World Cup in 1994\n"
]
}
],
"source": [
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f19d0dc-c987-433f-a8d6-b1214e8ee067",
"metadata": {},
"outputs": [],
"source": [
"# Reuse the prompt and question from above.\n",
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"\n",
"print(llm_chain.run(question))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1a5c97af-89bc-4e59-95c1-223742a9160b",
"metadata": {},
@@ -176,34 +183,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "521fcd2b-8e38-4920-b407-5c7d330411c9",
"metadata": {},
"outputs": [],
"source": [
"from langchain import HuggingFaceHub\n",
"\n",
"repo_id = \"databricks/dolly-v2-3b\"\n",
"\n",
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
"repo_id = \"databricks/dolly-v2-3b\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"id": "9907ec3a-fe0c-4543-81c4-d42f9453f16c",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" First of all, the world cup was won by the Germany. Then the Argentina won the world cup in 2022. So, the Argentina won the world cup in 1994.\n",
"\n",
"\n",
"Question: Who\n"
]
}
],
"source": [
"# Reuse the prompt and question from above.\n",
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"print(llm_chain.run(question))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae",
"metadata": {},
@@ -215,17 +228,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "257a091d-750b-4910-ac08-fe1c7b3fd98b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain import HuggingFaceHub\n",
"\n",
"repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options\n",
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
"repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options"
]
},
{
@@ -235,27 +245,74 @@
"metadata": {},
"outputs": [],
"source": [
"# Reuse the prompt and question from above.\n",
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"print(llm_chain.run(question))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "2bf838eb-1083-402f-b099-b07c452418c8",
"metadata": {},
"source": [
"**And many more!**"
"### XGen, by Salesforce\n",
"\n",
"See [more information](https://github.com/salesforce/xgen)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "18c78880-65d7-41d0-9722-18090efb60e9",
"metadata": {},
"outputs": [],
"source": [
"repo_id = \"Salesforce/xgen-7b-8k-base\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18c78880-65d7-41d0-9722-18090efb60e9",
"id": "1b1150b4-ec30-4674-849e-6a41b085aa2b",
"metadata": {},
"outputs": [],
"source": []
"source": [
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"print(llm_chain.run(question))"
]
},
{
"cell_type": "markdown",
"id": "0aca9f9e-f333-449c-97b2-10d1dbf17e75",
"metadata": {},
"source": [
"### Falcon, by Technology Innovation Institute (TII)\n",
"\n",
"See [more information](https://huggingface.co/tiiuae/falcon-40b)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "496b35ac-5ee2-4b68-a6ce-232608f56c03",
"metadata": {},
"outputs": [],
"source": [
"repo_id = \"tiiuae/falcon-40b\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff2541ad-e394-4179-93c2-7ae9c4ca2a25",
"metadata": {},
"outputs": [],
"source": [
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"print(llm_chain.run(question))"
]
}
],
"metadata": {
@@ -274,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.10.6"
}
},
"nbformat": 4,

View File

@@ -253,7 +253,7 @@ html_text = """
```python
html_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0
language=Language.HTML, chunk_size=60, chunk_overlap=0
)
html_docs = html_splitter.create_documents([html_text])
html_docs
@@ -262,19 +262,18 @@ html_docs
<CodeOutputBlock lang="python">
```
[Document(page_content='<!DOCTYPE html>\n<html>\n <head>', metadata={}),
Document(page_content='<title>🦜️🔗 LangChain</title>\n <style>', metadata={}),
Document(page_content='body {', metadata={}),
Document(page_content='font-family: Arial, sans-serif;', metadata={}),
Document(page_content='}\n h1 {', metadata={}),
Document(page_content='color: darkblue;\n }', metadata={}),
Document(page_content='</style>\n </head>\n <body>\n <div>', metadata={}),
Document(page_content='<h1>🦜️🔗 LangChain</h1>', metadata={}),
Document(page_content='<p>⚡ Building applications with LLMs through', metadata={}),
Document(page_content='composability ⚡</p>', metadata={}),
Document(page_content='</div>\n <div>', metadata={}),
Document(page_content='As an open source project in a rapidly', metadata={}),
Document(page_content='developing field, we are extremely open to contributions.', metadata={}),
[Document(page_content='<!DOCTYPE html>\n<html>', metadata={}),
Document(page_content='<head>\n <title>🦜️🔗 LangChain</title>', metadata={}),
Document(page_content='<style>\n body {\n font-family: Aria', metadata={}),
Document(page_content='l, sans-serif;\n }\n h1 {', metadata={}),
Document(page_content='color: darkblue;\n }\n </style>\n </head', metadata={}),
Document(page_content='>', metadata={}),
Document(page_content='<body>', metadata={}),
Document(page_content='<div>\n <h1>🦜️🔗 LangChain</h1>', metadata={}),
Document(page_content='<p>⚡ Building applications with LLMs through composability ⚡', metadata={}),
Document(page_content='</p>\n </div>', metadata={}),
Document(page_content='<div>\n As an open source project in a rapidly dev', metadata={}),
Document(page_content='eloping field, we are extremely open to contributions.', metadata={}),
Document(page_content='</div>\n </body>\n</html>', metadata={})]
```
@@ -310,4 +309,4 @@ sol_docs
]
```
</CodeOutputBlock>
</CodeOutputBlock>

View File

@@ -14,7 +14,6 @@ from pydantic import BaseModel, root_validator
from langchain.agents.agent_types import AgentType
from langchain.agents.tools import InvalidTool
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import (
AsyncCallbackManagerForChainRun,
@@ -35,6 +34,7 @@ from langchain.schema import (
BasePromptTemplate,
OutputParserException,
)
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import BaseMessage
from langchain.tools.base import BaseTool
from langchain.utilities.asyncio import asyncio_timeout

View File

@@ -12,6 +12,7 @@ from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit
from langchain.agents.agent_toolkits.json.base import create_json_agent
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
@@ -64,4 +65,5 @@ __all__ = [
"FileManagementToolkit",
"PlayWrightBrowserToolkit",
"AzureCognitiveServicesToolkit",
"O365Toolkit",
]

View File

@@ -1,5 +1,5 @@
"""Toolkits for agents."""
from abc import abstractmethod
from abc import ABC, abstractmethod
from typing import List
from pydantic import BaseModel
@@ -7,8 +7,8 @@ from pydantic import BaseModel
from langchain.tools import BaseTool
class BaseToolkit(BaseModel):
"""Class responsible for defining a collection of related tools."""
class BaseToolkit(BaseModel, ABC):
"""Class representing a collection of related tools."""
@abstractmethod
def get_tools(self) -> List[BaseTool]:

View File

@@ -1,9 +1,9 @@
"""Agent for working with csvs."""
"""Agent for working with csv files."""
from typing import Any, List, Optional, Union
from langchain.agents.agent import AgentExecutor
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.base_language import BaseLanguageModel
from langchain.schema.language_model import BaseLanguageModel
def create_csv_agent(

View File

@@ -6,9 +6,9 @@ from langchain.agents.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema.language_model import BaseLanguageModel
def create_json_agent(

View File

@@ -4,9 +4,9 @@
from typing import Any, Optional
from langchain.agents.tools import Tool
from langchain.base_language import BaseLanguageModel
from langchain.chains.api.openapi.chain import OpenAPIEndpointChain
from langchain.requests import Requests
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.openapi.utils.api_models import APIOperation
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec

View File

@@ -7,8 +7,8 @@ from pydantic import Field
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.agents.agent_toolkits.nla.tool import NLATool
from langchain.base_language import BaseLanguageModel
from langchain.requests import Requests
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain.tools.plugin import AIPlugin

View File

@@ -1 +1 @@
"""Gmail toolkit."""
"""Office365 toolkit."""

View File

@@ -30,9 +30,9 @@ class O365Toolkit(BaseToolkit):
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
return [
O365SearchEvents(account=self.account),
O365CreateDraftMessage(account=self.account),
O365SearchEmails(account=self.account),
O365SendEvent(account=self.account),
O365SendMessage(account=self.account),
O365SearchEvents(),
O365CreateDraftMessage(),
O365SearchEmails(),
O365SendEvent(),
O365SendMessage(),
]

View File

@@ -9,9 +9,9 @@ from langchain.agents.agent_toolkits.openapi.prompt import (
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema.language_model import BaseLanguageModel
def create_openapi_agent(

View File

@@ -28,7 +28,6 @@ from langchain.agents.agent_toolkits.openapi.planner_prompt import (
from langchain.agents.agent_toolkits.openapi.spec import ReducedOpenAPISpec
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.tools import Tool
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.llms.openai import OpenAI
@@ -36,6 +35,7 @@ from langchain.memory import ReadOnlySharedMemory
from langchain.prompts import PromptTemplate
from langchain.requests import RequestsWrapper
from langchain.schema import BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool
from langchain.tools.requests.tool import BaseRequestsTool

View File

@@ -9,8 +9,8 @@ from langchain.agents.agent_toolkits.json.base import create_json_agent
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.openapi.prompt import DESCRIPTION
from langchain.agents.tools import Tool
from langchain.base_language import BaseLanguageModel
from langchain.requests import TextRequestsWrapper
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools import BaseTool
from langchain.tools.json.tool import JsonSpec
from langchain.tools.requests.tool import (
@@ -39,7 +39,7 @@ class RequestsToolkit(BaseToolkit):
class OpenAPIToolkit(BaseToolkit):
"""Toolkit for interacting with a OpenAPI api."""
"""Toolkit for interacting with an OpenAPI API."""
json_agent: AgentExecutor
requests_wrapper: TextRequestsWrapper

View File

@@ -16,10 +16,10 @@ from langchain.agents.agent_toolkits.pandas.prompt import (
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema import BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import SystemMessage
from langchain.tools.python.tool import PythonAstREPLTool
@@ -30,6 +30,7 @@ def _get_multi_prompt(
suffix: Optional[str] = None,
input_variables: Optional[List[str]] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
num_dfs = len(dfs)
if suffix is not None:
@@ -60,7 +61,7 @@ def _get_multi_prompt(
partial_prompt = prompt.partial()
if "dfs_head" in input_variables:
dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs])
partial_prompt = partial_prompt.partial(num_dfs=str(num_dfs), dfs_head=dfs_head)
if "num_dfs" in input_variables:
partial_prompt = partial_prompt.partial(num_dfs=str(num_dfs))
@@ -73,6 +74,7 @@ def _get_single_prompt(
suffix: Optional[str] = None,
input_variables: Optional[List[str]] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
if suffix is not None:
suffix_to_use = suffix
@@ -100,7 +102,9 @@ def _get_single_prompt(
partial_prompt = prompt.partial()
if "df_head" in input_variables:
partial_prompt = partial_prompt.partial(df_head=str(df.head().to_markdown()))
partial_prompt = partial_prompt.partial(
df_head=str(df.head(number_of_head_rows).to_markdown())
)
return partial_prompt, tools
@@ -110,6 +114,7 @@ def _get_prompt_and_tools(
suffix: Optional[str] = None,
input_variables: Optional[List[str]] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
try:
import pandas as pd
@@ -131,6 +136,7 @@ def _get_prompt_and_tools(
suffix=suffix,
input_variables=input_variables,
include_df_in_prompt=include_df_in_prompt,
number_of_head_rows=number_of_head_rows,
)
else:
if not isinstance(df, pd.DataFrame):
@@ -141,6 +147,7 @@ def _get_prompt_and_tools(
suffix=suffix,
input_variables=input_variables,
include_df_in_prompt=include_df_in_prompt,
number_of_head_rows=number_of_head_rows,
)
@@ -149,13 +156,18 @@ def _get_functions_single_prompt(
prefix: Optional[str] = None,
suffix: Optional[str] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
if suffix is not None:
suffix_to_use = suffix
if include_df_in_prompt:
suffix_to_use = suffix_to_use.format(df_head=str(df.head().to_markdown()))
suffix_to_use = suffix_to_use.format(
df_head=str(df.head(number_of_head_rows).to_markdown())
)
elif include_df_in_prompt:
suffix_to_use = FUNCTIONS_WITH_DF.format(df_head=str(df.head().to_markdown()))
suffix_to_use = FUNCTIONS_WITH_DF.format(
df_head=str(df.head(number_of_head_rows).to_markdown())
)
else:
suffix_to_use = ""
@@ -173,16 +185,19 @@ def _get_functions_multi_prompt(
prefix: Optional[str] = None,
suffix: Optional[str] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
if suffix is not None:
suffix_to_use = suffix
if include_df_in_prompt:
dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
dfs_head = "\n\n".join(
[d.head(number_of_head_rows).to_markdown() for d in dfs]
)
suffix_to_use = suffix_to_use.format(
dfs_head=dfs_head,
)
elif include_df_in_prompt:
dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs])
suffix_to_use = FUNCTIONS_WITH_MULTI_DF.format(
dfs_head=dfs_head,
)
@@ -208,6 +223,7 @@ def _get_functions_prompt_and_tools(
suffix: Optional[str] = None,
input_variables: Optional[List[str]] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
try:
import pandas as pd
@@ -230,6 +246,7 @@ def _get_functions_prompt_and_tools(
prefix=prefix,
suffix=suffix,
include_df_in_prompt=include_df_in_prompt,
number_of_head_rows=number_of_head_rows,
)
else:
if not isinstance(df, pd.DataFrame):
@@ -239,6 +256,7 @@ def _get_functions_prompt_and_tools(
prefix=prefix,
suffix=suffix,
include_df_in_prompt=include_df_in_prompt,
number_of_head_rows=number_of_head_rows,
)
@@ -257,6 +275,7 @@ def create_pandas_dataframe_agent(
early_stopping_method: str = "force",
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
include_df_in_prompt: Optional[bool] = True,
number_of_head_rows: int = 5,
**kwargs: Dict[str, Any],
) -> AgentExecutor:
"""Construct a pandas agent from an LLM and dataframe."""
@@ -268,6 +287,7 @@ def create_pandas_dataframe_agent(
suffix=suffix,
input_variables=input_variables,
include_df_in_prompt=include_df_in_prompt,
number_of_head_rows=number_of_head_rows,
)
llm_chain = LLMChain(
llm=llm,
@@ -288,6 +308,7 @@ def create_pandas_dataframe_agent(
suffix=suffix,
input_variables=input_variables,
include_df_in_prompt=include_df_in_prompt,
number_of_head_rows=number_of_head_rows,
)
agent = OpenAIFunctionsAgent(
llm=llm,

View File

@@ -9,15 +9,15 @@ from langchain.agents.agent_toolkits.powerbi.prompt import (
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema.language_model import BaseLanguageModel
from langchain.utilities.powerbi import PowerBIDataset
def create_pbi_agent(
llm: BaseLanguageModel,
toolkit: Optional[PowerBIToolkit],
toolkit: Optional[PowerBIToolkit] = None,
powerbi: Optional[PowerBIDataset] = None,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = POWERBI_PREFIX,
@@ -36,13 +36,13 @@ def create_pbi_agent(
raise ValueError("Must provide either a toolkit or powerbi dataset")
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
tools = toolkit.get_tools()
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
agent = ZeroShotAgent(
llm_chain=LLMChain(
llm=llm,
prompt=ZeroShotAgent.create_prompt(
tools,
prefix=prefix.format(top_k=top_k),
prefix=prefix.format(top_k=top_k).format(tables=tables),
suffix=suffix,
format_instructions=format_instructions,
input_variables=input_variables,

View File

@@ -18,7 +18,7 @@ from langchain.utilities.powerbi import PowerBIDataset
def create_pbi_chat_agent(
llm: BaseChatModel,
toolkit: Optional[PowerBIToolkit],
toolkit: Optional[PowerBIToolkit] = None,
powerbi: Optional[PowerBIDataset] = None,
callback_manager: Optional[BaseCallbackManager] = None,
output_parser: Optional[AgentOutputParser] = None,
@@ -32,19 +32,20 @@ def create_pbi_chat_agent(
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Dict[str, Any],
) -> AgentExecutor:
"""Construct a pbi agent from an Chat LLM and tools.
"""Construct a Power BI agent from a Chat LLM and tools.
If you supply only a toolkit and no powerbi dataset, the same LLM is used for both.
If you supply only a toolkit and no Power BI dataset, the same LLM is used for both.
"""
if toolkit is None:
if powerbi is None:
raise ValueError("Must provide either a toolkit or powerbi dataset")
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
tools = toolkit.get_tools()
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
agent = ConversationalChatAgent.from_llm_and_tools(
llm=llm,
tools=tools,
system_message=prefix.format(top_k=top_k),
system_message=prefix.format(top_k=top_k).format(tables=tables),
human_message=suffix,
input_variables=input_variables,
callback_manager=callback_manager,

View File

@@ -4,7 +4,7 @@
POWERBI_PREFIX = """You are an agent designed to help users interact with a PowerBI Dataset.
Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, return "This does not appear to be part of this dataset." as the answer.
Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
"""
@@ -17,9 +17,9 @@ Thought: I can first ask which tables I have, then how each table is defined and
POWERBI_CHAT_PREFIX = """Assistant is a large language model built to help users interact with a PowerBI Dataset.
Assistant has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
Assistant should try to create a correct and complete answer to the question from the user. If the user asks a question not related to the dataset it should return "This does not appear to be part of this dataset." as the answer. The user might make a mistake with the spelling of certain values, if you think that is the case, ask the user to confirm the spelling of the value and then run the query again. Unless the user specifies a specific number of examples they wish to obtain, and the results are too large, limit your query to at most {top_k} results, but make it clear when answering which field was used for the filtering. The user has access to these tables: {{tables}}.
Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
The answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000.
"""
POWERBI_CHAT_SUFFIX = """TOOLS

View File

@@ -1,15 +1,25 @@
"""Toolkit for interacting with a Power BI dataset."""
from typing import List, Optional
from typing import List, Optional, Union
from pydantic import Field
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools import BaseTool
from langchain.tools.powerbi.prompt import QUESTION_TO_QUERY
from langchain.tools.powerbi.prompt import (
QUESTION_TO_QUERY_BASE,
SINGLE_QUESTION_TO_QUERY,
USER_INPUT,
)
from langchain.tools.powerbi.tool import (
InfoPowerBITool,
ListPowerBITool,
@@ -22,10 +32,12 @@ class PowerBIToolkit(BaseToolkit):
"""Toolkit for interacting with PowerBI dataset."""
powerbi: PowerBIDataset = Field(exclude=True)
llm: BaseLanguageModel = Field(exclude=True)
llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True)
examples: Optional[str] = None
max_iterations: int = 5
callback_manager: Optional[BaseCallbackManager] = None
output_token_limit: Optional[int] = None
tiktoken_model_name: Optional[str] = None
class Config:
"""Configuration for this pydantic object."""
@@ -34,30 +46,47 @@ class PowerBIToolkit(BaseToolkit):
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
if self.callback_manager:
chain = LLMChain(
llm=self.llm,
callback_manager=self.callback_manager,
prompt=PromptTemplate(
template=QUESTION_TO_QUERY,
input_variables=["tool_input", "tables", "schemas", "examples"],
),
)
else:
chain = LLMChain(
llm=self.llm,
prompt=PromptTemplate(
template=QUESTION_TO_QUERY,
input_variables=["tool_input", "tables", "schemas", "examples"],
),
)
return [
QueryPowerBITool(
llm_chain=chain,
llm_chain=self._get_chain(),
powerbi=self.powerbi,
examples=self.examples,
max_iterations=self.max_iterations,
output_token_limit=self.output_token_limit,
tiktoken_model_name=self.tiktoken_model_name,
),
InfoPowerBITool(powerbi=self.powerbi),
ListPowerBITool(powerbi=self.powerbi),
]
def _get_chain(self) -> LLMChain:
    """Construct the query-writing chain for the configured model type.

    Chat models receive a two-message chat prompt (system message built from
    ``QUESTION_TO_QUERY_BASE``, human message built from ``USER_INPUT``).
    Any other language model receives the single-string
    ``SINGLE_QUESTION_TO_QUERY`` prompt. Both prompt variants consume the
    same four variables: ``tool_input``, ``tables``, ``schemas`` and
    ``examples``.

    Returns:
        An ``LLMChain`` bound to ``self.llm`` with the matching prompt.
        ``self.callback_manager`` is forwarded when it is set; otherwise
        ``None`` is passed.
    """
    callback_manager = self.callback_manager if self.callback_manager else None

    # Check for the chat type first. In LangChain, BaseChatModel derives
    # from BaseLanguageModel, so testing BaseLanguageModel first matched
    # every model and left the chat prompt unreachable.
    if isinstance(self.llm, BaseChatModel):
        system_prompt = SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                template=QUESTION_TO_QUERY_BASE,
                input_variables=["tables", "schemas", "examples"],
            )
        )
        human_prompt = HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                template=USER_INPUT,
                input_variables=["tool_input"],
            )
        )
        return LLMChain(
            llm=self.llm,
            callback_manager=callback_manager,
            prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]),
        )

    return LLMChain(
        llm=self.llm,
        callback_manager=callback_manager,
        prompt=PromptTemplate(
            template=SINGLE_QUESTION_TO_QUERY,
            input_variables=["tool_input", "tables", "schemas", "examples"],
        ),
    )

View File

@@ -7,9 +7,9 @@ from langchain.agents.agent_toolkits.python.prompt import PREFIX
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import SystemMessage
from langchain.tools.python.tool import PythonREPLTool

View File

@@ -6,9 +6,9 @@ from langchain.agents.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUF
from langchain.agents.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema.language_model import BaseLanguageModel
def create_spark_sql_agent(

View File

@@ -4,7 +4,7 @@ from typing import List
from pydantic import Field
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.base_language import BaseLanguageModel
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools import BaseTool
from langchain.tools.spark_sql.tool import (
InfoSparkSQLTool,

View File

@@ -12,7 +12,6 @@ from langchain.agents.agent_types import AgentType
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.prompts.chat import (
@@ -20,6 +19,7 @@ from langchain.prompts.chat import (
HumanMessagePromptTemplate,
MessagesPlaceholder,
)
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import AIMessage, SystemMessage

View File

@@ -4,7 +4,7 @@ from typing import List
from pydantic import Field
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.base_language import BaseLanguageModel
from langchain.schema.language_model import BaseLanguageModel
from langchain.sql_database import SQLDatabase
from langchain.tools import BaseTool
from langchain.tools.sql_database.tool import (

View File

@@ -8,9 +8,9 @@ from langchain.agents.agent_toolkits.vectorstore.toolkit import (
VectorStoreToolkit,
)
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema.language_model import BaseLanguageModel
def create_vectorstore_agent(

View File

@@ -4,8 +4,8 @@ from typing import List
from pydantic import BaseModel, Field
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.base_language import BaseLanguageModel
from langchain.llms.openai import OpenAI
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools import BaseTool
from langchain.tools.vectorstore.tool import (
VectorStoreQATool,

View File

@@ -11,7 +11,6 @@ from langchain.agents.chat.prompt import (
SYSTEM_MESSAGE_SUFFIX,
)
from langchain.agents.utils import validate_tools_single_input
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.prompts.chat import (
@@ -20,6 +19,7 @@ from langchain.prompts.chat import (
SystemMessagePromptTemplate,
)
from langchain.schema import AgentAction, BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool

View File

@@ -30,9 +30,8 @@ class ChatOutputParser(AgentOutputParser):
except Exception:
if not includes_answer:
raise OutputParserException(f"Could not parse LLM output: {text}")
return AgentFinish(
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
)
output = text.split(FINAL_ANSWER_ACTION)[-1].strip()
return AgentFinish({"output": output}, text)
@property
def _type(self) -> str:

View File

@@ -10,10 +10,10 @@ from langchain.agents.agent_types import AgentType
from langchain.agents.conversational.output_parser import ConvoOutputParser
from langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.agents.utils import validate_tools_single_input
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool

View File

@@ -13,7 +13,6 @@ from langchain.agents.conversational_chat.prompt import (
TEMPLATE_TOOL_RESPONSE,
)
from langchain.agents.utils import validate_tools_single_input
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains import LLMChain
from langchain.prompts.chat import (
@@ -23,6 +22,7 @@ from langchain.prompts.chat import (
SystemMessagePromptTemplate,
)
from langchain.schema import AgentAction, BaseOutputParser, BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import AIMessage, BaseMessage, HumanMessage
from langchain.tools.base import BaseTool

View File

@@ -4,8 +4,8 @@ from typing import Any, Optional, Sequence
from langchain.agents.agent import AgentExecutor
from langchain.agents.agent_types import AgentType
from langchain.agents.loading import AGENT_TO_CLASS, load_agent
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool

View File

@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Callable, Tuple
from mypy_extensions import Arg, KwArg
from langchain.agents.tools import Tool
from langchain.base_language import BaseLanguageModel
from langchain.schema.language_model import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import Callbacks
from langchain.chains.api import news_docs, open_meteo_docs, podcast_docs, tmdb_docs
@@ -38,6 +38,8 @@ from langchain.tools.sleep.tool import SleepTool
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
from langchain.tools.openweathermap.tool import OpenWeatherMapQueryRun
from langchain.tools.dataforseo_api_search import DataForSeoAPISearchRun
from langchain.tools.dataforseo_api_search import DataForSeoAPISearchResults
from langchain.utilities import ArxivAPIWrapper
from langchain.utilities import PubMedAPIWrapper
from langchain.utilities.bing_search import BingSearchAPIWrapper
@@ -53,6 +55,7 @@ from langchain.utilities.twilio import TwilioAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
from langchain.utilities.dataforseo_api_search import DataForSeoAPIWrapper
def _get_python_repl() -> BaseTool:
@@ -278,6 +281,14 @@ def _get_openweathermap(**kwargs: Any) -> BaseTool:
return OpenWeatherMapQueryRun(api_wrapper=OpenWeatherMapAPIWrapper(**kwargs))
def _get_dataforseo_api_search(**kwargs: Any) -> BaseTool:
    """Build a ``DataForSeoAPISearchRun`` tool.

    All keyword arguments are forwarded unchanged to ``DataForSeoAPIWrapper``,
    and the resulting wrapper is handed to the tool as ``api_wrapper``.
    """
    wrapper = DataForSeoAPIWrapper(**kwargs)
    return DataForSeoAPISearchRun(api_wrapper=wrapper)
def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
    """Build a ``DataForSeoAPISearchResults`` tool.

    All keyword arguments are forwarded unchanged to ``DataForSeoAPIWrapper``,
    and the resulting wrapper is handed to the tool as ``api_wrapper``.
    """
    wrapper = DataForSeoAPIWrapper(**kwargs)
    return DataForSeoAPISearchResults(api_wrapper=wrapper)
_EXTRA_LLM_TOOLS: Dict[
str,
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
@@ -326,6 +337,14 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
"sceneXplain": (_get_scenexplain, []),
"graphql": (_get_graphql_tool, ["graphql_endpoint"]),
"openweathermap-api": (_get_openweathermap, ["openweathermap_api_key"]),
"dataforseo-api-search": (
_get_dataforseo_api_search,
["api_login", "api_password", "aiosession"],
),
"dataforseo-api-search-json": (
_get_dataforseo_api_search_json,
["api_login", "api_password", "aiosession"],
),
}

View File

@@ -9,8 +9,8 @@ import yaml
from langchain.agents.agent import BaseMultiActionAgent, BaseSingleActionAgent
from langchain.agents.tools import Tool
from langchain.agents.types import AGENT_TO_CLASS
from langchain.base_language import BaseLanguageModel
from langchain.chains.loading import load_chain, load_chain_from_config
from langchain.schema.language_model import BaseLanguageModel
from langchain.utilities.loading import try_load_from_hub
logger = logging.getLogger(__file__)

View File

@@ -11,10 +11,10 @@ from langchain.agents.mrkl.output_parser import MRKLOutputParser
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.agents.tools import Tool
from langchain.agents.utils import validate_tools_single_input
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool

View File

@@ -7,7 +7,6 @@ from typing import Any, List, Optional, Sequence, Tuple, Union
from pydantic import root_validator
from langchain.agents import BaseSingleActionAgent
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import Callbacks
from langchain.chat_models.openai import ChatOpenAI
@@ -23,6 +22,7 @@ from langchain.schema import (
BasePromptTemplate,
OutputParserException,
)
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import (
AIMessage,
BaseMessage,
@@ -108,7 +108,6 @@ def _parse_ai_message(message: BaseMessage) -> Union[AgentAction, AgentFinish]:
function_call = message.additional_kwargs.get("function_call", {})
if function_call:
function_call = message.additional_kwargs["function_call"]
function_name = function_call["name"]
try:
_tool_input = json.loads(function_call["arguments"])

View File

@@ -7,7 +7,6 @@ from typing import Any, List, Optional, Sequence, Tuple, Union
from pydantic import root_validator
from langchain.agents import BaseMultiActionAgent
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import Callbacks
from langchain.chat_models.openai import ChatOpenAI
@@ -23,6 +22,7 @@ from langchain.schema import (
BasePromptTemplate,
OutputParserException,
)
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import (
AIMessage,
BaseMessage,
@@ -107,7 +107,6 @@ def _parse_ai_message(message: BaseMessage) -> Union[List[AgentAction], AgentFin
function_call = message.additional_kwargs.get("function_call", {})
if function_call:
function_call = message.additional_kwargs["function_call"]
try:
tools = json.loads(function_call["arguments"])["actions"]
except JSONDecodeError:

View File

@@ -10,10 +10,10 @@ from langchain.agents.react.textworld_prompt import TEXTWORLD_PROMPT
from langchain.agents.react.wiki_prompt import WIKI_PROMPT
from langchain.agents.tools import Tool
from langchain.agents.utils import validate_tools_single_input
from langchain.base_language import BaseLanguageModel
from langchain.docstore.base import Docstore
from langchain.docstore.document import Document
from langchain.schema import BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool

View File

@@ -1,4 +1,4 @@
"""Chain that does self ask with search."""
"""Chain that does self-ask with search."""
from typing import Any, Sequence, Union
from pydantic import Field
@@ -9,8 +9,8 @@ from langchain.agents.self_ask_with_search.output_parser import SelfAskOutputPar
from langchain.agents.self_ask_with_search.prompt import PROMPT
from langchain.agents.tools import Tool
from langchain.agents.utils import validate_tools_single_input
from langchain.base_language import BaseLanguageModel
from langchain.schema import BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
@@ -59,7 +59,7 @@ class SelfAskWithSearchAgent(Agent):
class SelfAskWithSearchChain(AgentExecutor):
"""Chain that does self ask with search.
"""Chain that does self-ask with search.
Example:
.. code-block:: python

View File

@@ -8,7 +8,6 @@ from langchain.agents.structured_chat.output_parser import (
StructuredChatOutputParserWithRetries,
)
from langchain.agents.structured_chat.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.prompts.chat import (
@@ -17,6 +16,7 @@ from langchain.prompts.chat import (
SystemMessagePromptTemplate,
)
from langchain.schema import AgentAction, BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools import BaseTool
HUMAN_MESSAGE_TEMPLATE = "{input}\n\n{agent_scratchpad}"

View File

@@ -9,9 +9,9 @@ from pydantic import Field
from langchain.agents.agent import AgentOutputParser
from langchain.agents.structured_chat.prompt import FORMAT_INSTRUCTIONS
from langchain.base_language import BaseLanguageModel
from langchain.output_parsers import OutputFixingParser
from langchain.schema import AgentAction, AgentFinish, OutputParserException
from langchain.schema.language_model import BaseLanguageModel
logger = logging.getLogger(__name__)

View File

@@ -1,104 +1,6 @@
"""Base class for all language models."""
"""Deprecated module for BaseLanguageModel class, kept for backwards compatibility."""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, List, Optional, Sequence, Set
from langchain.schema.language_model import BaseLanguageModel
from langchain.callbacks.manager import Callbacks
from langchain.load.serializable import Serializable
from langchain.schema import LLMResult, PromptValue
from langchain.schema.messages import BaseMessage, get_buffer_string
def _get_token_ids_default_method(text: str) -> List[int]:
"""Encode the text into token IDs."""
# TODO: this method may not be exact.
# TODO: this method may differ based on model (eg codex).
try:
from transformers import GPT2TokenizerFast
except ImportError:
raise ValueError(
"Could not import transformers python package. "
"This is needed in order to calculate get_token_ids. "
"Please install it with `pip install transformers`."
)
# create a GPT-2 tokenizer instance
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# tokenize the text using the GPT-2 tokenizer
return tokenizer.encode(text)
class BaseLanguageModel(Serializable, ABC):
    """Abstract interface for language models.

    Subclasses provide prompt generation and prediction in both sync and
    async flavours. Token-counting helpers are implemented here on top of
    ``_get_token_ids_default_method``.
    """

    @abstractmethod
    def generate_prompt(
        self,
        prompts: List[PromptValue],
        stop: Optional[List[str]] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Take in a list of prompt values and return an LLMResult."""

    @abstractmethod
    async def agenerate_prompt(
        self,
        prompts: List[PromptValue],
        stop: Optional[List[str]] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Take in a list of prompt values and return an LLMResult (async)."""

    @abstractmethod
    def predict(
        self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
    ) -> str:
        """Predict text from text."""

    @abstractmethod
    def predict_messages(
        self,
        messages: List[BaseMessage],
        *,
        stop: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> BaseMessage:
        """Predict message from messages."""

    @abstractmethod
    async def apredict(
        self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
    ) -> str:
        """Predict text from text (async)."""

    @abstractmethod
    async def apredict_messages(
        self,
        messages: List[BaseMessage],
        *,
        stop: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> BaseMessage:
        """Predict message from messages (async)."""

    def get_token_ids(self, text: str) -> List[int]:
        """Return the token IDs for ``text`` via the module-level default helper."""
        return _get_token_ids_default_method(text)

    def get_num_tokens(self, text: str) -> int:
        """Return how many tokens ``text`` encodes to, per ``get_token_ids``."""
        token_ids = self.get_token_ids(text)
        return len(token_ids)

    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
        """Return the summed token count of each message's buffer string."""
        total = 0
        for message in messages:
            # Each message is rendered on its own before being counted.
            total += self.get_num_tokens(get_buffer_string([message]))
        return total

    @classmethod
    def all_required_field_names(cls) -> Set:
        """Return the name of every model field, plus its alias when it has one."""
        names = set()
        for model_field in cls.__fields__.values():
            if model_field.has_alias:
                names.update((model_field.name, model_field.alias))
            else:
                names.add(model_field.name)
        return names
__all__ = ["BaseLanguageModel"]

View File

@@ -6,6 +6,7 @@ from langchain.callbacks.arize_callback import ArizeCallbackHandler
from langchain.callbacks.arthur_callback import ArthurCallbackHandler
from langchain.callbacks.clearml_callback import ClearMLCallbackHandler
from langchain.callbacks.comet_ml_callback import CometCallbackHandler
from langchain.callbacks.context_callback import ContextCallbackHandler
from langchain.callbacks.file import FileCallbackHandler
from langchain.callbacks.flyte_callback import FlyteCallbackHandler
from langchain.callbacks.human import HumanApprovalCallbackHandler
@@ -36,6 +37,7 @@ __all__ = [
"ArthurCallbackHandler",
"ClearMLCallbackHandler",
"CometCallbackHandler",
"ContextCallbackHandler",
"FileCallbackHandler",
"HumanApprovalCallbackHandler",
"InfinoCallbackHandler",

View File

@@ -147,6 +147,7 @@ class CallbackManagerMixin:
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when LLM starts running."""
@@ -159,6 +160,7 @@ class CallbackManagerMixin:
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when a chat model starts running."""
@@ -168,10 +170,13 @@ class CallbackManagerMixin:
def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when Retriever starts running."""
@@ -184,6 +189,7 @@ class CallbackManagerMixin:
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when chain starts running."""
@@ -196,6 +202,7 @@ class CallbackManagerMixin:
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when tool starts running."""
@@ -266,6 +273,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when LLM starts running."""
@@ -278,6 +286,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when a chat model starts running."""
@@ -326,6 +335,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when chain starts running."""
@@ -360,6 +370,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when tool starts running."""
@@ -421,11 +432,13 @@ class AsyncCallbackHandler(BaseCallbackHandler):
async def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run on retriever start."""
@@ -464,6 +477,8 @@ class BaseCallbackManager(CallbackManagerMixin):
*,
tags: Optional[List[str]] = None,
inheritable_tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
inheritable_metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""Initialize callback manager."""
self.handlers: List[BaseCallbackHandler] = handlers
@@ -473,6 +488,8 @@ class BaseCallbackManager(CallbackManagerMixin):
self.parent_run_id: Optional[UUID] = parent_run_id
self.tags = tags or []
self.inheritable_tags = inheritable_tags or []
self.metadata = metadata or {}
self.inheritable_metadata = inheritable_metadata or {}
@property
def is_async(self) -> bool:
@@ -515,3 +532,13 @@ class BaseCallbackManager(CallbackManagerMixin):
for tag in tags:
self.tags.remove(tag)
self.inheritable_tags.remove(tag)
def add_metadata(self, metadata: Dict[str, Any], inherit: bool = True) -> None:
    """Merge ``metadata`` into this manager's metadata.

    Args:
        metadata: Key/value pairs to merge; existing keys are overwritten.
        inherit: When true, the pairs are also merged into
            ``inheritable_metadata``.
    """
    targets = [self.metadata]
    if inherit:
        targets.append(self.inheritable_metadata)
    for target in targets:
        target.update(metadata)
def remove_metadata(self, keys: List[str]) -> None:
    """Remove ``keys`` from this manager's metadata.

    Args:
        keys: Metadata keys to drop from ``metadata`` and, where present,
            from ``inheritable_metadata``.

    Raises:
        KeyError: If a key is not present in ``metadata``.
    """
    for key in keys:
        self.metadata.pop(key)
        # Entries added with ``inherit=False`` exist only in ``metadata``;
        # tolerate their absence here instead of raising after the local
        # entry has already been removed.
        self.inheritable_metadata.pop(key, None)

View File

@@ -18,6 +18,7 @@ LANGCHAIN_MODEL_NAME = "langchain-model"
def import_comet_ml() -> Any:
"""Import comet_ml and raise an error if it is not installed."""
try:
import comet_ml # noqa: F401
except ImportError:

View File

@@ -0,0 +1,193 @@
"""Callback handler for Context AI"""
import os
from typing import Any, Dict, List
from uuid import UUID
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import (
BaseMessage,
LLMResult,
)
def import_context() -> Any:
    """Import the `getcontext` client package and the models this module uses.

    Returns:
        The tuple ``(getcontext, Credential, Conversation, Message,
        MessageRole, Rating)``.

    Raises:
        ImportError: If `getcontext` (installed via the `context-python`
            distribution) or one of the required submodules cannot be
            imported.
    """
    try:
        import getcontext  # noqa: F401
        from getcontext.generated.models import (
            Conversation,
            Message,
            MessageRole,
            Rating,
        )
        from getcontext.token import Credential  # noqa: F401
    except ImportError as e:
        # Chain the original error so the underlying import failure stays visible.
        raise ImportError(
            "To use the context callback manager you need to have the "
            "`getcontext` python package installed (version >=0.3.0). "
            "Please install it with `pip install --upgrade context-python`"
        ) from e
    return getcontext, Credential, Conversation, Message, MessageRole, Rating
class ContextCallbackHandler(BaseCallbackHandler):
    """Callback Handler that records transcripts to Context (https://getcontext.ai).

    Keyword Args:
        token (optional): The token with which to authenticate requests to Context.
            Visit https://go.getcontext.ai/settings to generate a token.
            If not provided, the value of the `CONTEXT_TOKEN` environment
            variable will be used.

    Raises:
        ImportError: if the `context-python` package is not installed.

    Chat Example:
        >>> from langchain.chat_models import ChatOpenAI
        >>> from langchain.schema import HumanMessage, SystemMessage
        >>> from langchain.callbacks import ContextCallbackHandler
        >>> context_callback = ContextCallbackHandler(
        ...     token="<CONTEXT_TOKEN_HERE>",
        ... )
        >>> chat = ChatOpenAI(
        ...     temperature=0,
        ...     headers={"user_id": "123"},
        ...     callbacks=[context_callback],
        ...     openai_api_key="API_KEY_HERE",
        ... )
        >>> messages = [
        ...     SystemMessage(content="You translate English to French."),
        ...     HumanMessage(content="I love programming with LangChain."),
        ... ]
        >>> chat(messages)

    Chain Example:
        >>> from langchain import LLMChain
        >>> from langchain.chat_models import ChatOpenAI
        >>> from langchain.callbacks import ContextCallbackHandler
        >>> from langchain.prompts import PromptTemplate
        >>> from langchain.prompts.chat import (
        ...     ChatPromptTemplate,
        ...     HumanMessagePromptTemplate,
        ... )
        >>> context_callback = ContextCallbackHandler(
        ...     token="<CONTEXT_TOKEN_HERE>",
        ... )
        >>> human_message_prompt = HumanMessagePromptTemplate(
        ...     prompt=PromptTemplate(
        ...         template="What is a good name for a company that makes {product}?",
        ...         input_variables=["product"],
        ...     ),
        ... )
        >>> chat_prompt_template = ChatPromptTemplate.from_messages(
        ...     [human_message_prompt]
        ... )
        >>> # Note: the same callback object must be shared between the
        >>> # LLM and the chain.
        >>> chat = ChatOpenAI(temperature=0.9, callbacks=[context_callback])
        >>> chain = LLMChain(
        ...     llm=chat,
        ...     prompt=chat_prompt_template,
        ...     callbacks=[context_callback]
        ... )
        >>> chain.run("colorful socks")
    """

    def __init__(self, token: str = "", verbose: bool = False, **kwargs: Any) -> None:
        """Create the Context API client and reset the transcript buffers.

        Args:
            token: Context API token. Falls back to the `CONTEXT_TOKEN`
                environment variable, then to an empty string.
            verbose: Accepted for signature compatibility; not read here.
            **kwargs: Accepted for signature compatibility; not read here.

        Raises:
            ImportError: Propagated from ``import_context``.
        """
        (
            self.context,
            self.credential,
            self.conversation_model,
            self.message_model,
            self.message_role_model,
            self.rating_model,
        ) = import_context()

        token = token or os.environ.get("CONTEXT_TOKEN") or ""

        self.client = self.context.ContextAPI(credential=self.credential(token))

        # Set while a chain run is in flight; on_llm_end defers logging to
        # on_chain_end whenever this is truthy.
        self.chain_run_id = None

        self.llm_model = None

        # Transcript and metadata accumulated since the last upload.
        self.messages: List[Any] = []
        self.metadata: Dict[str, str] = {}

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[BaseMessage]],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> Any:
        """Run when the chat model is started.

        Records the model name from ``invocation_params`` (when given) and
        buffers every message of the first prompt in ``messages``.
        """
        llm_model = kwargs.get("invocation_params", {}).get("model", None)
        if llm_model is not None:
            self.metadata["llm_model"] = llm_model

        if not messages:
            return

        roles = self.message_role_model
        role_by_type = {
            "human": roles.USER,
            "system": roles.SYSTEM,
            "ai": roles.ASSISTANT,
        }
        for message in messages[0]:
            self.messages.append(
                self.message_model(
                    message=message.content,
                    # Unrecognised message types are recorded as SYSTEM.
                    role=role_by_type.get(message.type, roles.SYSTEM),
                )
            )

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when LLM ends.

        Outside of a chain run, buffers the first generation as the assistant
        reply and uploads the conversation. Inside a chain run nothing is
        done here; on_chain_end handles the upload.
        """
        if not response.generations or not response.generations[0]:
            return

        if not self.chain_run_id:
            generation = response.generations[0][0]
            self.messages.append(
                self.message_model(
                    message=generation.text,
                    role=self.message_role_model.ASSISTANT,
                )
            )

            self._log_conversation()

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Run when chain starts; remembers the chain's ``run_id`` if provided."""
        self.chain_run_id = kwargs.get("run_id", None)

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Run when chain ends.

        Buffers ``outputs["text"]`` as the assistant reply when that key is
        present, uploads the conversation, and always clears the chain
        marker so later standalone LLM calls are logged again.
        """
        try:
            # Chains whose output key is not "text" contribute no assistant
            # message here instead of raising KeyError.
            if "text" in outputs:
                self.messages.append(
                    self.message_model(
                        message=outputs["text"],
                        role=self.message_role_model.ASSISTANT,
                    )
                )
            self._log_conversation()
        finally:
            # Reset even if the upload fails; otherwise on_llm_end would stay
            # in "inside a chain" mode indefinitely.
            self.chain_run_id = None

    def _log_conversation(self) -> None:
        """Log the conversation to the context API.

        Does nothing when no messages are buffered. After the upsert call
        returns, the message and metadata buffers are reset.
        """
        if not self.messages:
            return

        self.client.log.conversation_upsert(
            body={
                "conversation": self.conversation_model(
                    messages=self.messages,
                    metadata=self.metadata,
                )
            }
        )

        self.messages = []
        self.metadata = {}

View File

@@ -39,7 +39,7 @@ class FileCallbackHandler(BaseCallbackHandler):
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
"""Run on agent action."""
print_text(action.log, color=color if color else self.color, file=self.file)
print_text(action.log, color=color or self.color, file=self.file)
def on_tool_end(
self,
@@ -52,24 +52,18 @@ class FileCallbackHandler(BaseCallbackHandler):
"""If not the final action, print out observation."""
if observation_prefix is not None:
print_text(f"\n{observation_prefix}", file=self.file)
print_text(output, color=color if color else self.color, file=self.file)
print_text(output, color=color or self.color, file=self.file)
if llm_prefix is not None:
print_text(f"\n{llm_prefix}", file=self.file)
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Any,
self, text: str, color: Optional[str] = None, end: str = "", **kwargs: Any
) -> None:
"""Run when agent ends."""
print_text(text, color=color if color else self.color, end=end, file=self.file)
print_text(text, color=color or self.color, end=end, file=self.file)
def on_agent_finish(
self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
) -> None:
"""Run on agent end."""
print_text(
finish.log, color=color if self.color else color, end="\n", file=self.file
)
print_text(finish.log, color=color or self.color, end="\n", file=self.file)

View File

@@ -23,6 +23,7 @@ logger = logging.getLogger(__name__)
def import_flytekit() -> Tuple[flytekit, renderer]:
"""Import flytekit and flytekitplugins-deck-standard."""
try:
import flytekit # noqa: F401
from flytekitplugins.deck import renderer # noqa: F401
@@ -39,6 +40,7 @@ def import_flytekit() -> Tuple[flytekit, renderer]:
def analyze_text(
text: str,
nlp: Any = None,
textstat: Any = None,
) -> dict:
"""Analyze text using textstat and spacy.
@@ -51,26 +53,26 @@ def analyze_text(
files serialized to HTML string.
"""
resp: Dict[str, Any] = {}
textstat = import_textstat()
text_complexity_metrics = {
"flesch_reading_ease": textstat.flesch_reading_ease(text),
"flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
"smog_index": textstat.smog_index(text),
"coleman_liau_index": textstat.coleman_liau_index(text),
"automated_readability_index": textstat.automated_readability_index(text),
"dale_chall_readability_score": textstat.dale_chall_readability_score(text),
"difficult_words": textstat.difficult_words(text),
"linsear_write_formula": textstat.linsear_write_formula(text),
"gunning_fog": textstat.gunning_fog(text),
"fernandez_huerta": textstat.fernandez_huerta(text),
"szigriszt_pazos": textstat.szigriszt_pazos(text),
"gutierrez_polini": textstat.gutierrez_polini(text),
"crawford": textstat.crawford(text),
"gulpease_index": textstat.gulpease_index(text),
"osman": textstat.osman(text),
}
resp.update({"text_complexity_metrics": text_complexity_metrics})
resp.update(text_complexity_metrics)
if textstat is not None:
text_complexity_metrics = {
"flesch_reading_ease": textstat.flesch_reading_ease(text),
"flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
"smog_index": textstat.smog_index(text),
"coleman_liau_index": textstat.coleman_liau_index(text),
"automated_readability_index": textstat.automated_readability_index(text),
"dale_chall_readability_score": textstat.dale_chall_readability_score(text),
"difficult_words": textstat.difficult_words(text),
"linsear_write_formula": textstat.linsear_write_formula(text),
"gunning_fog": textstat.gunning_fog(text),
"fernandez_huerta": textstat.fernandez_huerta(text),
"szigriszt_pazos": textstat.szigriszt_pazos(text),
"gutierrez_polini": textstat.gutierrez_polini(text),
"crawford": textstat.crawford(text),
"gulpease_index": textstat.gulpease_index(text),
"osman": textstat.osman(text),
}
resp.update({"text_complexity_metrics": text_complexity_metrics})
resp.update(text_complexity_metrics)
if nlp is not None:
spacy = import_spacy()
@@ -78,16 +80,13 @@ def analyze_text(
dep_out = spacy.displacy.render( # type: ignore
doc, style="dep", jupyter=False, page=True
)
ent_out = spacy.displacy.render( # type: ignore
doc, style="ent", jupyter=False, page=True
)
text_visualizations = {
"dependency_tree": dep_out,
"entities": ent_out,
}
resp.update(text_visualizations)
return resp
@@ -98,10 +97,19 @@ class FlyteCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
def __init__(self) -> None:
"""Initialize callback handler."""
import_textstat() # Raise error since it is required
flytekit, renderer = import_flytekit()
self.pandas = import_pandas()
self.textstat = None
try:
self.textstat = import_textstat()
except ImportError:
logger.warning(
"Textstat library is not installed. \
It may result in the inability to log \
certain metrics that can be captured with Textstat."
)
spacy = None
try:
spacy = import_spacy()
@@ -123,7 +131,7 @@ class FlyteCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
"FlyteCallbackHandler uses spacy's en_core_web_sm model"
" for certain metrics. To download,"
" run the following command in your terminal:"
" `python -m spacy download en_core_web_sm` command."
" `python -m spacy download en_core_web_sm`"
)
self.table_renderer = renderer.TableRenderer
@@ -180,11 +188,10 @@ class FlyteCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
for generation in generations:
generation_resp = deepcopy(resp)
generation_resp.update(flatten_dict(generation.dict()))
if self.nlp:
if self.nlp or self.textstat:
generation_resp.update(
analyze_text(
generation.text,
nlp=self.nlp,
generation.text, nlp=self.nlp, textstat=self.textstat
)
)

View File

@@ -6,6 +6,7 @@ from langchain.schema import AgentAction, AgentFinish, LLMResult
def import_infino() -> Any:
"""Import the infino client."""
try:
from infinopy import InfinoClient
except ImportError:

View File

@@ -144,6 +144,7 @@ def tracing_v2_enabled(
project_name: Optional[str] = None,
*,
example_id: Optional[Union[str, UUID]] = None,
tags: Optional[List[str]] = None,
) -> Generator[None, None, None]:
"""Instruct LangChain to log all runs in context to LangSmith.
@@ -152,6 +153,8 @@ def tracing_v2_enabled(
Defaults to "default".
example_id (str or UUID, optional): The ID of the example.
Defaults to None.
tags (List[str], optional): The tags to add to the run.
Defaults to None.
Returns:
None
@@ -170,6 +173,7 @@ def tracing_v2_enabled(
cb = LangChainTracer(
example_id=example_id,
project_name=project_name,
tags=tags,
)
tracing_v2_callback_var.set(cb)
yield
@@ -383,6 +387,8 @@ class BaseRunManager(RunManagerMixin):
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
inheritable_tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
inheritable_metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""Initialize the run manager.
@@ -395,6 +401,8 @@ class BaseRunManager(RunManagerMixin):
Defaults to None.
tags (Optional[List[str]]): The list of tags.
inheritable_tags (Optional[List[str]]): The list of inheritable tags.
metadata (Optional[Dict[str, Any]]): The metadata.
inheritable_metadata (Optional[Dict[str, Any]]): The inheritable metadata.
"""
self.run_id = run_id
self.handlers = handlers
@@ -402,6 +410,8 @@ class BaseRunManager(RunManagerMixin):
self.parent_run_id = parent_run_id
self.tags = tags or []
self.inheritable_tags = inheritable_tags or []
self.metadata = metadata or {}
self.inheritable_metadata = inheritable_metadata or {}
@classmethod
def get_noop_manager(cls: Type[BRM]) -> BRM:
@@ -416,6 +426,8 @@ class BaseRunManager(RunManagerMixin):
inheritable_handlers=[],
tags=[],
inheritable_tags=[],
metadata={},
inheritable_metadata={},
)
@@ -447,6 +459,28 @@ class RunManager(BaseRunManager):
)
class ParentRunManager(RunManager):
    """Sync run manager that can spawn callback managers for child runs."""

    def get_child(self, tag: Optional[str] = None) -> CallbackManager:
        """Build a callback manager for a run nested under this one.

        The child is parented to this run's ``run_id`` and receives this
        manager's inheritable handlers, tags and metadata.

        Args:
            tag (str, optional): An extra tag added to the child with the
                inherit flag set to False. Defaults to None.

        Returns:
            CallbackManager: The child callback manager.
        """
        child = CallbackManager(handlers=[], parent_run_id=self.run_id)
        child.set_handlers(self.inheritable_handlers)
        child.add_tags(self.inheritable_tags)
        child.add_metadata(self.inheritable_metadata)
        if tag is None:
            return child
        child.add_tags([tag], False)
        return child
class AsyncRunManager(BaseRunManager):
"""Async Run Manager."""
@@ -475,6 +509,28 @@ class AsyncRunManager(BaseRunManager):
)
class AsyncParentRunManager(AsyncRunManager):
    """Async run manager that can spawn callback managers for child runs."""

    def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
        """Build an async callback manager for a run nested under this one.

        The child is parented to this run's ``run_id`` and receives this
        manager's inheritable handlers, tags and metadata.

        Args:
            tag (str, optional): An extra tag added to the child with the
                inherit flag set to False. Defaults to None.

        Returns:
            AsyncCallbackManager: The child callback manager.
        """
        child = AsyncCallbackManager(handlers=[], parent_run_id=self.run_id)
        child.set_handlers(self.inheritable_handlers)
        child.add_tags(self.inheritable_tags)
        child.add_metadata(self.inheritable_metadata)
        if tag is None:
            return child
        child.add_tags([tag], False)
        return child
class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
"""Callback manager for LLM run."""
@@ -601,26 +657,9 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
)
class CallbackManagerForChainRun(RunManager, ChainManagerMixin):
class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
"""Callback manager for chain run."""
def get_child(self, tag: Optional[str] = None) -> CallbackManager:
"""Get a child callback manager.
Args:
tag (str, optional): The tag for the child callback manager.
Defaults to None.
Returns:
CallbackManager: The child callback manager.
"""
manager = CallbackManager(handlers=[], parent_run_id=self.run_id)
manager.set_handlers(self.inheritable_handlers)
manager.add_tags(self.inheritable_tags)
if tag is not None:
manager.add_tags([tag], False)
return manager
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Run when chain ends running.
@@ -700,26 +739,9 @@ class CallbackManagerForChainRun(RunManager, ChainManagerMixin):
)
class AsyncCallbackManagerForChainRun(AsyncRunManager, ChainManagerMixin):
class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
"""Async callback manager for chain run."""
def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
"""Get a child callback manager.
Args:
tag (str, optional): The tag for the child callback manager.
Defaults to None.
Returns:
AsyncCallbackManager: The child callback manager.
"""
manager = AsyncCallbackManager(handlers=[], parent_run_id=self.run_id)
manager.set_handlers(self.inheritable_handlers)
manager.add_tags(self.inheritable_tags)
if tag is not None:
manager.add_tags([tag], False)
return manager
async def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Run when chain ends running.
@@ -799,26 +821,9 @@ class AsyncCallbackManagerForChainRun(AsyncRunManager, ChainManagerMixin):
)
class CallbackManagerForToolRun(RunManager, ToolManagerMixin):
class CallbackManagerForToolRun(ParentRunManager, ToolManagerMixin):
"""Callback manager for tool run."""
def get_child(self, tag: Optional[str] = None) -> CallbackManager:
"""Get a child callback manager.
Args:
tag (str, optional): The tag for the child callback manager.
Defaults to None.
Returns:
CallbackManager: The child callback manager.
"""
manager = CallbackManager(handlers=[], parent_run_id=self.run_id)
manager.set_handlers(self.inheritable_handlers)
manager.add_tags(self.inheritable_tags)
if tag is not None:
manager.add_tags([tag], False)
return manager
def on_tool_end(
self,
output: str,
@@ -862,26 +867,9 @@ class CallbackManagerForToolRun(RunManager, ToolManagerMixin):
)
class AsyncCallbackManagerForToolRun(AsyncRunManager, ToolManagerMixin):
class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
"""Async callback manager for tool run."""
def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
"""Get a child callback manager.
Args:
tag (str, optional): The tag to add to the child
callback manager. Defaults to None.
Returns:
AsyncCallbackManager: The child callback manager.
"""
manager = AsyncCallbackManager(handlers=[], parent_run_id=self.run_id)
manager.set_handlers(self.inheritable_handlers)
manager.add_tags(self.inheritable_tags)
if tag is not None:
manager.add_tags([tag], False)
return manager
async def on_tool_end(self, output: str, **kwargs: Any) -> None:
"""Run when tool ends running.
@@ -921,18 +909,9 @@ class AsyncCallbackManagerForToolRun(AsyncRunManager, ToolManagerMixin):
)
class CallbackManagerForRetrieverRun(RunManager, RetrieverManagerMixin):
class CallbackManagerForRetrieverRun(ParentRunManager, RetrieverManagerMixin):
"""Callback manager for retriever run."""
def get_child(self, tag: Optional[str] = None) -> CallbackManager:
"""Get a child callback manager."""
manager = CallbackManager([], parent_run_id=self.run_id)
manager.set_handlers(self.inheritable_handlers)
manager.add_tags(self.inheritable_tags)
if tag is not None:
manager.add_tags([tag], False)
return manager
def on_retriever_end(
self,
documents: Sequence[Document],
@@ -969,20 +948,11 @@ class CallbackManagerForRetrieverRun(RunManager, RetrieverManagerMixin):
class AsyncCallbackManagerForRetrieverRun(
AsyncRunManager,
AsyncParentRunManager,
RetrieverManagerMixin,
):
"""Async callback manager for retriever run."""
def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
"""Get a child callback manager."""
manager = AsyncCallbackManager([], parent_run_id=self.run_id)
manager.set_handlers(self.inheritable_handlers)
manager.add_tags(self.inheritable_tags)
if tag is not None:
manager.add_tags([tag], False)
return manager
async def on_retriever_end(
self, documents: Sequence[Document], **kwargs: Any
) -> None:
@@ -1048,6 +1018,7 @@ class CallbackManager(BaseCallbackManager):
run_id=run_id_,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1059,6 +1030,8 @@ class CallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
)
@@ -1094,6 +1067,7 @@ class CallbackManager(BaseCallbackManager):
run_id=run_id_,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1105,6 +1079,8 @@ class CallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
)
@@ -1139,6 +1115,7 @@ class CallbackManager(BaseCallbackManager):
run_id=run_id,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1149,6 +1126,8 @@ class CallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
def on_tool_start(
@@ -1182,6 +1161,7 @@ class CallbackManager(BaseCallbackManager):
run_id=run_id,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1192,10 +1172,13 @@ class CallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
run_id: Optional[UUID] = None,
parent_run_id: Optional[UUID] = None,
@@ -1209,10 +1192,12 @@ class CallbackManager(BaseCallbackManager):
self.handlers,
"on_retriever_start",
"ignore_retriever",
serialized,
query,
run_id=run_id,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1223,6 +1208,8 @@ class CallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
@classmethod
@@ -1233,6 +1220,8 @@ class CallbackManager(BaseCallbackManager):
verbose: bool = False,
inheritable_tags: Optional[List[str]] = None,
local_tags: Optional[List[str]] = None,
inheritable_metadata: Optional[Dict[str, Any]] = None,
local_metadata: Optional[Dict[str, Any]] = None,
) -> CallbackManager:
"""Configure the callback manager.
@@ -1246,6 +1235,10 @@ class CallbackManager(BaseCallbackManager):
Defaults to None.
local_tags (Optional[List[str]], optional): The local tags.
Defaults to None.
inheritable_metadata (Optional[Dict[str, Any]], optional): The inheritable
metadata. Defaults to None.
local_metadata (Optional[Dict[str, Any]], optional): The local metadata.
Defaults to None.
Returns:
CallbackManager: The configured callback manager.
@@ -1257,6 +1250,8 @@ class CallbackManager(BaseCallbackManager):
verbose,
inheritable_tags,
local_tags,
inheritable_metadata,
local_metadata,
)
@@ -1303,6 +1298,7 @@ class AsyncCallbackManager(BaseCallbackManager):
run_id=run_id_,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
)
@@ -1315,6 +1311,8 @@ class AsyncCallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
)
@@ -1356,6 +1354,7 @@ class AsyncCallbackManager(BaseCallbackManager):
run_id=run_id_,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
)
@@ -1368,6 +1367,8 @@ class AsyncCallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
)
@@ -1404,6 +1405,7 @@ class AsyncCallbackManager(BaseCallbackManager):
run_id=run_id,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1414,6 +1416,8 @@ class AsyncCallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
async def on_tool_start(
@@ -1449,6 +1453,7 @@ class AsyncCallbackManager(BaseCallbackManager):
run_id=run_id,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1459,10 +1464,13 @@ class AsyncCallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
async def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
run_id: Optional[UUID] = None,
parent_run_id: Optional[UUID] = None,
@@ -1476,10 +1484,12 @@ class AsyncCallbackManager(BaseCallbackManager):
self.handlers,
"on_retriever_start",
"ignore_retriever",
serialized,
query,
run_id=run_id,
parent_run_id=self.parent_run_id,
tags=self.tags,
metadata=self.metadata,
**kwargs,
)
@@ -1490,6 +1500,8 @@ class AsyncCallbackManager(BaseCallbackManager):
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
@classmethod
@@ -1500,6 +1512,8 @@ class AsyncCallbackManager(BaseCallbackManager):
verbose: bool = False,
inheritable_tags: Optional[List[str]] = None,
local_tags: Optional[List[str]] = None,
inheritable_metadata: Optional[Dict[str, Any]] = None,
local_metadata: Optional[Dict[str, Any]] = None,
) -> AsyncCallbackManager:
"""Configure the async callback manager.
@@ -1513,6 +1527,10 @@ class AsyncCallbackManager(BaseCallbackManager):
Defaults to None.
local_tags (Optional[List[str]], optional): The local tags.
Defaults to None.
inheritable_metadata (Optional[Dict[str, Any]], optional): The inheritable
metadata. Defaults to None.
local_metadata (Optional[Dict[str, Any]], optional): The local metadata.
Defaults to None.
Returns:
AsyncCallbackManager: The configured async callback manager.
@@ -1524,6 +1542,8 @@ class AsyncCallbackManager(BaseCallbackManager):
verbose,
inheritable_tags,
local_tags,
inheritable_metadata,
local_metadata,
)
@@ -1554,6 +1574,8 @@ def _configure(
verbose: bool = False,
inheritable_tags: Optional[List[str]] = None,
local_tags: Optional[List[str]] = None,
inheritable_metadata: Optional[Dict[str, Any]] = None,
local_metadata: Optional[Dict[str, Any]] = None,
) -> T:
"""Configure the callback manager.
@@ -1567,6 +1589,10 @@ def _configure(
inheritable_tags (Optional[List[str]], optional): The inheritable tags.
Defaults to None.
local_tags (Optional[List[str]], optional): The local tags. Defaults to None.
inheritable_metadata (Optional[Dict[str, Any]], optional): The inheritable
metadata. Defaults to None.
local_metadata (Optional[Dict[str, Any]], optional): The local metadata.
Defaults to None.
Returns:
T: The configured callback manager.
@@ -1586,6 +1612,8 @@ def _configure(
parent_run_id=inheritable_callbacks.parent_run_id,
tags=inheritable_callbacks.tags,
inheritable_tags=inheritable_callbacks.inheritable_tags,
metadata=inheritable_callbacks.metadata,
inheritable_metadata=inheritable_callbacks.inheritable_metadata,
)
local_handlers_ = (
local_callbacks
@@ -1597,6 +1625,9 @@ def _configure(
if inheritable_tags or local_tags:
callback_manager.add_tags(inheritable_tags or [])
callback_manager.add_tags(local_tags or [], False)
if inheritable_metadata or local_metadata:
callback_manager.add_metadata(inheritable_metadata or {})
callback_manager.add_metadata(local_metadata or {}, False)
tracer = tracing_callback_var.get()
wandb_tracer = wandb_tracing_callback_var.get()

View File

@@ -551,8 +551,18 @@ class MlflowCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
on_llm_start_records_df = pd.DataFrame(self.records["on_llm_start_records"])
on_llm_end_records_df = pd.DataFrame(self.records["on_llm_end_records"])
llm_input_columns = ["step", "prompt"]
if "name" in on_llm_start_records_df.columns:
llm_input_columns.append("name")
elif "id" in on_llm_start_records_df.columns:
# id is llm class's full import path. For example:
# ["langchain", "llms", "openai", "AzureOpenAI"]
on_llm_start_records_df["name"] = on_llm_start_records_df["id"].apply(
lambda id_: id_[-1]
)
llm_input_columns.append("name")
llm_input_prompts_df = (
on_llm_start_records_df[["step", "prompt", "name"]]
on_llm_start_records_df[llm_input_columns]
.dropna(axis=1)
.rename({"step": "prompt_step"}, axis=1)
)

View File

@@ -63,7 +63,7 @@ class StdOutCallbackHandler(BaseCallbackHandler):
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
"""Run on agent action."""
print_text(action.log, color=color if color else self.color)
print_text(action.log, color=color or self.color)
def on_tool_end(
self,
@@ -76,7 +76,7 @@ class StdOutCallbackHandler(BaseCallbackHandler):
"""If not the final action, print out observation."""
if observation_prefix is not None:
print_text(f"\n{observation_prefix}")
print_text(output, color=color if color else self.color)
print_text(output, color=color or self.color)
if llm_prefix is not None:
print_text(f"\n{llm_prefix}")
@@ -94,10 +94,10 @@ class StdOutCallbackHandler(BaseCallbackHandler):
**kwargs: Any,
) -> None:
"""Run when agent ends."""
print_text(text, color=color if color else self.color, end=end)
print_text(text, color=color or self.color, end=end)
def on_agent_finish(
    self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
) -> None:
    """Print the agent's final log when the agent finishes.

    Args:
        finish: The agent's finish record; its ``log`` is what gets printed.
        color: Optional color override. When falsy, ``self.color`` is used.
        **kwargs: Accepted for callback-interface compatibility; unused here.
    """
    # Print exactly once. An explicit ``color`` wins; otherwise fall back to
    # the handler-level default (same rule as the other print callbacks).
    print_text(finish.log, color=color or self.color, end="\n")

View File

@@ -31,7 +31,8 @@ class AsyncIteratorCallbackHandler(AsyncCallbackHandler):
self.done.clear()
async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
    """Queue a newly streamed token for the consumer.

    ``None`` and empty-string tokens are dropped; every other token is put
    on ``self.queue`` exactly once.

    Args:
        token: The token just produced by the LLM.
        **kwargs: Accepted for callback-interface compatibility; unused here.
    """
    # Single guarded enqueue: an unconditional put in addition to this one
    # would deliver each token twice and let empty tokens through.
    if token is not None and token != "":
        self.queue.put_nowait(token)
async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
    """Signal that the LLM run has finished by setting ``self.done``.

    Args:
        response: The final LLM result; not used here.
        **kwargs: Accepted for callback-interface compatibility; unused here.
    """
    done_flag = self.done
    done_flag.set()

View File

@@ -37,7 +37,7 @@ class FinalStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
"""Instantiate FinalStreamingStdOutCallbackHandler.
Args:
answer_prefix_tokens: Token sequence that prefixes the anwer.
answer_prefix_tokens: Token sequence that prefixes the answer.
Default is ["Final", "Answer", ":"]
strip_tokens: Ignore white spaces and new lines when comparing
answer_prefix_tokens to last tokens? (to determine if answer has been

View File

@@ -9,11 +9,15 @@ if TYPE_CHECKING:
class ChildType(Enum):
    """Enumeration of the supported child kinds.

    Each member's value is the same string as its name.
    """

    # A markdown child.
    MARKDOWN = "MARKDOWN"
    # An exception child.
    EXCEPTION = "EXCEPTION"
class ChildRecord(NamedTuple):
    """Immutable record describing a single child.

    Fields, in positional order: ``type``, ``kwargs``, ``dg``.
    """

    # Which kind of child this record describes.
    type: ChildType
    # Keyword arguments stored with the child (presumably the ones used to
    # create it -- confirm against the code that builds these records).
    kwargs: Dict[str, Any]
    # The DeltaGenerator associated with the child.
    dg: DeltaGenerator

View File

@@ -27,6 +27,8 @@ EXCEPTION_EMOJI = "⚠️"
class LLMThoughtState(Enum):
"""Enumerator of the LLMThought state."""
# The LLM is thinking about what to do next. We don't know which tool we'll run.
THINKING = "THINKING"
# The LLM has decided to run a tool. We don't have results from the tool yet.
@@ -36,6 +38,8 @@ class LLMThoughtState(Enum):
class ToolRecord(NamedTuple):
"""The tool record as a NamedTuple."""
name: str
input_str: str
@@ -100,6 +104,8 @@ class LLMThoughtLabeler:
class LLMThought:
"""A thought in the LLM's thought stream."""
def __init__(
self,
parent_container: DeltaGenerator,
@@ -107,6 +113,14 @@ class LLMThought:
expanded: bool,
collapse_on_complete: bool,
):
"""Initialize the LLMThought.
Args:
parent_container: The container we're writing into.
labeler: The labeler to use for this thought.
expanded: Whether the thought should be expanded by default.
collapse_on_complete: Whether the thought should be collapsed.
"""
self._container = MutableExpander(
parent_container=parent_container,
label=labeler.get_initial_label(),
@@ -213,6 +227,8 @@ class LLMThought:
class StreamlitCallbackHandler(BaseCallbackHandler):
"""A callback handler that writes to a Streamlit app."""
def __init__(
self,
parent_container: DeltaGenerator,

View File

@@ -89,12 +89,15 @@ class BaseTracer(BaseCallbackHandler, ABC):
run_id: UUID,
tags: Optional[List[str]] = None,
parent_run_id: Optional[UUID] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Start a trace for an LLM run."""
parent_run_id_ = str(parent_run_id) if parent_run_id else None
execution_order = self._get_execution_order(parent_run_id_)
start_time = datetime.utcnow()
if metadata:
kwargs.update({"metadata": metadata})
llm_run = Run(
id=run_id,
parent_run_id=parent_run_id,
@@ -186,12 +189,15 @@ class BaseTracer(BaseCallbackHandler, ABC):
run_id: UUID,
tags: Optional[List[str]] = None,
parent_run_id: Optional[UUID] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Start a trace for a chain run."""
parent_run_id_ = str(parent_run_id) if parent_run_id else None
execution_order = self._get_execution_order(parent_run_id_)
start_time = datetime.utcnow()
if metadata:
kwargs.update({"metadata": metadata})
chain_run = Run(
id=run_id,
parent_run_id=parent_run_id,
@@ -253,12 +259,15 @@ class BaseTracer(BaseCallbackHandler, ABC):
run_id: UUID,
tags: Optional[List[str]] = None,
parent_run_id: Optional[UUID] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Start a trace for a tool run."""
parent_run_id_ = str(parent_run_id) if parent_run_id else None
execution_order = self._get_execution_order(parent_run_id_)
start_time = datetime.utcnow()
if metadata:
kwargs.update({"metadata": metadata})
tool_run = Run(
id=run_id,
parent_run_id=parent_run_id,
@@ -312,20 +321,25 @@ class BaseTracer(BaseCallbackHandler, ABC):
def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when Retriever starts running."""
parent_run_id_ = str(parent_run_id) if parent_run_id else None
execution_order = self._get_execution_order(parent_run_id_)
start_time = datetime.utcnow()
if metadata:
kwargs.update({"metadata": metadata})
retrieval_run = Run(
id=run_id,
name="Retriever",
parent_run_id=parent_run_id,
serialized=serialized,
inputs={"query": query},
extra=kwargs,
events=[{"name": "start", "time": start_time}],

View File

@@ -6,6 +6,7 @@ from uuid import UUID
from langchainplus_sdk import LangChainPlusClient, RunEvaluator
from langchain.callbacks.manager import tracing_v2_enabled
from langchain.callbacks.tracers.base import BaseTracer
from langchain.callbacks.tracers.schemas import Run
@@ -27,6 +28,8 @@ class EvaluatorCallbackHandler(BaseTracer):
If not specified, a new instance will be created.
example_id : Union[UUID, str], optional
The example ID to be associated with the runs.
project_name : str, optional
The LangSmith project name to organize eval chain runs under.
Attributes
----------
@@ -40,6 +43,11 @@ class EvaluatorCallbackHandler(BaseTracer):
The thread pool executor used for running the evaluators.
futures : Set[Future]
The set of futures representing the running evaluators.
skip_unfinished : bool
Whether to skip runs that are not finished or raised
an error.
project_name : Optional[str]
The LangSmith project name to organize eval chain runs under.
"""
name = "evaluator_callback_handler"
@@ -50,6 +58,8 @@ class EvaluatorCallbackHandler(BaseTracer):
max_workers: Optional[int] = None,
client: Optional[LangChainPlusClient] = None,
example_id: Optional[Union[UUID, str]] = None,
skip_unfinished: bool = True,
project_name: Optional[str] = None,
**kwargs: Any,
) -> None:
super().__init__(**kwargs)
@@ -62,10 +72,25 @@ class EvaluatorCallbackHandler(BaseTracer):
max_workers=max(max_workers or len(evaluators), 1)
)
self.futures: Set[Future] = set()
self.skip_unfinished = skip_unfinished
self.project_name = project_name
def _evaluate_run(self, run: Run, evaluator: RunEvaluator) -> None:
def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None:
"""Evaluate the run in the project.
Parameters
----------
run : Run
The run to be evaluated.
evaluator : RunEvaluator
The evaluator to use for evaluating the run.
"""
try:
self.client.evaluate_run(run, evaluator)
if self.project_name is None:
self.client.evaluate_run(run, evaluator)
with tracing_v2_enabled(project_name=self.project_name, tags=["eval"]):
self.client.evaluate_run(run, evaluator)
except Exception as e:
logger.error(
f"Error evaluating run {run.id} with "
@@ -83,10 +108,15 @@ class EvaluatorCallbackHandler(BaseTracer):
The run to be evaluated.
"""
if self.skip_unfinished and not run.outputs:
logger.debug(f"Skipping unfinished run {run.id}")
return
run_ = run.copy()
run_.reference_example_id = self.example_id
for evaluator in self.evaluators:
self.futures.add(self.executor.submit(self._evaluate_run, run_, evaluator))
self.futures.add(
self.executor.submit(self._evaluate_in_project, run_, evaluator)
)
def wait_for_futures(self) -> None:
"""Wait for all futures to complete."""

View File

@@ -31,6 +31,7 @@ def log_error_once(method: str, exception: Exception) -> None:
def wait_for_all_tracers() -> None:
    """Wait for every registered tracer to finish.

    Calls ``wait_for_futures()`` on each tracer held in the module-level
    ``_TRACERS`` collection, in iteration order.
    """
    global _TRACERS
    for registered_tracer in _TRACERS:
        registered_tracer.wait_for_futures()
@@ -44,6 +45,7 @@ class LangChainTracer(BaseTracer):
example_id: Optional[Union[UUID, str]] = None,
project_name: Optional[str] = None,
client: Optional[LangChainPlusClient] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Initialize the LangChain tracer."""
@@ -59,6 +61,7 @@ class LangChainTracer(BaseTracer):
self.executor = ThreadPoolExecutor(max_workers=1)
self.client = client or LangChainPlusClient()
self._futures: Set[Future] = set()
self.tags = tags or []
global _TRACERS
_TRACERS.append(self)
@@ -70,12 +73,15 @@ class LangChainTracer(BaseTracer):
run_id: UUID,
tags: Optional[List[str]] = None,
parent_run_id: Optional[UUID] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Start a trace for an LLM run."""
parent_run_id_ = str(parent_run_id) if parent_run_id else None
execution_order = self._get_execution_order(parent_run_id_)
start_time = datetime.utcnow()
if metadata:
kwargs.update({"metadata": metadata})
chat_model_run = Run(
id=run_id,
parent_run_id=parent_run_id,
@@ -95,11 +101,16 @@ class LangChainTracer(BaseTracer):
def _persist_run(self, run: Run) -> None:
"""The Langchain Tracer uses Post/Patch rather than persist."""
def _get_tags(self, run: Run) -> List[str]:
"""Get combined tags for a run."""
tags = set(run.tags or [])
tags.update(self.tags or [])
return list(tags)
def _persist_run_single(self, run: Run) -> None:
"""Persist a run."""
if run.parent_run_id is None:
run.reference_example_id = self.example_id
run_dict = run.dict(exclude={"child_runs"})
run_dict["tags"] = self._get_tags(run)
extra = run_dict.get("extra", {})
extra["runtime"] = get_runtime_environment()
run_dict["extra"] = extra
@@ -113,7 +124,9 @@ class LangChainTracer(BaseTracer):
def _update_run_single(self, run: Run) -> None:
"""Update a run."""
try:
self.client.update_run(run.id, **run.dict())
run_dict = run.dict()
run_dict["tags"] = self._get_tags(run)
self.client.update_run(run.id, **run_dict)
except Exception as e:
# Errors are swallowed by the thread executor so we need to log them here
log_error_once("patch", e)
@@ -121,12 +134,16 @@ class LangChainTracer(BaseTracer):
def _on_llm_start(self, run: Run) -> None:
"""Persist an LLM run."""
if run.parent_run_id is None:
run.reference_example_id = self.example_id
self._futures.add(
self.executor.submit(self._persist_run_single, run.copy(deep=True))
)
def _on_chat_model_start(self, run: Run) -> None:
"""Persist an LLM run."""
if run.parent_run_id is None:
run.reference_example_id = self.example_id
self._futures.add(
self.executor.submit(self._persist_run_single, run.copy(deep=True))
)
@@ -145,6 +162,8 @@ class LangChainTracer(BaseTracer):
def _on_chain_start(self, run: Run) -> None:
"""Process the Chain Run upon start."""
if run.parent_run_id is None:
run.reference_example_id = self.example_id
self._futures.add(
self.executor.submit(self._persist_run_single, run.copy(deep=True))
)
@@ -163,6 +182,8 @@ class LangChainTracer(BaseTracer):
def _on_tool_start(self, run: Run) -> None:
"""Process the Tool Run upon start."""
if run.parent_run_id is None:
run.reference_example_id = self.example_id
self._futures.add(
self.executor.submit(self._persist_run_single, run.copy(deep=True))
)
@@ -181,6 +202,8 @@ class LangChainTracer(BaseTracer):
def _on_retriever_start(self, run: Run) -> None:
"""Process the Retriever Run upon start."""
if run.parent_run_id is None:
run.reference_example_id = self.example_id
self._futures.add(
self.executor.submit(self._persist_run_single, run.copy(deep=True))
)

Some files were not shown because too many files have changed in this diff Show More