mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-20 05:04:50 +00:00
Compare commits
6 Commits
pg/python-
...
wfh/json_s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f780d90ed2 | ||
|
|
fde19c8667 | ||
|
|
9cea796671 | ||
|
|
91941d1f19 | ||
|
|
4d66756d93 | ||
|
|
a30f98f534 |
2
.github/workflows/_lint.yml
vendored
2
.github/workflows/_lint.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
# so linting on fewer versions makes CI faster.
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.12"
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
||||
@@ -24,7 +24,6 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
1
.github/workflows/_test.yml
vendored
1
.github/workflows/_test.yml
vendored
@@ -24,7 +24,6 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
1
.github/workflows/langchain_ci.yml
vendored
1
.github/workflows/langchain_ci.yml
vendored
@@ -63,7 +63,6 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
name: Python ${{ matrix.python-version }} extended tests
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -60,7 +60,6 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
name: test with unpublished langchain - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -96,7 +95,6 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
name: Python ${{ matrix.python-version }} extended tests
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
1
.github/workflows/scheduled_test.yml
vendored
1
.github/workflows/scheduled_test.yml
vendored
@@ -22,7 +22,6 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -33,27 +33,26 @@ module.exports = {
|
||||
slug: "get_started",
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Modules",
|
||||
collapsed: false,
|
||||
collapsible: false,
|
||||
items: [{ type: "autogenerated", dirName: "modules" } ],
|
||||
link: {
|
||||
type: 'doc',
|
||||
id: "modules/index"
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "LangChain Expression Language",
|
||||
collapsed: true,
|
||||
collapsed: false,
|
||||
items: [{ type: "autogenerated", dirName: "expression_language" } ],
|
||||
link: {
|
||||
type: 'doc',
|
||||
id: "expression_language/index"
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Modules",
|
||||
collapsed: false,
|
||||
items: [{ type: "autogenerated", dirName: "modules" } ],
|
||||
link: {
|
||||
type: 'doc',
|
||||
id: "modules/index"
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Guides",
|
||||
|
||||
883
docs/extras/integrations/vectorstores/vespa.ipynb
Normal file
883
docs/extras/integrations/vectorstores/vespa.ipynb
Normal file
@@ -0,0 +1,883 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ce0f17b9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Vespa\n",
|
||||
"\n",
|
||||
">[Vespa](https://vespa.ai/) is a fully featured search engine and vector database. It supports vector search (ANN), lexical search, and search in structured data, all in the same query.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use `Vespa.ai` as a LangChain vector store.\n",
|
||||
"\n",
|
||||
"In order to create the vector store, we use\n",
|
||||
"[pyvespa](https://pyvespa.readthedocs.io/en/latest/index.html) to create a\n",
|
||||
"connection a `Vespa` service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7e6a11ab-38bd-4920-ba11-60cb2f075754",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pyvespa"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Using the `pyvespa` package, you can either connect to a\n",
|
||||
"[Vespa Cloud instance](https://pyvespa.readthedocs.io/en/latest/deploy-vespa-cloud.html)\n",
|
||||
"or a local\n",
|
||||
"[Docker instance](https://pyvespa.readthedocs.io/en/latest/deploy-docker.html).\n",
|
||||
"Here, we will create a new Vespa application and deploy that using Docker.\n",
|
||||
"\n",
|
||||
"#### Creating a Vespa application\n",
|
||||
"\n",
|
||||
"First, we need to create an application package:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from vespa.package import ApplicationPackage, Field, RankProfile\n",
|
||||
"\n",
|
||||
"app_package = ApplicationPackage(name=\"testapp\")\n",
|
||||
"app_package.schema.add_fields(\n",
|
||||
" Field(name=\"text\", type=\"string\", indexing=[\"index\", \"summary\"], index=\"enable-bm25\"),\n",
|
||||
" Field(name=\"embedding\", type=\"tensor<float>(x[384])\",\n",
|
||||
" indexing=[\"attribute\", \"summary\"],\n",
|
||||
" attribute=[f\"distance-metric: angular\"]),\n",
|
||||
")\n",
|
||||
"app_package.schema.add_rank_profile(\n",
|
||||
" RankProfile(name=\"default\",\n",
|
||||
" first_phase=\"closeness(field, embedding)\",\n",
|
||||
" inputs=[(\"query(query_embedding)\", \"tensor<float>(x[384])\")]\n",
|
||||
" )\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This sets up a Vespa application with a schema for each document that contains\n",
|
||||
"two fields: `text` for holding the document text and `embedding` for holding\n",
|
||||
"the embedding vector. The `text` field is set up to use a BM25 index for\n",
|
||||
"efficient text retrieval, and we'll see how to use this and hybrid search a\n",
|
||||
"bit later.\n",
|
||||
"\n",
|
||||
"The `embedding` field is set up with a vector of length 384 to hold the\n",
|
||||
"embedding representation of the text. See\n",
|
||||
"[Vespa's Tensor Guide](https://docs.vespa.ai/en/tensor-user-guide.html)\n",
|
||||
"for more on tensors in Vespa.\n",
|
||||
"\n",
|
||||
"Lastly, we add a [rank profile](https://docs.vespa.ai/en/ranking.html) to\n",
|
||||
"instruct Vespa how to order documents. Here we set this up with a\n",
|
||||
"[nearest neighbor search](https://docs.vespa.ai/en/nearest-neighbor-search.html).\n",
|
||||
"\n",
|
||||
"Now we can deploy this application locally:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c10dd962",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from vespa.deployment import VespaDocker\n",
|
||||
"\n",
|
||||
"vespa_docker = VespaDocker()\n",
|
||||
"vespa_app = vespa_docker.deploy(application_package=app_package)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3df4ce53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This deploys and creates a connection to a `Vespa` service. In case you\n",
|
||||
"already have a Vespa application running, for instance in the cloud,\n",
|
||||
"please refer to the PyVespa application for how to connect.\n",
|
||||
"\n",
|
||||
"#### Creating a Vespa vector store\n",
|
||||
"\n",
|
||||
"Now, let's load some documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n",
|
||||
"\n",
|
||||
"embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Here, we also set up local sentence embedder to transform the text to embedding\n",
|
||||
"vectors. One could also use OpenAI embeddings, but the vector length needs to\n",
|
||||
"be updated to `1536` to reflect the larger size of that embedding.\n",
|
||||
"\n",
|
||||
"To feed these to Vespa, we need to configure how the vector store should map to\n",
|
||||
"fields in the Vespa application. Then we create the vector store directly from\n",
|
||||
"this set of documents:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vespa_config = dict(\n",
|
||||
" page_content_field=\"text\",\n",
|
||||
" embedding_field=\"embedding\",\n",
|
||||
" input_field=\"query_embedding\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"from langchain.vectorstores import VespaStore\n",
|
||||
"\n",
|
||||
"db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This creates a Vespa vector store and feeds that set of documents to Vespa.\n",
|
||||
"The vector store takes care of calling the embedding function for each document\n",
|
||||
"and inserts them into the database.\n",
|
||||
"\n",
|
||||
"We can now query the vector store:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7ccca1f4",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = db.similarity_search(query)\n",
|
||||
"\n",
|
||||
"print(results[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e7e34e1",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"This will use the embedding function given above to create a representation\n",
|
||||
"for the query and use that to search Vespa. Note that this will use the\n",
|
||||
"`default` ranking function, which we set up in the application package\n",
|
||||
"above. You can use the `ranking` argument to `similarity_search` to\n",
|
||||
"specify which ranking function to use.\n",
|
||||
"\n",
|
||||
"Please refer to the [pyvespa documentation](https://pyvespa.readthedocs.io/en/latest/getting-started-pyvespa.html#Query)\n",
|
||||
"for more information.\n",
|
||||
"\n",
|
||||
"This covers the basic usage of the Vespa store in LangChain.\n",
|
||||
"Now you can return the results and continue using these in LangChain.\n",
|
||||
"\n",
|
||||
"#### Updating documents\n",
|
||||
"\n",
|
||||
"An alternative to calling `from_documents`, you can create the vector\n",
|
||||
"store directly and call `add_texts` from that. This can also be used to update\n",
|
||||
"documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = db.similarity_search(query)\n",
|
||||
"result = results[0]\n",
|
||||
"\n",
|
||||
"result.page_content = \"UPDATED: \" + result.page_content\n",
|
||||
"db.add_texts([result.page_content], [result.metadata], result.metadata[\"id\"])\n",
|
||||
"\n",
|
||||
"results = db.similarity_search(query)\n",
|
||||
"print(results[0].page_content)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"However, the `pyvespa` library contains methods to manipulate\n",
|
||||
"content on Vespa which you can use directly.\n",
|
||||
"\n",
|
||||
"#### Deleting documents\n",
|
||||
"\n",
|
||||
"You can delete documents using the `delete` function:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = db.similarity_search(query)\n",
|
||||
"# docs[0].metadata[\"id\"] == \"id:testapp:testapp::32\"\n",
|
||||
"\n",
|
||||
"db.delete([\"32\"])\n",
|
||||
"result = db.similarity_search(query)\n",
|
||||
"# docs[0].metadata[\"id\"] != \"id:testapp:testapp::32\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Again, the `pyvespa` connection contains methods to delete documents as well.\n",
|
||||
"\n",
|
||||
"### Returning with scores\n",
|
||||
"\n",
|
||||
"The `similarity_search` method only returns the documents in order of\n",
|
||||
"relevancy. To retrieve the actual scores:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = db.similarity_search_with_score(query)\n",
|
||||
"result = results[0]\n",
|
||||
"# result[1] ~= 0.463"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This is a result of using the `\"all-MiniLM-L6-v2\"` embedding model using the\n",
|
||||
"cosine distance function (as given by the argument `angular` in the\n",
|
||||
"application function).\n",
|
||||
"\n",
|
||||
"Different embedding functions need different distance functions, and Vespa\n",
|
||||
"needs to know which distance function to use when orderings documents.\n",
|
||||
"Please refer to the\n",
|
||||
"[documentation on distance functions](https://docs.vespa.ai/en/reference/schema-reference.html#distance-metric)\n",
|
||||
"for more information.\n",
|
||||
"\n",
|
||||
"### As retriever\n",
|
||||
"\n",
|
||||
"To use this vector store as a\n",
|
||||
"[LangChain retriever](https://python.langchain.com/docs/modules/data_connection/retrievers/)\n",
|
||||
"simply call the `as_retriever` function, which is a standard vector store\n",
|
||||
"method:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)\n",
|
||||
"retriever = db.as_retriever()\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = retriever.get_relevant_documents(query)\n",
|
||||
"\n",
|
||||
"# results[0].metadata[\"id\"] == \"id:testapp:testapp::32\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This allows for more general, unstructured, retrieval from the vector store.\n",
|
||||
"\n",
|
||||
"### Metadata\n",
|
||||
"\n",
|
||||
"In the example so far, we've only used the text and the embedding for that\n",
|
||||
"text. Documents usually contain additional information, which in LangChain\n",
|
||||
"is referred to as metadata.\n",
|
||||
"\n",
|
||||
"Vespa can contain many fields with different types by adding them to the application\n",
|
||||
"package:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"app_package.schema.add_fields(\n",
|
||||
" # ...\n",
|
||||
" Field(name=\"date\", type=\"string\", indexing=[\"attribute\", \"summary\"]),\n",
|
||||
" Field(name=\"rating\", type=\"int\", indexing=[\"attribute\", \"summary\"]),\n",
|
||||
" Field(name=\"author\", type=\"string\", indexing=[\"attribute\", \"summary\"]),\n",
|
||||
" # ...\n",
|
||||
")\n",
|
||||
"vespa_app = vespa_docker.deploy(application_package=app_package)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"We can add some metadata fields in the documents:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add metadata\n",
|
||||
"for i, doc in enumerate(docs):\n",
|
||||
" doc.metadata[\"date\"] = f\"2023-{(i % 12)+1}-{(i % 28)+1}\"\n",
|
||||
" doc.metadata[\"rating\"] = range(1, 6)[i % 5]\n",
|
||||
" doc.metadata[\"author\"] = [\"Joe Biden\", \"Unknown\"][min(i, 1)]"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"And let the Vespa vector store know about these fields:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vespa_config.update(dict(metadata_fields=[\"date\", \"rating\", \"author\"]))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Now, when searching for these documents, these fields will be returned.\n",
|
||||
"Also, these fields can be filtered on:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = db.similarity_search(query, filter=\"rating > 3\")\n",
|
||||
"# results[0].metadata[\"id\"] == \"id:testapp:testapp::34\"\n",
|
||||
"# results[0].metadata[\"author\"] == \"Unknown\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Custom query\n",
|
||||
"\n",
|
||||
"If the default behavior of the similarity search does not fit your\n",
|
||||
"requirements, you can always provide your own query. Thus, you don't\n",
|
||||
"need to provide all of the configuration to the vector store, but\n",
|
||||
"rather just write this yourself.\n",
|
||||
"\n",
|
||||
"First, let's add a BM25 ranking function to our application:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from vespa.package import FieldSet\n",
|
||||
"\n",
|
||||
"app_package.schema.add_field_set(FieldSet(name=\"default\", fields=[\"text\"]))\n",
|
||||
"app_package.schema.add_rank_profile(RankProfile(name=\"bm25\", first_phase=\"bm25(text)\"))\n",
|
||||
"vespa_app = vespa_docker.deploy(application_package=app_package)\n",
|
||||
"db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Then, to perform a regular text search based on BM25:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"custom_query = {\n",
|
||||
" \"yql\": f\"select * from sources * where userQuery()\",\n",
|
||||
" \"query\": query,\n",
|
||||
" \"type\": \"weakAnd\",\n",
|
||||
" \"ranking\": \"bm25\",\n",
|
||||
" \"hits\": 4\n",
|
||||
"}\n",
|
||||
"results = db.similarity_search_with_score(query, custom_query=custom_query)\n",
|
||||
"# results[0][0].metadata[\"id\"] == \"id:testapp:testapp::32\"\n",
|
||||
"# results[0][1] ~= 14.384"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"All of the powerful search and query capabilities of Vespa can be used\n",
|
||||
"by using a custom query. Please refer to the Vespa documentation on it's\n",
|
||||
"[Query API](https://docs.vespa.ai/en/query-api.html) for more details.\n",
|
||||
"\n",
|
||||
"### Hybrid search\n",
|
||||
"\n",
|
||||
"Hybrid search means using both a classic term-based search such as\n",
|
||||
"BM25 and a vector search and combining the results. We need to create\n",
|
||||
"a new rank profile for hybrid search on Vespa:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"app_package.schema.add_rank_profile(\n",
|
||||
" RankProfile(name=\"hybrid\",\n",
|
||||
" first_phase=\"log(bm25(text)) + 0.5 * closeness(field, embedding)\",\n",
|
||||
" inputs=[(\"query(query_embedding)\", \"tensor<float>(x[384])\")]\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"vespa_app = vespa_docker.deploy(application_package=app_package)\n",
|
||||
"db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Here, we score each document as a combination of it's BM25 score and its\n",
|
||||
"distance score. We can query using a custom query:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"query_embedding = embedding_function.embed_query(query)\n",
|
||||
"nearest_neighbor_expression = \"{targetHits: 4}nearestNeighbor(embedding, query_embedding)\"\n",
|
||||
"custom_query = {\n",
|
||||
" \"yql\": f\"select * from sources * where {nearest_neighbor_expression} and userQuery()\",\n",
|
||||
" \"query\": query,\n",
|
||||
" \"type\": \"weakAnd\",\n",
|
||||
" \"input.query(query_embedding)\": query_embedding,\n",
|
||||
" \"ranking\": \"hybrid\",\n",
|
||||
" \"hits\": 4\n",
|
||||
"}\n",
|
||||
"results = db.similarity_search_with_score(query, custom_query=custom_query)\n",
|
||||
"# results[0][0].metadata[\"id\"], \"id:testapp:testapp::32\")\n",
|
||||
"# results[0][1] ~= 2.897"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Native embedders in Vespa\n",
|
||||
"\n",
|
||||
"Up until this point we've used an embedding function in Python to provide\n",
|
||||
"embeddings for the texts. Vespa supports embedding function natively, so\n",
|
||||
"you can defer this calculation in to Vespa. One benefit is the ability to use\n",
|
||||
"GPUs when embedding documents if you have a large collections.\n",
|
||||
"\n",
|
||||
"Please refer to [Vespa embeddings](https://docs.vespa.ai/en/embedding.html)\n",
|
||||
"for more information.\n",
|
||||
"\n",
|
||||
"First, we need to modify our application package:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from vespa.package import Component, Parameter\n",
|
||||
"\n",
|
||||
"app_package.components = [\n",
|
||||
" Component(id=\"hf-embedder\", type=\"hugging-face-embedder\",\n",
|
||||
" parameters=[\n",
|
||||
" Parameter(\"transformer-model\", {\"path\": \"...\"}),\n",
|
||||
" Parameter(\"tokenizer-model\", {\"url\": \"...\"}),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"Field(name=\"hfembedding\", type=\"tensor<float>(x[384])\",\n",
|
||||
" is_document_field=False,\n",
|
||||
" indexing=[\"input text\", \"embed hf-embedder\", \"attribute\", \"summary\"],\n",
|
||||
" attribute=[f\"distance-metric: angular\"],\n",
|
||||
" )\n",
|
||||
"app_package.schema.add_rank_profile(\n",
|
||||
" RankProfile(name=\"hf_similarity\",\n",
|
||||
" first_phase=\"closeness(field, hfembedding)\",\n",
|
||||
" inputs=[(\"query(query_embedding)\", \"tensor<float>(x[384])\")]\n",
|
||||
" )\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Please refer to the embeddings documentation on adding embedder models\n",
|
||||
"and tokenizers to the application. Note that the `hfembedding` field\n",
|
||||
"includes instructions for embedding using the `hf-embedder`.\n",
|
||||
"\n",
|
||||
"Now we can query with a custom query:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"nearest_neighbor_expression = \"{targetHits: 4}nearestNeighbor(internalembedding, query_embedding)\"\n",
|
||||
"custom_query = {\n",
|
||||
" \"yql\": f\"select * from sources * where {nearest_neighbor_expression}\",\n",
|
||||
" \"input.query(query_embedding)\": f\"embed(hf-embedder, \\\"{query}\\\")\",\n",
|
||||
" \"ranking\": \"internal_similarity\",\n",
|
||||
" \"hits\": 4\n",
|
||||
"}\n",
|
||||
"results = db.similarity_search_with_score(query, custom_query=custom_query)\n",
|
||||
"# results[0][0].metadata[\"id\"], \"id:testapp:testapp::32\")\n",
|
||||
"# results[0][1] ~= 0.630"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Note that the query here includes an `embed` instruction to embed the query\n",
|
||||
"using the same model as for the documents.\n",
|
||||
"\n",
|
||||
"### Approximate nearest neighbor\n",
|
||||
"\n",
|
||||
"In all of the above examples, we've used exact nearest neighbor to\n",
|
||||
"find results. However, for large collections of documents this is\n",
|
||||
"not feasible as one has to scan through all documents to find the\n",
|
||||
"best matches. To avoid this, we can use\n",
|
||||
"[approximate nearest neighbors](https://docs.vespa.ai/en/approximate-nn-hnsw.html).\n",
|
||||
"\n",
|
||||
"First, we can change the embedding field to create a HNSW index:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from vespa.package import HNSW\n",
|
||||
"\n",
|
||||
"app_package.schema.add_fields(\n",
|
||||
" Field(name=\"embedding\", type=\"tensor<float>(x[384])\",\n",
|
||||
" indexing=[\"attribute\", \"summary\", \"index\"],\n",
|
||||
" ann=HNSW(distance_metric=\"angular\", max_links_per_node=16, neighbors_to_explore_at_insert=200)\n",
|
||||
" )\n",
|
||||
")\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This creates a HNSW index on the embedding data which allows for efficient\n",
|
||||
"searching. With this set, we can easily search using ANN by setting\n",
|
||||
"the `approximate` argument to `True`:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = db.similarity_search(query, approximate=True)\n",
|
||||
"# results[0][0].metadata[\"id\"], \"id:testapp:testapp::32\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This covers most of the functionality in the Vespa vector store in LangChain.\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import math
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
from typing import (
|
||||
Any,
|
||||
AsyncIterator,
|
||||
@@ -19,6 +20,7 @@ from anyio import create_memory_object_stream
|
||||
|
||||
from langchain.callbacks.tracers.base import BaseTracer
|
||||
from langchain.callbacks.tracers.schemas import Run
|
||||
from langchain.load.load import load
|
||||
from langchain.schema.output import ChatGenerationChunk, GenerationChunk
|
||||
|
||||
|
||||
@@ -55,7 +57,7 @@ class RunState(TypedDict):
|
||||
"""Final output of the run, usually the result of aggregating streamed_output.
|
||||
Only available after the run has finished successfully."""
|
||||
|
||||
logs: list[LogEntry]
|
||||
logs: Dict[str, LogEntry]
|
||||
"""List of sub-runs contained in this run, if any, in the order they were started.
|
||||
If filters were supplied, this list will contain only the runs that matched the
|
||||
filters."""
|
||||
@@ -85,7 +87,8 @@ class RunLogPatch:
|
||||
def __repr__(self) -> str:
|
||||
from pprint import pformat
|
||||
|
||||
return f"RunLogPatch(ops={pformat(self.ops)})"
|
||||
# 1:-1 to get rid of the [] around the list
|
||||
return f"RunLogPatch({pformat(self.ops)[1:-1]})"
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
return isinstance(other, RunLogPatch) and self.ops == other.ops
|
||||
@@ -112,7 +115,7 @@ class RunLog(RunLogPatch):
|
||||
def __repr__(self) -> str:
|
||||
from pprint import pformat
|
||||
|
||||
return f"RunLog(state={pformat(self.state)})"
|
||||
return f"RunLog({pformat(self.state)})"
|
||||
|
||||
|
||||
class LogStreamCallbackHandler(BaseTracer):
|
||||
@@ -143,7 +146,8 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
self.lock = threading.Lock()
|
||||
self.send_stream = send_stream
|
||||
self.receive_stream = receive_stream
|
||||
self._index_map: Dict[UUID, int] = {}
|
||||
self._key_map_by_run_id: Dict[UUID, str] = {}
|
||||
self._counter_map_by_name: Dict[str, int] = defaultdict(int)
|
||||
|
||||
def __aiter__(self) -> AsyncIterator[RunLogPatch]:
|
||||
return self.receive_stream.__aiter__()
|
||||
@@ -196,7 +200,7 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
id=str(run.id),
|
||||
streamed_output=[],
|
||||
final_output=None,
|
||||
logs=[],
|
||||
logs={},
|
||||
),
|
||||
}
|
||||
)
|
||||
@@ -207,14 +211,18 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
|
||||
# Determine previous index, increment by 1
|
||||
with self.lock:
|
||||
self._index_map[run.id] = max(self._index_map.values(), default=-1) + 1
|
||||
self._counter_map_by_name[run.name] += 1
|
||||
count = self._counter_map_by_name[run.name]
|
||||
self._key_map_by_run_id[run.id] = (
|
||||
run.name if count == 1 else f"{run.name}:{count}"
|
||||
)
|
||||
|
||||
# Add the run to the stream
|
||||
self.send_stream.send_nowait(
|
||||
RunLogPatch(
|
||||
{
|
||||
"op": "add",
|
||||
"path": f"/logs/{self._index_map[run.id]}",
|
||||
"path": f"/logs/{self._key_map_by_run_id[run.id]}",
|
||||
"value": LogEntry(
|
||||
id=str(run.id),
|
||||
name=run.name,
|
||||
@@ -233,7 +241,7 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
def _on_run_update(self, run: Run) -> None:
|
||||
"""Finish a run."""
|
||||
try:
|
||||
index = self._index_map.get(run.id)
|
||||
index = self._key_map_by_run_id.get(run.id)
|
||||
|
||||
if index is None:
|
||||
return
|
||||
@@ -243,7 +251,8 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
{
|
||||
"op": "add",
|
||||
"path": f"/logs/{index}/final_output",
|
||||
"value": run.outputs,
|
||||
# to undo the dumpd done by some runnables / tracer / etc
|
||||
"value": load(run.outputs),
|
||||
},
|
||||
{
|
||||
"op": "add",
|
||||
@@ -259,7 +268,7 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/final_output",
|
||||
"value": run.outputs,
|
||||
"value": load(run.outputs),
|
||||
}
|
||||
)
|
||||
)
|
||||
@@ -273,7 +282,7 @@ class LogStreamCallbackHandler(BaseTracer):
|
||||
chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]],
|
||||
) -> None:
|
||||
"""Process new LLM token."""
|
||||
index = self._index_map.get(run.id)
|
||||
index = self._key_map_by_run_id.get(run.id)
|
||||
|
||||
if index is None:
|
||||
return
|
||||
|
||||
0
libs/langchain/langchain/cli/__init__.py
Normal file
0
libs/langchain/langchain/cli/__init__.py
Normal file
54
libs/langchain/langchain/cli/cli.py
Normal file
54
libs/langchain/langchain/cli/cli.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""A CLI for creating a new project with LangChain."""
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import typer
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Typer must be installed to use the CLI. "
|
||||
"You can install it with `pip install typer`."
|
||||
)
|
||||
|
||||
from typing_extensions import Annotated
|
||||
|
||||
from langchain.cli.create_repo.base import create, is_poetry_installed
|
||||
from langchain.cli.create_repo.user_info import get_git_user_email, get_git_user_name
|
||||
|
||||
# Typer application object; shell-completion installation is disabled and
# running without arguments does not print the help screen.
app = typer.Typer(no_args_is_help=False, add_completion=False)

# Help text shared by the two author-related options below.
_GIT_CONFIG_HELP = "If not specified, will be inferred from git config if possible. "

# Each option prompts interactively; the prompt's default comes from the
# `default_factory` callable.
AUTHOR_NAME_OPTION = typer.Option(
    default_factory=get_git_user_name,
    prompt=True,
    help=_GIT_CONFIG_HELP,
)
AUTHOR_EMAIL_OPTION = typer.Option(
    default_factory=get_git_user_email,
    prompt=True,
    help=_GIT_CONFIG_HELP,
)
USE_POETRY_OPTION = typer.Option(
    default_factory=is_poetry_installed,
    prompt=True,
    help=(
        "Whether to use Poetry to manage the project. "
        "If not specified, Poetry will be used if poetry is installed."
    ),
)
|
||||
|
||||
|
||||
@app.command()
def new(
    project_directory: Annotated[
        Path, typer.Argument(help="The directory to create the project in.")
    ],
    author_name: Annotated[str, AUTHOR_NAME_OPTION],
    author_email: Annotated[str, AUTHOR_EMAIL_OPTION],
    use_poetry: Annotated[bool, USE_POETRY_OPTION],
) -> None:
    """Create a new project with LangChain."""
    # NOTE: the docstring above doubles as the command's CLI help text, so it is
    # intentionally kept short. All real work is delegated to `create`.
    create(
        project_directory=project_directory,
        author_name=author_name,
        author_email=author_email,
        use_poetry=use_poetry,
    )


# Allow running the CLI directly with `python -m langchain.cli.cli`.
if __name__ == "__main__":
    app()
|
||||
291
libs/langchain/langchain/cli/create_repo/base.py
Normal file
291
libs/langchain/langchain/cli/create_repo/base.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""Create a new LangChain project directory from the bundled templates."""
|
||||
import os
|
||||
import pathlib
|
||||
import string
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List, Sequence
|
||||
|
||||
import typer
|
||||
|
||||
import langchain
|
||||
from langchain.cli.create_repo.pypi_name import is_name_taken, lint_name
|
||||
|
||||
|
||||
class UnderscoreTemplate(string.Template):
    """``string.Template`` whose placeholder delimiter is four underscores.

    Placeholders are written ``____name`` (or ``____{name}``) instead of
    ``$name``, so a literal ``$`` in a template file is left untouched.
    """

    delimiter = "____"
|
||||
|
||||
|
||||
def _create_project_dir(
    project_directory_path: Path,
    use_poetry: bool,
    project_name: str,
    project_name_identifier: str,
    author_name: str,
    author_email: str,
) -> None:
    """Create the project directory and populate it from the templates.

    Args:
        project_directory_path: Destination directory; created (with parents)
            if it does not exist yet.
        use_poetry: Selects the Poetry template set when true, the pip one
            otherwise.
        project_name: Value substituted for ``____project_name``.
        project_name_identifier: Value substituted for
            ``____project_name_identifier`` (in file contents and paths).
        author_name: Value substituted for ``____author_name``.
        author_email: Value substituted for ``____author_email``.
    """
    project_directory_path.mkdir(parents=True, exist_ok=True)

    sources = _get_template_directories(use_poetry)
    # Validate first so nothing is written when a conflict exists.
    _check_conflicting_files(sources, project_directory_path)
    _copy_template_files(
        template_directories=sources,
        project_directory_path=project_directory_path,
        project_name=project_name,
        project_name_identifier=project_name_identifier,
        author_name=author_name,
        author_email=author_email,
    )
|
||||
|
||||
|
||||
def _get_template_directories(use_poetry: bool) -> List[Path]:
|
||||
"""Get the directories containing the templates.
|
||||
|
||||
Args:
|
||||
use_poetry: If true, will set up the project with Poetry.
|
||||
|
||||
"""
|
||||
template_parent_path = Path(__file__).parent / "templates"
|
||||
template_directories = [template_parent_path / "repo"]
|
||||
if use_poetry:
|
||||
template_directories.append(template_parent_path / "poetry")
|
||||
else:
|
||||
template_directories.append(template_parent_path / "pip")
|
||||
return template_directories
|
||||
|
||||
|
||||
def _check_conflicting_files(
|
||||
template_directories: Sequence[Path], project_directory_path: Path
|
||||
) -> None:
|
||||
"""Validate project directory doesn't contain conflicting files."""
|
||||
|
||||
for template_directory_path in template_directories:
|
||||
for template_file_path in template_directory_path.glob("**/*"):
|
||||
relative_template_file_path = template_file_path.relative_to(
|
||||
template_directory_path
|
||||
)
|
||||
project_file_path = project_directory_path / relative_template_file_path
|
||||
if project_file_path.exists():
|
||||
typer.echo(
|
||||
f"{typer.style('Error:', fg=typer.colors.RED)}"
|
||||
f" The project directory already contains a file"
|
||||
f" {typer.style(project_file_path, fg=typer.colors.BRIGHT_CYAN)}"
|
||||
f" that would be overwritten by the template.",
|
||||
err=True,
|
||||
)
|
||||
typer.echo(
|
||||
"Please remove this file and try again.",
|
||||
err=True,
|
||||
)
|
||||
raise typer.Exit(code=1)
|
||||
|
||||
|
||||
def _copy_template_files(
    template_directories: Sequence[Path],
    project_directory_path: Path,
    project_name: str,
    project_name_identifier: str,
    author_name: str,
    author_email: str,
) -> None:
    """Copy template files to project directory and substitute variables.

    Both file contents and relative paths are rendered with
    ``UnderscoreTemplate``; paths only receive ``project_name_identifier``.
    Files are read and written as UTF-8 regardless of the platform's locale
    encoding, because the templates contain non-ASCII characters.

    Args:
        template_directories: The directories containing the templates.
        project_directory_path: The destination directory.
        project_name: The name of the project.
        project_name_identifier: The identifier of the project name.
        author_name: The name of the author.
        author_email: The email of the author.

    Raises:
        KeyError: If a template references a placeholder that is not supplied.
        ValueError: If a template contains a malformed placeholder.
    """
    for template_directory_path in template_directories:
        for template_file_path in template_directory_path.glob("**/*"):
            relative_template_file_path = UnderscoreTemplate(
                str(template_file_path.relative_to(template_directory_path))
            ).substitute(project_name_identifier=project_name_identifier)
            project_file_path = project_directory_path / relative_template_file_path

            if template_file_path.is_dir():
                project_file_path.mkdir(parents=True, exist_ok=True)
                continue

            # Do not rely on glob yielding a directory before its contents.
            project_file_path.parent.mkdir(parents=True, exist_ok=True)
            rendered = UnderscoreTemplate(
                template_file_path.read_text(encoding="utf-8")
            ).substitute(
                project_name=project_name,
                project_name_identifier=project_name_identifier,
                author_name=author_name,
                author_email=author_email,
                langchain_version=langchain.__version__,
            )
            project_file_path.write_text(rendered, encoding="utf-8")
|
||||
|
||||
|
||||
def _poetry_install(project_directory_path: Path) -> None:
    """Install dependencies with Poetry.

    Runs ``poetry install`` inside the project directory. ``VIRTUAL_ENV`` is
    blanked in the child environment (presumably so Poetry does not reuse the
    caller's active virtualenv -- confirm). The command's exit status is not
    checked.

    Args:
        project_directory_path: Directory in which to run the command.
    """
    typer.echo(
        f"\n{typer.style('2.', bold=True, fg=typer.colors.GREEN)}"
        f" Installing dependencies with Poetry..."
    )
    # The former debugging `pwd` call was dropped: it raised FileNotFoundError
    # on platforms without a `pwd` executable and added nothing for the user.
    subprocess.run(
        ["poetry", "install"],
        cwd=project_directory_path,
        env={**os.environ, "VIRTUAL_ENV": ""},
    )
|
||||
|
||||
|
||||
def _pip_install(project_directory_path: Path) -> None:
    """Create virtual environment and install dependencies.

    Currently only creates a ``.venv`` virtual environment inside the project
    directory; dependency installation is still a TODO. The command's exit
    status is not checked.

    Args:
        project_directory_path: Directory in which to create ``.venv``.
    """
    import sys

    typer.echo(
        f"\n{typer.style('2.', bold=True, fg=typer.colors.GREEN)}"
        f" Creating virtual environment..."
    )
    # Use the interpreter running this CLI instead of whatever `python` is (or
    # is not) on PATH. The former debugging `pwd` call was dropped because it
    # raised FileNotFoundError on platforms without a `pwd` executable.
    subprocess.run([sys.executable, "-m", "venv", ".venv"], cwd=project_directory_path)
    # TODO install dependencies
|
||||
|
||||
|
||||
def _init_git(project_directory_path: Path) -> None:
    """Initialize a git repository and record an initial commit.

    Runs ``git init``, ``git add .`` and ``git commit`` in sequence inside the
    project directory. Exit statuses are not checked, so a failing step does
    not stop the following ones.

    Args:
        project_directory_path: Directory to turn into a git repository.
    """
    step_label = typer.style("3.", bold=True, fg=typer.colors.GREEN)
    typer.echo(f"\n{step_label} Initializing git...")

    git_commands = (
        ["git", "init"],
        # Stage everything and create the initial commit.
        ["git", "add", "."],
        ["git", "commit", "-m", "Initial commit"],
    )
    for git_command in git_commands:
        subprocess.run(git_command, cwd=project_directory_path)
|
||||
|
||||
|
||||
def _select_project_name(suggested_project_name: str) -> str:
    """Help the user select a valid project name.

    Prompts repeatedly until the user settles on a name. After each answer the
    name is linted and looked up on PyPI; for either kind of finding the user
    may pick another name or explicitly proceed with the current one.

    Args:
        suggested_project_name: Default offered at the prompt.

    Returns:
        The name the user chose. It may still have lint findings or be taken
        on PyPI if the user decided to proceed anyway.
    """
    error_label = typer.style("Error:", fg=typer.colors.RED)
    retry_question = (
        "Would you like to choose another name? "
        "Choose NO to proceed with existing name."
    )

    while True:
        project_name = typer.prompt(
            "Please choose a project name: ", default=suggested_project_name
        )
        styled_name = typer.style(project_name, fg=typer.colors.BRIGHT_CYAN)

        project_name_diagnostics = lint_name(project_name)
        if project_name_diagnostics:
            typer.echo(
                f"{error_label} The project name {styled_name} is not valid:",
                err=True,
            )
            # Keep the findings on the same stream as their header so that
            # redirected output does not split the message in two.
            for diagnostic in project_name_diagnostics:
                typer.echo(f" - {diagnostic}", err=True)

            if typer.confirm(retry_question, default=True):
                continue

        if is_name_taken(project_name):
            typer.echo(
                f"{error_label} The project name {styled_name}"
                f" is already taken on pypi",
                err=True,
            )

            if typer.confirm(retry_question, default=True):
                continue

        # Either nothing was flagged or the user chose to keep the name.
        return project_name
|
||||
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
def create(
    project_directory: pathlib.Path,
    author_name: str,
    author_email: str,
    use_poetry: bool,
) -> None:
    """Create a new LangChain project.

    Interactively selects a project name, asks for confirmation, renders the
    templates into the project directory and initializes a git repository.

    Args:
        project_directory (pathlib.Path): The directory to create the project in.
        author_name (str): The name of the author.
        author_email (str): The email of the author.
        use_poetry (bool): Whether to use Poetry to manage the project.

    Raises:
        typer.Exit: With code 0 if the user declines the confirmation, or with
            code 1 if the directory contains conflicting files.
    """

    project_directory_path = Path(project_directory)
    project_name_suggestion = project_directory_path.name.replace("-", "_")
    project_name = _select_project_name(project_name_suggestion)
    # The identifier becomes the package directory and the module path used by
    # `uvicorn <identifier>.server:app`. The user may have kept a name that
    # contains `-` or spaces, which would not be importable, so map those
    # separators to `_`.
    project_name_identifier = project_name.replace("-", "_").replace(" ", "_")

    resolved_path = project_directory_path.resolve()

    if not typer.confirm(
        f"\n{typer.style('>', bold=True, fg=typer.colors.GREEN)} "
        f"Creating new LangChain project "
        f"{typer.style(project_name, fg=typer.colors.BRIGHT_CYAN)}"
        f" in"
        f" {typer.style(resolved_path, fg=typer.colors.BRIGHT_CYAN)}",
        default=True,
    ):
        typer.echo("OK! Canceling project creation.")
        raise typer.Exit(code=0)

    _create_project_dir(
        project_directory_path,
        use_poetry,
        project_name,
        project_name_identifier,
        author_name,
        author_email,
    )

    # TODO(Team): Add installation
    # if use_poetry:
    #     _poetry_install(project_directory_path)
    # else:
    #     _pip_install(project_directory_path)

    _init_git(project_directory_path)

    typer.echo(
        f"\n{typer.style('Done!', bold=True, fg=typer.colors.GREEN)}"
        f" Your new LangChain project"
        f" {typer.style(project_name, fg=typer.colors.BRIGHT_CYAN)}"
        f" has been created in"
        f" {typer.style(project_directory_path.resolve(), fg=typer.colors.BRIGHT_CYAN)}"
        f"."
    )
    # TODO(Team): Add surfacing information from make file and installation
    # cd_dir = typer.style(
    #     f"cd {project_directory_path.resolve()}", fg=typer.colors.BRIGHT_CYAN
    # )
    # typer.echo(
    #     f"\nChange into the project directory with {cd_dir}."
    #     f" The following commands are available:"
    # )
    # subprocess.run(["make"], cwd=project_directory_path)

    # if not use_poetry:
    #     pip_install = typer.style(
    #         'pip install -e ".[dev]"', fg=typer.colors.BRIGHT_CYAN
    #     )
    #     typer.echo(
    #         f"\nTo install all dependencies activate your environment run:"
    #         f"\n{typer.style('source .venv/bin/activate', fg=typer.colors.BRIGHT_CYAN)}"
    #         f"\n{pip_install}."
    #     )
|
||||
|
||||
|
||||
def is_poetry_installed() -> bool:
    """Check if Poetry is installed.

    Returns:
        True if ``poetry --version`` can be executed and exits with status 0;
        False if it exits non-zero or no runnable ``poetry`` executable exists.
    """
    try:
        completed = subprocess.run(["poetry", "--version"], capture_output=True)
    except OSError:
        # subprocess raises FileNotFoundError (an OSError) when the executable
        # is missing from PATH -- exactly the "not installed" case.
        return False
    return completed.returncode == 0
|
||||
70
libs/langchain/langchain/cli/create_repo/pypi_name.py
Normal file
70
libs/langchain/langchain/cli/create_repo/pypi_name.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Code helps to check availability of the name of the project on PyPi
|
||||
|
||||
Adapted from https://github.com/danishprakash/pip-name/blob/master/pip-name
|
||||
"""
|
||||
from typing import List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
BASE_URL = "https://pypi.org/pypi"
|
||||
|
||||
UPPERCASE_SUGGESTION = "Use of uppercase letters is discouraged"
|
||||
SEPARATOR_SUGGESTION = "Use of `-` is discouraged, consider using `_`"
|
||||
NUMERIC_SUGGESTION = "Use of numbers is discouraged"
|
||||
|
||||
|
||||
def _request_pypi(name: str) -> Optional[dict]:
    """Request response from PyPi API.

    The lookup is best effort: if PyPI cannot be reached, answers with
    anything other than HTTP 200, or returns a body that is not JSON, the
    result is ``None`` instead of an exception.

    Args:
        name (str): Name of the project

    Returns:
        Optional[dict]: Response from PyPi API, or None if no project metadata
            could be retrieved.
    """
    target_url = f"{BASE_URL}/{name}/json"
    try:
        # Bound the wait so an unresponsive network cannot hang the CLI.
        response = requests.get(target_url, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # requests' JSON decode errors derive from ValueError.
        return None
|
||||
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
def lint_name(name: str) -> List[str]:
    """Collect naming suggestions for a project name.

    Three independent checks are applied, always reported in this order:
    separators (``-`` or a space), uppercase characters, numeric characters.

    Args:
        name (str): Name of the project

    Returns:
        List[str]: List of suggestions; empty when nothing was flagged.
    """
    findings: List[str] = []

    uses_separator = ("-" in name) or (" " in name)
    if uses_separator:
        findings.append(SEPARATOR_SUGGESTION)

    if any(map(str.isupper, name)):
        findings.append(UPPERCASE_SUGGESTION)

    if any(map(str.isnumeric, name)):
        findings.append(NUMERIC_SUGGESTION)

    return findings
|
||||
|
||||
|
||||
def is_name_taken(name: str) -> bool:
    """Check whether a project with this name already exists on PyPI.

    Names are compared in normalized form (case-insensitive, with runs of
    ``-``, ``_`` and ``.`` treated as equivalent), so ``my_project`` is
    reported as taken when ``my-project`` exists.

    Args:
        name (str): Name of the project

    Returns:
        bool: True if name is taken, False otherwise (including when no
            project metadata could be retrieved).
    """
    import re

    def _canonical(project: str) -> str:
        """Normalize a project name the way PEP 503 prescribes."""
        return re.sub(r"[-_.]+", "-", project).lower()

    response = _request_pypi(name)
    if not response:
        return False

    package_url = (response.get("info") or {}).get("package_url")
    if not package_url:
        return False

    # package_url looks like ".../project/<name>/"; take the last path segment
    # whether or not the trailing slash is present.
    module_name = package_url.rstrip("/").split("/")[-1]
    return _canonical(name) == _canonical(module_name)
|
||||
79
libs/langchain/langchain/cli/create_repo/templates/pip/.github/CONTRIBUTING.md
vendored
Normal file
79
libs/langchain/langchain/cli/create_repo/templates/pip/.github/CONTRIBUTING.md
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
# Contributing to ____project_name
|
||||
|
||||
Hi there! Thank you for even being interested in contributing to ____project_name.
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
To install requirements:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage.
|
||||
|
||||
Now, you should be able to run the common tasks in the following section. To double check, run `make test`, all tests should pass.
|
||||
|
||||
## ✅ Common Tasks
|
||||
|
||||
Type `make` for a list of common tasks.
|
||||
|
||||
### Code Formatting
|
||||
|
||||
Formatting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/) and [Ruff](https://docs.astral.sh/ruff/) (import sorting).
|
||||
|
||||
To run formatting for this project:
|
||||
|
||||
```bash
|
||||
make format
|
||||
```
|
||||
|
||||
Additionally, you can run the formatter only on the files that have been modified in your current branch as compared to the main branch using the format_diff command:
|
||||
|
||||
```bash
|
||||
make format_diff
|
||||
```
|
||||
|
||||
This is especially useful when you have made changes to a subset of the project and want to ensure your changes are properly formatted without affecting the rest of the codebase.
|
||||
|
||||
### Linting
|
||||
|
||||
Linting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/), [Ruff](https://docs.astral.sh/ruff/), and [mypy](http://mypy-lang.org/).
|
||||
|
||||
To run linting for this project:
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
In addition, you can run the linter only on the files that have been modified in your current branch as compared to the main branch using the lint_diff command:
|
||||
|
||||
```bash
|
||||
make lint_diff
|
||||
```
|
||||
|
||||
This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase.
|
||||
|
||||
We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
### Testing
|
||||
|
||||
To run unit tests:
|
||||
|
||||
```bash
|
||||
make test
|
||||
```
|
||||
|
||||
If you add new logic, please add a unit test.
|
||||
|
||||
## 🏭 Release Process
|
||||
|
||||
____project_name follows the [semver](https://semver.org/) versioning standard.
|
||||
|
||||
To use the [automated release workflow](./workflows/release.yml) you'll need to set up a PyPI account and [create an API token](https://pypi.org/help/#apitoken). Configure the API token for this GitHub repo by going to settings -> security -> secrets -> actions, creating the `PYPI_API_TOKEN` variable and setting the value to be your PyPI API token.
|
||||
|
||||
Once that's set up, you can release a new version of the package by opening a PR that:
|
||||
1. updates the package version in the [pyproject.toml file](../pyproject.toml),
|
||||
2. labels the PR with a `release` tag.
|
||||
When the PR is merged into main, a new release will be created.
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . /app
|
||||
|
||||
RUN pip install --no-cache-dir .
|
||||
|
||||
CMD exec uvicorn ____project_name_identifier.server:app --host 0.0.0.0 --port $PORT
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
# Every target here is a command name, not a file, so declare them all phony;
# otherwise a file or directory with the same name would stop the target from
# running.
.PHONY: all start format format_diff lint lint_diff test help

# Default target executed when no arguments are given to make.
all: help

# Start the development server with auto-reload.
start:
	uvicorn ____project_name_identifier.server:app --reload

# Define a variable for the test file path.
TEST_FILE ?= tests/

test:
	pytest $(TEST_FILE)

# Define a variable for Python and notebook files.
PYTHON_FILES=.
lint format: PYTHON_FILES=.
# The *_diff variants only look at files changed relative to the main branch.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$|\.ipynb$$')

lint lint_diff:
	mypy $(PYTHON_FILES)
	black $(PYTHON_FILES) --check
	ruff .

format format_diff:
	black $(PYTHON_FILES)
	ruff --select I --fix $(PYTHON_FILES)

######################
# HELP
######################

help:
	@echo '----'
	@echo 'make start - start server'
	@echo 'make format - run code formatters'
	@echo 'make format_diff - run code formatters on files changed relative to main'
	@echo 'make lint - run linters'
	@echo 'make lint_diff - run linters on files changed relative to main'
	@echo 'make test - run unit tests'
|
||||
@@ -0,0 +1,52 @@
|
||||
[project]
|
||||
name = "____project_name"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = [{name = "____author_name", email = "____author_email"}]
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8,<4.0"
|
||||
dependencies = [
|
||||
"langchain~=____langchain_version",
|
||||
"langserve[server]>=0.0.6",
|
||||
"tiktoken~=0.4.0",
|
||||
"openai~=0.27.8",
|
||||
"fastapi~=0.96.0",
|
||||
"uvicorn[standard]~=0.22.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest~=7.4.0",
|
||||
"pytest-asyncio~=0.21.1",
|
||||
"mypy~=1.4.1",
|
||||
"ruff~=0.0.278",
|
||||
"black~=23.7.0",
|
||||
"syrupy~=4.0.2",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
select = [
|
||||
"E", # pycodestyle
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
]
|
||||
|
||||
[tool.mypy]
|
||||
ignore_missing_imports = "True"
|
||||
disallow_untyped_defs = "True"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
# --strict-markers will raise errors on unknown marks.
|
||||
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
||||
#
|
||||
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
||||
# --strict-config any warnings encountered while parsing the `pytest`
|
||||
# section of the configuration file raise errors.
|
||||
#
|
||||
# https://github.com/tophat/syrupy
|
||||
# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
|
||||
addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused"
|
||||
89
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/CONTRIBUTING.md
vendored
Normal file
89
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/CONTRIBUTING.md
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
# Contributing to ____project_name
|
||||
|
||||
Hi there! Thank you for even being interested in contributing to ____project_name.
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
This project uses [Poetry](https://python-poetry.org/) as a dependency manager. Check out Poetry's [documentation on how to install it](https://python-poetry.org/docs/#installation) on your system before proceeding.
|
||||
|
||||
❗Note: If you use `Conda` or `Pyenv` as your environment / package manager, avoid dependency conflicts by doing the following first:
|
||||
1. *Before installing Poetry*, create and activate a new Conda env (e.g. `conda create -n langchain python=3.9`)
|
||||
2. Install Poetry (see above)
|
||||
3. Tell Poetry to use the virtualenv python environment (`poetry config virtualenvs.prefer-active-python true`)
|
||||
4. Continue with the following steps.
|
||||
|
||||
To install requirements:
|
||||
|
||||
```bash
|
||||
poetry install
|
||||
```
|
||||
|
||||
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage.
|
||||
|
||||
❗Note: If you're running Poetry 1.4.1 and receive a `WheelFileValidationError` for `debugpy` during installation, you can try either downgrading to Poetry 1.4.0 or disabling "modern installation" (`poetry config installer.modern-installation false`) and re-install requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
|
||||
|
||||
Now, you should be able to run the common tasks in the following section.
|
||||
|
||||
## ✅ Common Tasks
|
||||
|
||||
Type `make` for a list of common tasks.
|
||||
|
||||
### Code Formatting
|
||||
|
||||
Formatting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/) and [isort](https://pycqa.github.io/isort/).
|
||||
|
||||
To run formatting for this project:
|
||||
|
||||
```bash
|
||||
make format
|
||||
```
|
||||
|
||||
Additionally, you can run the formatter only on the files that have been modified in your current branch as compared to the main branch using the format_diff command:
|
||||
|
||||
```bash
|
||||
make format_diff
|
||||
```
|
||||
|
||||
This is especially useful when you have made changes to a subset of the project and want to ensure your changes are properly formatted without affecting the rest of the codebase.
|
||||
|
||||
### Linting
|
||||
|
||||
Linting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), [flake8](https://flake8.pycqa.org/en/latest/), and [mypy](http://mypy-lang.org/).
|
||||
|
||||
To run linting for this project:
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
In addition, you can run the linter only on the files that have been modified in your current branch as compared to the main branch using the lint_diff command:
|
||||
|
||||
```bash
|
||||
make lint_diff
|
||||
```
|
||||
|
||||
This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase.
|
||||
|
||||
We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
### Testing
|
||||
|
||||
To run unit tests:
|
||||
|
||||
```bash
|
||||
make test
|
||||
```
|
||||
|
||||
If you add new logic, please add a unit test.
|
||||
|
||||
## 🏭 Release Process
|
||||
|
||||
____project_name follows the [semver](https://semver.org/) versioning standard.
|
||||
|
||||
To use the [automated release workflow](./workflows/release.yml) you'll need to set up a PyPI account and [create an API token](https://pypi.org/help/#apitoken). Configure the API token for this GitHub repo by going to settings -> security -> secrets -> actions, creating the `PYPI_API_TOKEN` variable and setting the value to be your PyPI API token.
|
||||
|
||||
Once that's set up, you can release a new version of the package by opening a PR that:
|
||||
1. updates the package version in the [pyproject.toml file](../pyproject.toml),
|
||||
2. labels the PR with a `release` tag.
|
||||
When the PR is merged into main, a new release will be created.
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
# An action for setting up poetry install with caching.
|
||||
# Using a custom action since the default action does not
|
||||
# take poetry install groups into account.
|
||||
# Action code from:
|
||||
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
|
||||
name: poetry-install-with-caching
|
||||
description: Poetry install with support for caching of dependency groups.
|
||||
|
||||
inputs:
|
||||
python-version:
|
||||
description: Python version, supporting MAJOR.MINOR only
|
||||
required: true
|
||||
|
||||
poetry-version:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
install-command:
|
||||
description: Command run for installing dependencies
|
||||
required: false
|
||||
default: poetry install
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory to run install-command in
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: actions/setup-python@v4
|
||||
name: Setup python ${{ inputs.python-version }}
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
name: Cache Pip ${{ inputs.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}
|
||||
|
||||
- run: pipx install poetry==${{ inputs.poetry-version }} --python python${{ inputs.python-version }}
|
||||
shell: bash
|
||||
|
||||
- name: Check Poetry File
|
||||
shell: bash
|
||||
run: |
|
||||
poetry check
|
||||
|
||||
- name: Check lock file
|
||||
shell: bash
|
||||
run: |
|
||||
poetry lock --check
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-poetry
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- run: ${{ inputs.install-command }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
shell: bash
|
||||
36
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/workflows/lint.yml
vendored
Normal file
36
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/workflows/lint.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: poetry
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install
|
||||
- name: Analysing the code with our lint
|
||||
run: |
|
||||
make lint
|
||||
49
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/workflows/release.yml
vendored
Normal file
49
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/workflows/release.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: release
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- closed
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
|
||||
jobs:
|
||||
if_release:
  # Run only for pull requests that were actually merged AND carry the
  # `release` label. Both checks must be part of ONE expression: writing
  # `${{ a }} && ${{ b }}` produces a plain string such as "false && true",
  # and any non-empty string is truthy, so the job would always run.
  if: ${{ github.event.pull_request.merged == true && contains(github.event.pull_request.labels.*.name, 'release') }}
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3
    - name: Install poetry
      run: pipx install poetry==$POETRY_VERSION
    - name: Set up Python 3.10
      uses: actions/setup-python@v4
      with:
        python-version: "3.10"
        cache: "poetry"
    - name: Build project for distribution
      run: poetry build
    - name: Check Version
      id: check-version
      # Expose the package version as a step output; it is used below to
      # build the release tag.
      run: |
        echo "version=$(poetry version --short)" >> "$GITHUB_OUTPUT"
    - name: Create Release
      uses: ncipollo/release-action@v1
      with:
        artifacts: "dist/*"
        token: ${{ secrets.GITHUB_TOKEN }}
        draft: false
        generateReleaseNotes: true
        tag: v${{ steps.check-version.outputs.version }}
        commit: master
    - name: Publish to PyPI
      env:
        POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_API_TOKEN }}
      run: |
        poetry publish
|
||||
36
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/workflows/test.yml
vendored
Normal file
36
libs/langchain/langchain/cli/create_repo/templates/poetry/.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: "1.4.2"
|
||||
install-command: |
|
||||
echo "Running tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test
|
||||
shell: bash
|
||||
@@ -0,0 +1,11 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . /app
|
||||
|
||||
RUN pip install poetry && \
|
||||
poetry config virtualenvs.create false && \
|
||||
poetry install --no-interaction --no-ansi --only main
|
||||
|
||||
CMD exec uvicorn ____project_name_identifier.server:app --host 0.0.0.0 --port $PORT
|
||||
@@ -0,0 +1,38 @@
|
||||
.PHONY: all format lint test help
|
||||
|
||||
# Default target executed when no arguments are given to make.
|
||||
all: help
|
||||
|
||||
start:
|
||||
poetry run uvicorn ____project_name_identifier.server:app --reload
|
||||
|
||||
# Define a variable for the test file path.
|
||||
TEST_FILE ?= tests/
|
||||
|
||||
test:
|
||||
poetry run pytest $(TEST_FILE)
|
||||
|
||||
# Define a variable for Python and notebook files.
|
||||
PYTHON_FILES=.
|
||||
lint format: PYTHON_FILES=.
|
||||
lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$|\.ipynb$$')
|
||||
|
||||
lint lint_diff:
|
||||
poetry run mypy $(PYTHON_FILES)
|
||||
poetry run black $(PYTHON_FILES) --check
|
||||
poetry run ruff .
|
||||
|
||||
format format_diff:
|
||||
poetry run black $(PYTHON_FILES)
|
||||
poetry run ruff --select I --fix $(PYTHON_FILES)
|
||||
|
||||
######################
|
||||
# HELP
|
||||
######################
|
||||
|
||||
help:
|
||||
@echo '----'
|
||||
@echo 'make start - start server'
|
||||
@echo 'make format - run code formatters'
|
||||
@echo 'make lint - run linters'
|
||||
@echo 'make test - run unit tests'
|
||||
@@ -0,0 +1,46 @@
|
||||
# `____project_name`
|
||||
|
||||
<!--- This is a LangChain project bootstrapped by [LangChain CLI](https://github.com/langchain-ai/langchain). --->
|
||||
|
||||
## Customise
|
||||
|
||||
To customise this project, edit the following files:
|
||||
|
||||
- `____project_name_identifier/chain.py` contains an example chain, which you can edit to suit your needs.
|
||||
- `____project_name_identifier/server.py` contains a FastAPI app that serves that chain using `langserve`. You can edit this to add more endpoints or customise your server.
|
||||
- `tests/test_chain.py` contains tests for the chain. You can edit this to add more tests.
|
||||
- `pyproject.toml` contains the project metadata, including the project name, version, and dependencies. You can edit this to add more dependencies or customise your project metadata.
|
||||
|
||||
## Install dependencies
|
||||
|
||||
```bash
|
||||
poetry install
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
To run the project locally, run
|
||||
|
||||
```
|
||||
make start
|
||||
```
|
||||
|
||||
This will launch a webserver on port 8000.
|
||||
|
||||
## Deploy
|
||||
|
||||
To deploy the project, first build the docker image:
|
||||
|
||||
```
|
||||
docker build . -t ____project_name_identifier:latest
|
||||
```
|
||||
|
||||
Then run the image:
|
||||
|
||||
```
|
||||
docker run -e PORT=8000 -p 8000:8000 ____project_name_identifier:latest
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
For information on how to set up your dev environment and contribute, see [here](.github/CONTRIBUTING.md).
|
||||
@@ -0,0 +1,2 @@
|
||||
[virtualenvs]
|
||||
in-project = true
|
||||
@@ -0,0 +1,52 @@
|
||||
[tool.poetry]
|
||||
name = "____project_name"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = ["____author_name <____author_email>"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
packages = [{include = "____project_name_identifier"}]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8.1"
|
||||
langchain = "^____langchain_version"
|
||||
langserve = { version = ">=0.0.6", extras = ["server"] }
|
||||
tiktoken = "^0.4.0"
|
||||
openai = "^0.27.8"
|
||||
fastapi = "^0.96.0"
|
||||
uvicorn = {extras = ["standard"], version = "^0.22.0"}
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.0"
|
||||
pytest-asyncio = "^0.21.1"
|
||||
mypy = "^1.4.1"
|
||||
ruff = "^0.0.278"
|
||||
black = "^23.7.0"
|
||||
syrupy = "^4.0.2"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.ruff]
|
||||
select = [
|
||||
"E", # pycodestyle
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
]
|
||||
|
||||
[tool.mypy]
|
||||
ignore_missing_imports = "True"
|
||||
disallow_untyped_defs = "True"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
# --strict-markers will raise errors on unknown marks.
|
||||
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
||||
#
|
||||
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
||||
# --strict-config any warnings encountered while parsing the `pytest`
|
||||
# section of the configuration file raise errors.
|
||||
#
|
||||
# https://github.com/tophat/syrupy
|
||||
# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
|
||||
addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused"
|
||||
157
libs/langchain/langchain/cli/create_repo/templates/repo/.gitignore
vendored
Normal file
157
libs/langchain/langchain/cli/create_repo/templates/repo/.gitignore
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
.vs/
|
||||
.vscode/
|
||||
.idea/
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
docs/docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
notebooks/
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.envrc
|
||||
.venv
|
||||
.venvs
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# macOS display setting files
|
||||
.DS_Store
|
||||
|
||||
# Wandb directory
|
||||
wandb/
|
||||
|
||||
# asdf tool versions
|
||||
.tool-versions
|
||||
/.ruff_cache/
|
||||
|
||||
*.pkl
|
||||
*.bin
|
||||
|
||||
# integration test artifacts
|
||||
data_map*
|
||||
\[('_type', 'fake'), ('stop', None)]
|
||||
|
||||
# Replit files
|
||||
*replit*
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
The MIT License
|
||||
|
||||
Copyright (c) ____author_name
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -0,0 +1,19 @@
|
||||
# `____project_name`
|
||||
|
||||
<!--- This is a LangChain project bootstrapped by [LangChain CLI](https://github.com/langchain-ai/langchain). --->
|
||||
|
||||
## Run locally
|
||||
|
||||
To install run:
|
||||
|
||||
```bash
|
||||
poetry install
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
<!--- Add detailed documentation on how to use this package. --->
|
||||
|
||||
## Contributing
|
||||
|
||||
For information on how to set up your dev environment and contribute, see [here](.github/CONTRIBUTING.md).
|
||||
@@ -0,0 +1,12 @@
|
||||
"""____project_name_identifier package."""
|
||||
from importlib import metadata
|
||||
|
||||
from ____project_name_identifier.chain import get_chain
|
||||
|
||||
# Resolve the installed distribution's version for this package.
try:
    __version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
    # Case where package metadata is not available.
    __version__ = ""

# `__all__` must list the exported *names* as strings. Listing the value of
# `__version__` (e.g. "" or "0.0.1") makes `from <package> import *` fail,
# because no attribute with that name exists on the module.
__all__ = ["__version__", "get_chain"]
|
||||
@@ -0,0 +1,29 @@
|
||||
"""This is a template for a custom chain.
|
||||
|
||||
Edit this file to implement your chain logic.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chat_models.openai import ChatOpenAI
|
||||
from langchain.output_parsers.list import CommaSeparatedListOutputParser
|
||||
from langchain.prompts.chat import ChatPromptTemplate
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.schema.runnable import Runnable
|
||||
|
||||
template = """You are a helpful assistant who generates comma separated lists.
|
||||
A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.
|
||||
ONLY return a comma separated list, and nothing more.""" # noqa: E501
|
||||
human_template = "{text}"
|
||||
|
||||
|
||||
def get_chain(model: Optional[BaseLanguageModel] = None) -> Runnable:
    """Build the example chain: chat prompt -> model -> list output parser.

    Args:
        model: Language model to pipe the prompt into. When omitted (or
            otherwise falsy) a ``ChatOpenAI()`` instance is created.

    Returns:
        The runnable produced by piping the chat prompt into the model and
        then into a ``CommaSeparatedListOutputParser``.
    """
    if not model:
        model = ChatOpenAI()

    # System instructions first, then the user-supplied "{text}" slot.
    messages = [
        ("system", template),
        ("human", human_template),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    prompt_and_model = chat_prompt | model
    return prompt_and_model | CommaSeparatedListOutputParser()
|
||||
@@ -0,0 +1,17 @@
|
||||
from fastapi import FastAPI
|
||||
from langserve import add_routes
|
||||
|
||||
from ____project_name_identifier.chain import get_chain
|
||||
|
||||
# FastAPI application object; this is what `uvicorn ...server:app` loads.
app = FastAPI()

# Register the example chain's routes on the app via langserve.
add_routes(app, get_chain(), config_keys=["tags"])

if __name__ == "__main__":
    # Only reached when this file is executed directly as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)
|
||||
@@ -0,0 +1,10 @@
|
||||
from ____project_name_identifier import get_chain
|
||||
|
||||
|
||||
def test_my_chain() -> None:
    """Smoke-test the chain end to end; edit this to test your own chain."""
    from langchain.llms.human import HumanInputLLM

    # Stand-in model whose input function always answers "foo".
    fake_llm = HumanInputLLM(input_func=lambda *args, **kwargs: "foo")
    get_chain(fake_llm).invoke({"text": "foo"})
|
||||
29
libs/langchain/langchain/cli/create_repo/user_info.py
Normal file
29
libs/langchain/langchain/cli/create_repo/user_info.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Look up user information from local git."""
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def get_git_user_name() -> Optional[str]:
    """Get the user's name from git, if it is configured, otherwise None.

    Returns:
        The stripped output of ``git config --get user.name``, or None when
        the ``git`` executable cannot be found or when git prints nothing
        for the key (i.e. the name is not configured).
    """
    try:
        completed = subprocess.run(
            ["git", "config", "--get", "user.name"], capture_output=True
        )
    except FileNotFoundError:
        # The git executable is not available.
        return None
    name = completed.stdout.decode().strip()
    # Empty output means no value was printed; report that as None rather
    # than an empty string so the result matches the documented contract.
    return name or None
|
||||
|
||||
|
||||
def get_git_user_email() -> Optional[str]:
    """Get the user's email from git if it is configured, otherwise None.

    Returns:
        The stripped output of ``git config --get user.email``, or None when
        the ``git`` executable cannot be found or when git prints nothing
        for the key (i.e. the email is not configured).
    """
    try:
        completed = subprocess.run(
            ["git", "config", "--get", "user.email"], capture_output=True
        )
    except FileNotFoundError:
        # The git executable is not available.
        return None
    email = completed.stdout.decode().strip()
    # Empty output means no value was printed; report that as None rather
    # than an empty string so the result matches the documented contract.
    return email or None
|
||||
@@ -4,6 +4,7 @@ from typing import Any, Callable, Optional, Union, cast
|
||||
|
||||
from langchain.evaluation.schema import StringEvaluator
|
||||
from langchain.output_parsers.json import parse_json_markdown
|
||||
import json
|
||||
|
||||
|
||||
class JsonValidityEvaluator(StringEvaluator):
|
||||
@@ -51,7 +52,7 @@ class JsonValidityEvaluator(StringEvaluator):
|
||||
prediction: str,
|
||||
input: Optional[str] = None,
|
||||
reference: Optional[str] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
"""Evaluate the prediction string.
|
||||
|
||||
@@ -131,7 +132,7 @@ class JsonEqualityEvaluator(StringEvaluator):
|
||||
prediction: str,
|
||||
input: Optional[str] = None,
|
||||
reference: Optional[str] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
"""Evaluate the prediction string.
|
||||
|
||||
@@ -151,3 +152,92 @@ class JsonEqualityEvaluator(StringEvaluator):
|
||||
parsed = sorted(parsed, key=lambda x: str(x))
|
||||
label = sorted(label, key=lambda x: str(x))
|
||||
return {"score": self.operator(parsed, label)}
|
||||
|
||||
|
||||
class JsonSchemaEvaluator(StringEvaluator):
    """Score a prediction by validating it against a JSON schema.

    The prediction is parsed with ``parse_json_markdown`` and checked with
    ``jsonschema.validate``. No input string is needed; the reference is the
    schema, given either as a string (parsed with ``parse_json_markdown``)
    or as an already-parsed object, which is used as-is.

    Attributes:
        requires_input (bool): Always False.
        requires_reference (bool): Always True.
        evaluation_name (str): Always "json_schema".

    Examples:
        >>> evaluator = JsonSchemaEvaluator()
        >>> schema = {
        ...     "type": "object",
        ...     "properties": {
        ...         "name": {"type": "string"},
        ...         "age": {"type": "integer"}
        ...     },
        ...     "required": ["name", "age"]
        ... }
        >>> evaluator.evaluate_strings('{"name": "John", "age": 30}', reference=schema)
        {'score': 1}

        A prediction such as ``'{"name": "John", "age": "30"}'`` yields
        ``{'score': 0, 'reasoning': ...}`` where the reasoning is the text of
        the validation error raised by ``jsonschema``.
    """

    def __init__(self, **kwargs: Any) -> None:
        # Keyword arguments are accepted but not forwarded to the base class.
        super().__init__()

    @property
    def requires_input(self) -> bool:
        """This evaluator never needs an input string."""
        return False

    @property
    def requires_reference(self) -> bool:
        """A reference (the JSON schema) is always required."""
        return True

    @property
    def evaluation_name(self) -> str:
        """Name under which this evaluation is reported."""
        return "json_schema"

    def _evaluate_strings(
        self,
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Validate ``prediction`` against the schema given in ``reference``.

        Args:
            prediction (str): The prediction string to evaluate.
            input (str, optional): Not used in this evaluator.
            reference (str): The JSON schema to validate against. A string is
                parsed first; any other value is passed to the validator
                unchanged.

        Returns:
            dict: ``{"score": 1}`` when the prediction conforms to the schema.
                Otherwise ``{"score": 0, "reasoning": <message>}``, where the
                message is the validation error text or, when the prediction
                cannot be decoded, ``"JSON Decode Error: ..."``.

        Raises:
            ImportError: If the ``jsonschema`` package is not installed.
        """
        try:
            import jsonschema
        except ImportError:
            raise ImportError(
                "The jsonschema package is required for the JsonSchemaEvaluator. "
                "You can install it with `pip install jsonschema`."
            )

        # Schema parsing happens outside the try block below, so a schema
        # that fails to parse propagates instead of being scored as 0.
        schema = (
            parse_json_markdown(reference) if isinstance(reference, str) else reference
        )

        try:
            candidate = parse_json_markdown(prediction)
            jsonschema.validate(instance=candidate, schema=schema)
        except jsonschema.exceptions.ValidationError as exc:
            return {"score": 0, "reasoning": str(exc)}
        except json.JSONDecodeError as exc:
            return {"score": 0, "reasoning": f"JSON Decode Error: {str(exc)}"}
        return {"score": 1}
|
||||
|
||||
@@ -1,19 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union
|
||||
from typing import Any, Dict, List, Literal, Optional, Sequence, Union
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vespa.application import Vespa
|
||||
|
||||
|
||||
class VespaRetriever(BaseRetriever):
|
||||
"""`Vespa` retriever."""
|
||||
|
||||
app: Vespa
|
||||
app: Any
|
||||
"""Vespa application to query."""
|
||||
body: Dict
|
||||
"""Body of the query."""
|
||||
|
||||
@@ -76,6 +76,7 @@ from langchain.vectorstores.usearch import USearch
|
||||
from langchain.vectorstores.vald import Vald
|
||||
from langchain.vectorstores.vearch import Vearch
|
||||
from langchain.vectorstores.vectara import Vectara
|
||||
from langchain.vectorstores.vespa import VespaStore
|
||||
from langchain.vectorstores.weaviate import Weaviate
|
||||
from langchain.vectorstores.zep import ZepVectorStore
|
||||
from langchain.vectorstores.zilliz import Zilliz
|
||||
@@ -143,6 +144,7 @@ __all__ = [
|
||||
"Vearch",
|
||||
"Vectara",
|
||||
"VectorStore",
|
||||
"VespaStore",
|
||||
"Weaviate",
|
||||
"ZepVectorStore",
|
||||
"Zilliz",
|
||||
|
||||
@@ -7,7 +7,12 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
|
||||
import sqlalchemy
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.dialects.postgresql import JSON, UUID
|
||||
from sqlalchemy.orm import Session, declarative_base, relationship
|
||||
from sqlalchemy.orm import Session, relationship
|
||||
|
||||
try:
|
||||
from sqlalchemy.orm import declarative_base
|
||||
except ImportError:
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
|
||||
@@ -23,7 +23,12 @@ import numpy as np
|
||||
import sqlalchemy
|
||||
from sqlalchemy import delete
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Session, declarative_base
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
try:
|
||||
from sqlalchemy.orm import declarative_base
|
||||
except ImportError:
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
|
||||
267
libs/langchain/langchain/vectorstores/vespa.py
Normal file
267
libs/langchain/langchain/vectorstores/vespa.py
Normal file
@@ -0,0 +1,267 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
|
||||
|
||||
class VespaStore(VectorStore):
|
||||
"""
|
||||
`Vespa` vector store.
|
||||
|
||||
To use, you should have the python client library ``pyvespa`` installed.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain.vectorstores import VespaStore
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from vespa.application import Vespa
|
||||
|
||||
# Create a vespa client dependent upon your application,
|
||||
# e.g. either connecting to Vespa Cloud or a local deployment
|
||||
# such as Docker. Please refer to the PyVespa documentation on
|
||||
# how to initialize the client.
|
||||
|
||||
vespa_app = Vespa(url="...", port=..., application_package=...)
|
||||
|
||||
# You need to instruct LangChain on which fields to use for embeddings
|
||||
vespa_config = dict(
|
||||
page_content_field="text",
|
||||
embedding_field="embedding",
|
||||
input_field="query_embedding",
|
||||
metadata_fields=["date", "rating", "author"]
|
||||
)
|
||||
|
||||
embedding_function = OpenAIEmbeddings()
|
||||
vectorstore = VespaStore(vespa_app, embedding_function, **vespa_config)
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
app: Any,
|
||||
embedding_function: Optional[Embeddings] = None,
|
||||
page_content_field: Optional[str] = None,
|
||||
embedding_field: Optional[str] = None,
|
||||
input_field: Optional[str] = None,
|
||||
metadata_fields: Optional[List[str]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize with a PyVespa client.
|
||||
"""
|
||||
try:
|
||||
from vespa.application import Vespa
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import Vespa python package. "
|
||||
"Please install it with `pip install pyvespa`."
|
||||
)
|
||||
if not isinstance(app, Vespa):
|
||||
raise ValueError(
|
||||
f"app should be an instance of vespa.application.Vespa, got {type(app)}"
|
||||
)
|
||||
|
||||
self._vespa_app = app
|
||||
self._embedding_function = embedding_function
|
||||
self._page_content_field = page_content_field
|
||||
self._embedding_field = embedding_field
|
||||
self._input_field = input_field
|
||||
self._metadata_fields = metadata_fields
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Add texts to the vectorstore.
|
||||
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
ids: Optional list of ids associated with the texts.
|
||||
kwargs: vectorstore specific parameters
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
|
||||
embeddings = None
|
||||
if self._embedding_function is not None:
|
||||
embeddings = self._embedding_function.embed_documents(list(texts))
|
||||
|
||||
if ids is None:
|
||||
ids = [str(f"{i+1}") for i, _ in enumerate(texts)]
|
||||
|
||||
batch = []
|
||||
for i, text in enumerate(texts):
|
||||
fields: Dict[str, Union[str, List[float]]] = {}
|
||||
if self._page_content_field is not None:
|
||||
fields[self._page_content_field] = text
|
||||
if self._embedding_field is not None and embeddings is not None:
|
||||
fields[self._embedding_field] = embeddings[i]
|
||||
if metadatas is not None and self._metadata_fields is not None:
|
||||
for metadata_field in self._metadata_fields:
|
||||
if metadata_field in metadatas[i]:
|
||||
fields[metadata_field] = metadatas[i][metadata_field]
|
||||
batch.append({"id": ids[i], "fields": fields})
|
||||
|
||||
results = self._vespa_app.feed_batch(batch)
|
||||
for result in results:
|
||||
if not (str(result.status_code).startswith("2")):
|
||||
raise RuntimeError(
|
||||
f"Could not add document to Vespa. "
|
||||
f"Error code: {result.status_code}. "
|
||||
f"Message: {result.json['message']}"
|
||||
)
|
||||
return ids
|
||||
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
if ids is None:
|
||||
return False
|
||||
batch = [{"id": id} for id in ids]
|
||||
result = self._vespa_app.delete_batch(batch)
|
||||
return sum([0 if r.status_code == 200 else 1 for r in result]) == 0
|
||||
|
||||
def _create_query(
|
||||
self, query_embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> Dict:
|
||||
hits = k
|
||||
doc_embedding_field = self._embedding_field
|
||||
input_embedding_field = self._input_field
|
||||
ranking_function = kwargs["ranking"] if "ranking" in kwargs else "default"
|
||||
filter = kwargs["filter"] if "filter" in kwargs else None
|
||||
|
||||
approximate = kwargs["approximate"] if "approximate" in kwargs else False
|
||||
approximate = "true" if approximate else "false"
|
||||
|
||||
yql = "select * from sources * where "
|
||||
yql += f"{{targetHits: {hits}, approximate: {approximate}}}"
|
||||
yql += f"nearestNeighbor({doc_embedding_field}, {input_embedding_field})"
|
||||
if filter is not None:
|
||||
yql += f" and {filter}"
|
||||
|
||||
query = {
|
||||
"yql": yql,
|
||||
f"input.query({input_embedding_field})": query_embedding,
|
||||
"ranking": ranking_function,
|
||||
"hits": hits,
|
||||
}
|
||||
return query
|
||||
|
||||
def similarity_search_by_vector_with_score(
|
||||
self, query_embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""
|
||||
Performs similarity search from a embeddings vector.
|
||||
|
||||
Args:
|
||||
query_embedding: Embeddings vector to search for.
|
||||
k: Number of results to return.
|
||||
custom_query: Use this custom query instead default query (kwargs)
|
||||
kwargs: other vector store specific parameters
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
if "custom_query" in kwargs:
|
||||
query = kwargs["custom_query"]
|
||||
else:
|
||||
query = self._create_query(query_embedding, k, **kwargs)
|
||||
|
||||
try:
|
||||
response = self._vespa_app.query(body=query)
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"Could not retrieve data from Vespa: "
|
||||
f"{e.args[0][0]['summary']}. "
|
||||
f"Error: {e.args[0][0]['message']}"
|
||||
)
|
||||
if not str(response.status_code).startswith("2"):
|
||||
raise RuntimeError(
|
||||
f"Could not retrieve data from Vespa. "
|
||||
f"Error code: {response.status_code}. "
|
||||
f"Message: {response.json['message']}"
|
||||
)
|
||||
|
||||
root = response.json["root"]
|
||||
if "errors" in root:
|
||||
import json
|
||||
|
||||
raise RuntimeError(json.dumps(root["errors"]))
|
||||
|
||||
if response is None or response.hits is None:
|
||||
return []
|
||||
|
||||
docs = []
|
||||
for child in response.hits:
|
||||
page_content = child["fields"][self._page_content_field]
|
||||
score = child["relevance"]
|
||||
metadata = {"id": child["id"]}
|
||||
if self._metadata_fields is not None:
|
||||
for field in self._metadata_fields:
|
||||
metadata[field] = child["fields"].get(field)
|
||||
doc = Document(page_content=page_content, metadata=metadata)
|
||||
docs.append((doc, score))
|
||||
return docs
|
||||
|
||||
def similarity_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
results = self.similarity_search_by_vector_with_score(embedding, k, **kwargs)
|
||||
return [r[0] for r in results]
|
||||
|
||||
def similarity_search_with_score(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
query_emb = []
|
||||
if self._embedding_function is not None:
|
||||
query_emb = self._embedding_function.embed_query(query)
|
||||
return self.similarity_search_by_vector_with_score(query_emb, k, **kwargs)
|
||||
|
||||
def similarity_search(
    self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
    """Return the documents matching the text ``query``, without scores.

    Delegates to ``similarity_search_with_score`` and keeps only the first
    element of every result entry.

    Args:
        query: Query text forwarded unchanged to the scored search.
        k: Forwarded unchanged to the scored search (defaults to 4).
        **kwargs: Extra keyword arguments forwarded to the scored search.

    Returns:
        The first element of each entry returned by the scored search,
        in the same order.
    """
    scored_hits = self.similarity_search_with_score(query, k, **kwargs)
    documents = []
    for scored_hit in scored_hits:
        documents.append(scored_hit[0])
    return documents
|
||||
|
||||
def max_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    **kwargs: Any,
) -> List[Document]:
    """Placeholder for text-query MMR search; always raises.

    None of the arguments are used.

    Raises:
        NotImplementedError: Always.
    """
    reason = "MMR search not implemented"
    raise NotImplementedError(reason)
|
||||
|
||||
def max_marginal_relevance_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    **kwargs: Any,
) -> List[Document]:
    """Placeholder for vector-query MMR search; always raises.

    None of the arguments are used.

    Raises:
        NotImplementedError: Always.
    """
    reason = "MMR search by vector not implemented"
    raise NotImplementedError(reason)
|
||||
|
||||
@classmethod
def from_texts(
    cls: Type[VespaStore],
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> VespaStore:
    """Build a store and add ``texts`` to it in one step.

    Args:
        texts: Texts handed to ``add_texts``.
        embedding: Passed to the constructor as ``embedding_function``.
        metadatas: Optional metadata list handed to ``add_texts``.
        ids: Optional id list handed to ``add_texts``.
        **kwargs: Remaining keyword arguments forwarded to the constructor.

    Returns:
        The newly constructed instance, after ``add_texts`` has been called.
    """
    store = cls(embedding_function=embedding, **kwargs)
    # The value returned by add_texts is intentionally not used here.
    store.add_texts(texts=texts, metadatas=metadatas, ids=ids)
    return store
|
||||
|
||||
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
    """Return a retriever by deferring entirely to the parent class.

    Args:
        **kwargs: Forwarded unchanged to the parent ``as_retriever``.

    Returns:
        Whatever the parent class's ``as_retriever`` returns.
    """
    retriever = super().as_retriever(**kwargs)
    return retriever
|
||||
48
libs/langchain/poetry.lock
generated
48
libs/langchain/poetry.lock
generated
@@ -2846,6 +2846,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
|
||||
{file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
|
||||
{file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
|
||||
@@ -2854,6 +2855,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"},
|
||||
@@ -2883,6 +2885,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"},
|
||||
{file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"},
|
||||
@@ -2891,6 +2894,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"},
|
||||
@@ -4478,6 +4482,16 @@ files = [
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
|
||||
@@ -7437,6 +7451,7 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
|
||||
@@ -7444,8 +7459,15 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
|
||||
@@ -7462,6 +7484,7 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
|
||||
@@ -7469,6 +7492,7 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
|
||||
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
|
||||
@@ -9678,6 +9702,27 @@ torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata",
|
||||
video = ["av (==9.2.0)", "decord (==0.6.0)"]
|
||||
vision = ["Pillow (<10.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "typer"
|
||||
version = "0.9.0"
|
||||
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
|
||||
optional = true
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee"},
|
||||
{file = "typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
click = ">=7.1.1,<9.0.0"
|
||||
typing-extensions = ">=3.7.4.3"
|
||||
|
||||
[package.extras]
|
||||
all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
|
||||
dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"]
|
||||
doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"]
|
||||
test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "types-chardet"
|
||||
version = "5.0.4.6"
|
||||
@@ -10602,6 +10647,7 @@ cffi = ["cffi (>=1.11)"]
|
||||
all = ["O365", "aleph-alpha-client", "amadeus", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clarifai", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "esprima", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jinja2", "jq", "lancedb", "langkit", "lark", "libdeeplake", "librosa", "lxml", "manifest-ml", "marqo", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "python-arango", "pyvespa", "qdrant-client", "rdflib", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
|
||||
azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "openai"]
|
||||
clarifai = ["clarifai"]
|
||||
cli = ["typer"]
|
||||
cohere = ["cohere"]
|
||||
docarray = ["docarray"]
|
||||
embeddings = ["sentence-transformers"]
|
||||
@@ -10615,4 +10661,4 @@ text-helpers = ["chardet"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "d40457accab6666901b6d8c2fd0d911814ab8f28265637c189f2512e1496fd92"
|
||||
content-hash = "498a5510e617012122596bf4e947f7466d7f574e7c7f1bb69e264ff0990f2277"
|
||||
|
||||
@@ -9,6 +9,7 @@ repository = "https://github.com/langchain-ai/langchain"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
langchain-server = "langchain.server:main"
|
||||
langchain = "langchain.cli.cli:app"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
@@ -133,6 +134,7 @@ motor = {version = "^3.3.1", optional = true}
|
||||
anyio = "<4.0"
|
||||
jsonpatch = "^1.33"
|
||||
timescale-vector = {version = "^0.0.1", optional = true}
|
||||
typer = {version= "^0.9.0", optional = true}
|
||||
anthropic = {version = "^0.3.11", optional = true}
|
||||
|
||||
|
||||
@@ -302,6 +304,10 @@ all = [
|
||||
"python-arango",
|
||||
]
|
||||
|
||||
cli = [
|
||||
"typer"
|
||||
]
|
||||
|
||||
# An extra used to be able to add extended testing.
|
||||
# Please use new-line on formatting to make it easier to add new packages without
|
||||
# merge-conflicts
|
||||
|
||||
@@ -1239,7 +1239,7 @@ async def test_prompt() -> None:
|
||||
assert len(stream_log[0].ops) == 1
|
||||
assert stream_log[0].ops[0]["op"] == "replace"
|
||||
assert stream_log[0].ops[0]["path"] == ""
|
||||
assert stream_log[0].ops[0]["value"]["logs"] == []
|
||||
assert stream_log[0].ops[0]["value"]["logs"] == {}
|
||||
assert stream_log[0].ops[0]["value"]["final_output"] is None
|
||||
assert stream_log[0].ops[0]["value"]["streamed_output"] == []
|
||||
assert isinstance(stream_log[0].ops[0]["value"]["id"], str)
|
||||
@@ -1249,40 +1249,12 @@ async def test_prompt() -> None:
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/final_output",
|
||||
"value": {
|
||||
"id": ["langchain", "prompts", "chat", "ChatPromptValue"],
|
||||
"kwargs": {
|
||||
"messages": [
|
||||
{
|
||||
"id": [
|
||||
"langchain",
|
||||
"schema",
|
||||
"messages",
|
||||
"SystemMessage",
|
||||
],
|
||||
"kwargs": {"content": "You are a nice " "assistant."},
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
},
|
||||
{
|
||||
"id": [
|
||||
"langchain",
|
||||
"schema",
|
||||
"messages",
|
||||
"HumanMessage",
|
||||
],
|
||||
"kwargs": {
|
||||
"additional_kwargs": {},
|
||||
"content": "What is your " "name?",
|
||||
},
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
},
|
||||
]
|
||||
},
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
},
|
||||
"value": ChatPromptValue(
|
||||
messages=[
|
||||
SystemMessage(content="You are a nice assistant."),
|
||||
HumanMessage(content="What is your name?"),
|
||||
]
|
||||
),
|
||||
}
|
||||
),
|
||||
RunLogPatch({"op": "add", "path": "/streamed_output/-", "value": expected}),
|
||||
@@ -1525,7 +1497,7 @@ async def test_prompt_with_llm(
|
||||
"op": "replace",
|
||||
"path": "",
|
||||
"value": {
|
||||
"logs": [],
|
||||
"logs": {},
|
||||
"final_output": None,
|
||||
"streamed_output": [],
|
||||
},
|
||||
@@ -1534,7 +1506,7 @@ async def test_prompt_with_llm(
|
||||
RunLogPatch(
|
||||
{
|
||||
"op": "add",
|
||||
"path": "/logs/0",
|
||||
"path": "/logs/ChatPromptTemplate",
|
||||
"value": {
|
||||
"end_time": None,
|
||||
"final_output": None,
|
||||
@@ -1550,55 +1522,24 @@ async def test_prompt_with_llm(
|
||||
RunLogPatch(
|
||||
{
|
||||
"op": "add",
|
||||
"path": "/logs/0/final_output",
|
||||
"value": {
|
||||
"id": ["langchain", "prompts", "chat", "ChatPromptValue"],
|
||||
"kwargs": {
|
||||
"messages": [
|
||||
{
|
||||
"id": [
|
||||
"langchain",
|
||||
"schema",
|
||||
"messages",
|
||||
"SystemMessage",
|
||||
],
|
||||
"kwargs": {
|
||||
"additional_kwargs": {},
|
||||
"content": "You are a nice " "assistant.",
|
||||
},
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
},
|
||||
{
|
||||
"id": [
|
||||
"langchain",
|
||||
"schema",
|
||||
"messages",
|
||||
"HumanMessage",
|
||||
],
|
||||
"kwargs": {
|
||||
"additional_kwargs": {},
|
||||
"content": "What is your " "name?",
|
||||
},
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
},
|
||||
]
|
||||
},
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
},
|
||||
"path": "/logs/ChatPromptTemplate/final_output",
|
||||
"value": ChatPromptValue(
|
||||
messages=[
|
||||
SystemMessage(content="You are a nice assistant."),
|
||||
HumanMessage(content="What is your name?"),
|
||||
]
|
||||
),
|
||||
},
|
||||
{
|
||||
"op": "add",
|
||||
"path": "/logs/0/end_time",
|
||||
"path": "/logs/ChatPromptTemplate/end_time",
|
||||
"value": "2023-01-01T00:00:00.000",
|
||||
},
|
||||
),
|
||||
RunLogPatch(
|
||||
{
|
||||
"op": "add",
|
||||
"path": "/logs/1",
|
||||
"path": "/logs/FakeListLLM",
|
||||
"value": {
|
||||
"end_time": None,
|
||||
"final_output": None,
|
||||
@@ -1614,7 +1555,7 @@ async def test_prompt_with_llm(
|
||||
RunLogPatch(
|
||||
{
|
||||
"op": "add",
|
||||
"path": "/logs/1/final_output",
|
||||
"path": "/logs/FakeListLLM/final_output",
|
||||
"value": {
|
||||
"generations": [[{"generation_info": None, "text": "foo"}]],
|
||||
"llm_output": None,
|
||||
@@ -1623,7 +1564,7 @@ async def test_prompt_with_llm(
|
||||
},
|
||||
{
|
||||
"op": "add",
|
||||
"path": "/logs/1/end_time",
|
||||
"path": "/logs/FakeListLLM/end_time",
|
||||
"value": "2023-01-01T00:00:00.000",
|
||||
},
|
||||
),
|
||||
@@ -1634,6 +1575,192 @@ async def test_prompt_with_llm(
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@freeze_time("2023-01-01")
async def test_stream_log_retriever() -> None:
    """Check the run-log patches streamed by a retriever -> prompt -> LLM-map chain.

    The chain fans a question into a retriever and an ``itemgetter``, feeds
    both into a chat prompt, then sends the prompt to two copies of a fake
    LLM. Everything except the last nine patches is compared exactly; the
    final aggregated state is only checked for the set of log keys.
    """
    frozen_ts = "2023-01-01T00:00:00.000"
    question = "What is your name?"

    def fake_docs() -> list:
        # Fresh instances each time so the expected values share no state.
        return [Document(page_content="foo"), Document(page_content="bar")]

    def started(name: str, run_type: str, tag: str) -> RunLogPatch:
        # Patch emitted when a sub-run is first registered under /logs/<name>.
        return RunLogPatch(
            {
                "op": "add",
                "path": "/logs/" + name,
                "value": {
                    "end_time": None,
                    "final_output": None,
                    "metadata": {},
                    "name": name,
                    "start_time": frozen_ts,
                    "streamed_output_str": [],
                    "tags": [tag],
                    "type": run_type,
                },
            }
        )

    def finished(name: str, output) -> RunLogPatch:  # type: ignore[no-untyped-def]
        # Patch emitted when a sub-run completes: final output, then end time.
        return RunLogPatch(
            {
                "op": "add",
                "path": "/logs/" + name + "/final_output",
                "value": output,
            },
            {
                "op": "add",
                "path": "/logs/" + name + "/end_time",
                "value": frozen_ts,
            },
        )

    prompt = (
        SystemMessagePromptTemplate.from_template("You are a nice assistant.")
        + "{documents}"
        + "{question}"
    )
    llm = FakeListLLM(responses=["foo", "bar"])

    chain: Runnable = (
        {"documents": FakeRetriever(), "question": itemgetter("question")}
        | prompt
        | {"one": llm, "two": llm}
    )

    stream_log = []
    async for patch in chain.astream_log({"question": question}):
        stream_log.append(patch)

    # remove ids from logs
    for patch in stream_log:
        for op in patch.ops:
            payload = op["value"]
            if not isinstance(payload, dict) or "id" not in payload:
                continue
            if isinstance(payload["id"], list):  # serialized lc id
                continue
            del payload["id"]

    expected_prefix = [
        RunLogPatch(
            {
                "op": "replace",
                "path": "",
                "value": {
                    "logs": {},
                    "final_output": None,
                    "streamed_output": [],
                },
            }
        ),
        started("RunnableMap", "chain", "seq:step:1"),
        started("RunnableLambda", "chain", "map:key:question"),
        finished("RunnableLambda", {"output": question}),
        started("Retriever", "retriever", "map:key:documents"),
        finished("Retriever", {"documents": fake_docs()}),
        finished("RunnableMap", {"documents": fake_docs(), "question": question}),
        started("ChatPromptTemplate", "prompt", "seq:step:2"),
        finished(
            "ChatPromptTemplate",
            ChatPromptValue(
                messages=[
                    SystemMessage(content="You are a nice assistant."),
                    HumanMessage(
                        content="[Document(page_content='foo'), Document(page_content='bar')]"  # noqa: E501
                    ),
                    HumanMessage(content=question),
                ]
            ),
        ),
    ]

    # The trailing nine patches are left out of the exact comparison.
    # NOTE(review): presumably because the two parallel LLM branches may
    # interleave in either order - confirm.
    assert stream_log[:-9] == expected_prefix

    final_state = cast(RunLog, add(stream_log))
    assert sorted(final_state.state["logs"]) == [
        "ChatPromptTemplate",
        "FakeListLLM",
        "FakeListLLM:2",
        "Retriever",
        "RunnableLambda",
        "RunnableMap",
        "RunnableMap:2",
    ]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@freeze_time("2023-01-01")
|
||||
async def test_prompt_with_llm_and_async_lambda(
|
||||
@@ -2291,14 +2418,18 @@ async def test_map_astream() -> None:
|
||||
assert isinstance(final_state.state["id"], str)
|
||||
assert len(final_state.ops) == len(streamed_ops)
|
||||
assert len(final_state.state["logs"]) == 5
|
||||
assert final_state.state["logs"][0]["name"] == "ChatPromptTemplate"
|
||||
assert final_state.state["logs"][0]["final_output"] == dumpd(
|
||||
prompt.invoke({"question": "What is your name?"})
|
||||
assert (
|
||||
final_state.state["logs"]["ChatPromptTemplate"]["name"] == "ChatPromptTemplate"
|
||||
)
|
||||
assert final_state.state["logs"][1]["name"] == "RunnableMap"
|
||||
assert sorted(log["name"] for log in final_state.state["logs"][2:]) == [
|
||||
assert final_state.state["logs"]["ChatPromptTemplate"][
|
||||
"final_output"
|
||||
] == prompt.invoke({"question": "What is your name?"})
|
||||
assert final_state.state["logs"]["RunnableMap"]["name"] == "RunnableMap"
|
||||
assert sorted(final_state.state["logs"]) == [
|
||||
"ChatPromptTemplate",
|
||||
"FakeListChatModel",
|
||||
"FakeStreamingListLLM",
|
||||
"RunnableMap",
|
||||
"RunnablePassthrough",
|
||||
]
|
||||
|
||||
@@ -2316,7 +2447,7 @@ async def test_map_astream() -> None:
|
||||
assert final_state.state["final_output"] == final_value
|
||||
assert len(final_state.state["streamed_output"]) == len(streamed_chunks)
|
||||
assert len(final_state.state["logs"]) == 1
|
||||
assert final_state.state["logs"][0]["name"] == "FakeListChatModel"
|
||||
assert final_state.state["logs"]["FakeListChatModel"]["name"] == "FakeListChatModel"
|
||||
|
||||
# Test astream_log with exclude filters
|
||||
final_state = None
|
||||
@@ -2332,13 +2463,17 @@ async def test_map_astream() -> None:
|
||||
assert final_state.state["final_output"] == final_value
|
||||
assert len(final_state.state["streamed_output"]) == len(streamed_chunks)
|
||||
assert len(final_state.state["logs"]) == 4
|
||||
assert final_state.state["logs"][0]["name"] == "ChatPromptTemplate"
|
||||
assert final_state.state["logs"][0]["final_output"] == dumpd(
|
||||
assert (
|
||||
final_state.state["logs"]["ChatPromptTemplate"]["name"] == "ChatPromptTemplate"
|
||||
)
|
||||
assert final_state.state["logs"]["ChatPromptTemplate"]["final_output"] == (
|
||||
prompt.invoke({"question": "What is your name?"})
|
||||
)
|
||||
assert final_state.state["logs"][1]["name"] == "RunnableMap"
|
||||
assert sorted(log["name"] for log in final_state.state["logs"][2:]) == [
|
||||
assert final_state.state["logs"]["RunnableMap"]["name"] == "RunnableMap"
|
||||
assert sorted(final_state.state["logs"]) == [
|
||||
"ChatPromptTemplate",
|
||||
"FakeStreamingListLLM",
|
||||
"RunnableMap",
|
||||
"RunnablePassthrough",
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user