mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-07 22:11:51 +00:00
Templates (#12294)
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
This commit is contained in:
28
templates/elastic-query-generator/README.md
Normal file
28
templates/elastic-query-generator/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# elastic-query-generator
|
||||
|
||||
We can use LLMs to interact with Elasticsearch analytics databases in natural language.
|
||||
|
||||
This chain builds search queries via the Elasticsearch DSL API (filters and aggregations).
|
||||
|
||||
The Elasticsearch client must have permissions for index listing, mapping description and search queries.
|
||||
|
||||
|
||||
|
||||
## Setup
|
||||
|
||||
## Installing Elasticsearch
|
||||
|
||||
There are a number of ways to run Elasticsearch.
|
||||
|
||||
### Elastic Cloud
|
||||
|
||||
Create a free trial account on [Elastic Cloud](https://cloud.elastic.co/registration?utm_source=langchain&utm_content=langserve).
|
||||
|
||||
Once you have a deployment, update the connection string.
|
||||
|
||||
The password and connection URL (the Elasticsearch URL) can be found in the deployment console.
|
||||
|
||||
```bash
> export ELASTIC_SEARCH_SERVER="https://elastic:<password>@<es-url>"
> export ELASTIC_PASSWORD="<password>"
```
|
||||
|
||||
If you want to populate the DB with some example info, you can run `python ingest.py`.
|
@@ -0,0 +1,25 @@
|
||||
import os
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.output_parsers.json import SimpleJsonOutputParser
|
||||
from elasticsearch import Elasticsearch
|
||||
from pathlib import Path
|
||||
|
||||
from .prompts import DSL_PROMPT
|
||||
from .elastic_index_info import get_indices_infos
|
||||
|
||||
# Connection settings are read from the environment when this module is
# imported; a missing variable raises KeyError right away.
es_host = os.environ["ELASTIC_SEARCH_SERVER"]
es_password = os.environ["ELASTIC_PASSWORD"]

db = Elasticsearch(
    es_host,
    # The 8.x client deprecates `http_auth`; `basic_auth` is its replacement
    # for (username, password) credentials.
    basic_auth=("elastic", es_password),
    # Replace with your actual path. The certificate is looked up one level
    # above this package directory and handed over as a plain string.
    ca_certs=str(Path(__file__).parents[1] / "http_ca.crt"),
)

# Temperature 0 keeps the generated query as deterministic as the model allows.
_model = ChatOpenAI(temperature=0, model="gpt-4")

# Pipeline: collect the prompt variables -> render DSL_PROMPT -> call the
# model -> parse the reply as JSON.
chain = (
    {
        # The caller's question, passed through unchanged.
        "input": lambda x: x["input"],
        # Index mappings and sample documents are fetched on every invocation.
        "indices_info": lambda _: get_indices_infos(db),
        # Result-size limit used in the prompt; defaults to 5 when not supplied.
        "top_k": lambda x: x.get("top_k", 5),
    }
    | DSL_PROMPT
    | _model
    | SimpleJsonOutputParser()
)
|
@@ -0,0 +1,32 @@
|
||||
from typing import List
|
||||
|
||||
def _list_indices(database, include_indices=None, ignore_indices=None) -> List[str]:
    """Return the index names reported by ``database.cat.indices``.

    Args:
        database: Client object exposing ``cat.indices(format="json")``; each
            returned row is read through its ``"index"`` key.
        include_indices: Optional collection of names. When non-empty, only
            names contained in it are kept.
        ignore_indices: Optional collection of names. When non-empty, names
            contained in it are dropped.

    Returns:
        The remaining index names, in the order the client listed them.
    """
    names = [row["index"] for row in database.cat.indices(format="json")]
    if include_indices:
        names = [name for name in names if name in include_indices]
    if ignore_indices:
        names = [name for name in names if name not in ignore_indices]
    return names


def get_indices_infos(
    database,
    sample_documents_in_index_info=5,
    include_indices=None,
    ignore_indices=None,
) -> str:
    """Build a text description of index mappings, optionally with samples.

    Args:
        database: Client object exposing ``cat.indices``,
            ``indices.get_mapping`` and ``search``.
        sample_documents_in_index_info: Number of documents to fetch per index
            with a ``match_all`` query. When it is 0 or negative no search is
            issued and only the mapping is described.
        include_indices: Optional collection of index names to restrict the
            description to (forwarded to ``_list_indices``).
        ignore_indices: Optional collection of index names to leave out
            (forwarded to ``_list_indices``).

    Returns:
        One ``"Mapping for index <name>:"`` section per index, separated by a
        blank line. With sampling enabled each section holds the stringified
        mapping entry followed by the sample ``_source`` documents inside a
        ``/* ... */`` block. Returns an empty string when no index is selected.
    """
    indices = _list_indices(database, include_indices, ignore_indices)
    if not indices:
        # NOTE(review): an empty index expression is presumably interpreted by
        # the server as "all indices", which would bypass the filters above --
        # so answer directly instead of requesting mappings for "".
        return ""

    mappings = database.indices.get_mapping(index=",".join(indices))

    sections = []
    for index_name, index_info in mappings.items():
        if sample_documents_in_index_info > 0:
            hits = database.search(
                index=index_name,
                query={"match_all": {}},
                size=sample_documents_in_index_info,
            )["hits"]["hits"]
            samples = "\n".join(str(hit["_source"]) for hit in hits)
            # The description is built in a local variable; the client's
            # response object is left untouched.
            description = str(index_info) + "\n\n/*\n" + samples + "\n*/"
        else:
            description = index_info["mappings"]
        sections.append(
            "Mapping for index {}:\n{}".format(index_name, description)
        )
    return "\n\n".join(sections)
|
@@ -0,0 +1,21 @@
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
# Instruction header of the prompt. `{top_k}` is the only placeholder in this
# part; it is filled with the result-size limit.
DEFAULT_DSL_TEMPLATE = """Given an input question, create a syntactically correct Elasticsearch query to run. Always limit your query to at most {top_k} results, unless the user specifies in their question a specific number of examples they wish to obtain, or unless its implied that they want to see all. You can order the results by a relevant column to return the most interesting examples in the database.

Unless told to do not query for all the columns from a specific index, only ask for a the few relevant columns given the question.

Pay attention to use only the column names that you can see in the mapping description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which index. Return the query as valid json.

Use the following format:

Question: Question here
ESQuery: Elasticsearch Query formatted as json
"""

# Trailing part of the prompt: the index descriptions (`{indices_info}`) and
# the user's question (`{input}`), ending on the cue the model completes.
PROMPT_SUFFIX = """Only use the following Elasticsearch indices:
{indices_info}

Question: {input}
ESQuery:"""

# Full prompt = instruction header followed by the per-request suffix.
DSL_PROMPT = PromptTemplate.from_template(
    "".join((DEFAULT_DSL_TEMPLATE, PROMPT_SUFFIX))
)
|
21
templates/elastic-query-generator/ingest.py
Normal file
21
templates/elastic-query-generator/ingest.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import os
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
# Connection settings come from the environment; a missing variable raises
# KeyError before any request is made.
es_host = os.environ["ELASTIC_SEARCH_SERVER"]
es_password = os.environ["ELASTIC_PASSWORD"]

db = Elasticsearch(
    es_host,
    # The 8.x client deprecates `http_auth`; `basic_auth` is its replacement
    # for (username, password) credentials.
    basic_auth=("elastic", es_password),
    ca_certs="http_ca.crt",  # Replace with your actual path
)

# Example records loaded into the "customers" index.
customers = [
    {"firstname": "Jennifer", "lastname": "Walters"},
    {"firstname": "Monica", "lastname": "Rambeau"},
    {"firstname": "Carol", "lastname": "Danvers"},
    {"firstname": "Wanda", "lastname": "Maximoff"},
    {"firstname": "Jennifer", "lastname": "Takeda"},
]

# The list position doubles as the document id.
# NOTE(review): `create` is expected to reject an id that already exists, so
# running this script twice against the same index will likely fail -- confirm
# whether `index` (upsert) would be the better call here.
for i, customer in enumerate(customers):
    db.create(index="customers", document=customer, id=i)
|
5
templates/elastic-query-generator/main.py
Normal file
5
templates/elastic-query-generator/main.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from elastic_query_generator.chain import chain
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one example question through the chain and show
    # the parsed query it produces.
    example_request = {"input": "how many customers named Carol"}
    generated_query = chain.invoke(example_request)
    print(generated_query)
|
1246
templates/elastic-query-generator/poetry.lock
generated
Normal file
1246
templates/elastic-query-generator/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
20
templates/elastic-query-generator/pyproject.toml
Normal file
20
templates/elastic-query-generator/pyproject.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[tool.poetry]
|
||||
name = "elastic_query_generator"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain = ">=0.0.313"
|
||||
elasticsearch = "^8.10.1"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "elastic_query_generator.chain"
|
||||
export_attr = "chain"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
0
templates/elastic-query-generator/tests/__init__.py
Normal file
0
templates/elastic-query-generator/tests/__init__.py
Normal file
Reference in New Issue
Block a user