Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Lance Martin <lance@langchain.dev>
Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
This commit is contained in:
Erick Friis
2023-10-25 18:47:42 -07:00
committed by GitHub
parent 43257a295c
commit ebf998acb6
242 changed files with 53432 additions and 31 deletions

View File

@@ -0,0 +1,25 @@
import os
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.json import SimpleJsonOutputParser
from elasticsearch import Elasticsearch
from pathlib import Path
from .prompts import DSL_PROMPT
from .elastic_index_info import get_indices_infos
es_host = os.environ["ELASTIC_SEARCH_SERVER"]
es_password = os.environ["ELASTIC_PASSWORD"]
db = Elasticsearch(
es_host,
http_auth=('elastic', es_password),
ca_certs=Path(__file__).parents[1] / 'http_ca.crt' # Replace with your actual path
)
_model = ChatOpenAI(temperature=0, model="gpt-4")
chain = {
"input": lambda x: x["input"],
"indices_info": lambda _: get_indices_infos(db),
"top_k": lambda x: x.get("top_k", 5),
} | DSL_PROMPT | _model | SimpleJsonOutputParser()

View File

@@ -0,0 +1,32 @@
from typing import List
def _list_indices(database, include_indices=None, ignore_indices=None) -> List[str]:
all_indices = [
index["index"] for index in database.cat.indices(format="json")
]
if include_indices:
all_indices = [i for i in all_indices if i in include_indices]
if ignore_indices:
all_indices = [i for i in all_indices if i not in ignore_indices]
return all_indices
def get_indices_infos(database, sample_documents_in_index_info=5) -> str:
indices = _list_indices(database)
mappings = database.indices.get_mapping(index=",".join(indices))
if sample_documents_in_index_info > 0:
for k, v in mappings.items():
hits = database.search(
index=k,
query={"match_all": {}},
size=sample_documents_in_index_info,
)["hits"]["hits"]
hits = [str(hit["_source"]) for hit in hits]
mappings[k]["mappings"] = str(v) + "\n\n/*\n" + "\n".join(hits) + "\n*/"
return "\n\n".join(
[
"Mapping for index {}:\n{}".format(index, mappings[index]["mappings"])
for index in mappings
]
)

View File

@@ -0,0 +1,21 @@
from langchain.prompts.prompt import PromptTemplate
PROMPT_SUFFIX = """Only use the following Elasticsearch indices:
{indices_info}
Question: {input}
ESQuery:"""
DEFAULT_DSL_TEMPLATE = """Given an input question, create a syntactically correct Elasticsearch query to run. Always limit your query to at most {top_k} results, unless the user specifies in their question a specific number of examples they wish to obtain, or unless its implied that they want to see all. You can order the results by a relevant column to return the most interesting examples in the database.
Unless told to do not query for all the columns from a specific index, only ask for a the few relevant columns given the question.
Pay attention to use only the column names that you can see in the mapping description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which index. Return the query as valid json.
Use the following format:
Question: Question here
ESQuery: Elasticsearch Query formatted as json
"""
DSL_PROMPT = PromptTemplate.from_template(DEFAULT_DSL_TEMPLATE + PROMPT_SUFFIX)