multiple: langchain 0.2 in master (#21191)

0.2rc 

Migrations

- [x] Move memory
- [x] Move remaining retrievers
- [x] Move graph_qa chains
- [x] Address the evaluation code's potential dependency on math utils
- [x] Move openapi chain from `langchain.chains.api.openapi` to
`langchain_community.chains.openapi`
- [x] Migrate `langchain.chains.ernie_functions` to
`langchain_community.chains.ernie_functions`
- [x] Migrate `langchain/chains/llm_requests.py` to
`langchain_community.chains.llm_requests` (the shim pattern left behind
at old paths is sketched after this list)
- [x] Move `langchain_community.cross_encoders.base:BaseCrossEncoder`
->
`langchain.retrievers.document_compressors.cross_encoder:BaseCrossEncoder`
(namespace not ideal, but it needs to live in `langchain` to avoid
circular deps)
- [x] Unit tests (langchain): add `pytest.mark.community` to the unit
tests that will stay in langchain
- [x] Unit tests (community): move unit tests that depend on community
to community
- [x] Move integration tests that depend on community to community
- [x] Run mypy checks
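
Each module moved above leaves a shim at its old import path so existing
imports keep working while emitting a deprecation warning. A minimal
sketch of that pattern, assuming a `create_importer` helper in
`langchain._api` with a `deprecated_lookups` argument (helper name and
signature are assumptions, not shown in this diff):

```python
# langchain/chains/llm_requests.py -- hypothetical shim at the old path.
from typing import TYPE_CHECKING, Any

from langchain._api import create_importer  # assumed helper

if TYPE_CHECKING:
    from langchain_community.chains.llm_requests import LLMRequestsChain

# Map old attribute names to their new home in langchain-community.
DEPRECATED_LOOKUP = {"LLMRequestsChain": "langchain_community.chains.llm_requests"}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve attributes dynamically, warning on deprecated imports."""
    return _import_attribute(name)


__all__ = ["LLMRequestsChain"]
```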

Other todos

- [x] Make deprecation warnings less noisy (use `warn_deprecated` and
check that things are implemented properly) -- see the sketch after
this list
- [x] Update deprecation messages with a timeline for code removal
(likely we actually won't be removing things until the 0.4 release) --
this will give people more time to transition their code.
- [ ] Add information to the deprecation warning showing users how to
migrate their code base with `langchain-cli`
- [ ] Remove any unnecessary requirements in langchain (e.g., is
SQLAlchemy still required?)
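
For the deprecation-warning items above, a minimal sketch of the quieter
warning, assuming `warn_deprecated` from `langchain_core._api` accepts
`since`/`removal`/`alternative_import` arguments (the parameter names are
an assumption, not confirmed by this diff); the `langchain-cli` migration
command itself is not shown here:

```python
from langchain_core._api import warn_deprecated


def _warn_once() -> None:
    # One structured warning with a removal timeline, instead of a noisy
    # ad-hoc DeprecationWarning on every import.
    warn_deprecated(
        since="0.2.0",
        removal="0.4",  # matches the timeline promised above
        name="langchain.chains.llm_requests",
        alternative_import="langchain_community.chains.llm_requests",
    )
```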

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Eugene Yurtsev authored on 2024-05-08 16:46:52 -04:00 (committed by GitHub)
parent 6b392d6d12
commit f92006de3c
238 changed files with 7552 additions and 5899 deletions


@@ -0,0 +1,105 @@
"""Test LLM Math functionality."""
import json
from typing import Any
import pytest
from langchain.chains.api.base import APIChain
from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
from langchain.chains.llm import LLMChain
from langchain_community.utilities.requests import TextRequestsWrapper
from tests.unit_tests.llms.fake_llm import FakeLLM


class FakeRequestsChain(TextRequestsWrapper):
    """Fake requests chain just for testing purposes."""

    output: str

    def get(self, url: str, **kwargs: Any) -> str:
        """Just return the specified output."""
        return self.output


def get_test_api_data() -> dict:
"""Fake api data to use for testing."""
api_docs = """
This API endpoint will search the notes for a user.
Endpoint: https://thisapidoesntexist.com
GET /api/notes
Query parameters:
q | string | The search term for notes
"""
return {
"api_docs": api_docs,
"question": "Search for notes containing langchain",
"api_url": "https://thisapidoesntexist.com/api/notes?q=langchain",
"api_response": json.dumps(
{
"success": True,
"results": [{"id": 1, "content": "Langchain is awesome!"}],
}
),
"api_summary": "There is 1 note about langchain.",
    }


def get_api_chain(**kwargs: Any) -> APIChain:
"""Fake LLM API chain for testing."""
data = get_test_api_data()
test_api_docs = data["api_docs"]
test_question = data["question"]
test_url = data["api_url"]
test_api_response = data["api_response"]
test_api_summary = data["api_summary"]
api_url_query_prompt = API_URL_PROMPT.format(
api_docs=test_api_docs, question=test_question
)
api_response_prompt = API_RESPONSE_PROMPT.format(
api_docs=test_api_docs,
question=test_question,
api_url=test_url,
api_response=test_api_response,
)
queries = {api_url_query_prompt: test_url, api_response_prompt: test_api_summary}
fake_llm = FakeLLM(queries=queries)
api_request_chain = LLMChain(llm=fake_llm, prompt=API_URL_PROMPT)
api_answer_chain = LLMChain(llm=fake_llm, prompt=API_RESPONSE_PROMPT)
requests_wrapper = FakeRequestsChain(output=test_api_response)
return APIChain(
api_request_chain=api_request_chain,
api_answer_chain=api_answer_chain,
requests_wrapper=requests_wrapper,
api_docs=test_api_docs,
**kwargs,
    )


def test_api_question() -> None:
"""Test simple question that needs API access."""
with pytest.raises(ValueError):
get_api_chain()
with pytest.raises(ValueError):
get_api_chain(limit_to_domains=tuple())
# All domains allowed (not advised)
api_chain = get_api_chain(limit_to_domains=None)
data = get_test_api_data()
assert api_chain.run(data["question"]) == data["api_summary"]
# Use a domain that's allowed
api_chain = get_api_chain(
limit_to_domains=["https://thisapidoesntexist.com/api/notes?q=langchain"]
)
    # Make a request against the allowed domain
assert api_chain.run(data["question"]) == data["api_summary"]
# Use domains that are not valid
api_chain = get_api_chain(limit_to_domains=["h", "*"])
with pytest.raises(ValueError):
# Attempts to make a request against a domain that's not allowed
assert api_chain.run(data["question"]) == data["api_summary"]
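
A hedged sketch building on the helpers above, showing the failure mode
the final assertions exercise: with an allow-list in place, a generated
URL outside it raises `ValueError` before any network call (the
`demo_domain_guard` name is illustrative, not part of this diff):

```python
def demo_domain_guard() -> None:
    data = get_test_api_data()
    # Allow-list a different host than the one the fake LLM will produce.
    guarded = get_api_chain(limit_to_domains=["https://example.com"])
    try:
        guarded.run(data["question"])  # URL targets thisapidoesntexist.com
    except ValueError:
        print("blocked: URL not in the allowed domains")
```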


@@ -0,0 +1,335 @@
import pathlib
from typing import Any, Dict, List

import pandas as pd
import pytest
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, CYPHER_QA_PROMPT
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain_core.prompts import PromptTemplate

from langchain_community.chains.graph_qa.cypher import (
    GraphCypherQAChain,
    construct_schema,
    extract_cypher,
)
from langchain_community.chains.graph_qa.cypher_utils import (
    CypherQueryCorrector,
    Schema,
)
from langchain_community.graphs.graph_document import GraphDocument
from langchain_community.graphs.graph_store import GraphStore

from tests.unit_tests.llms.fake_llm import FakeLLM
class FakeGraphStore(GraphStore):
    """Minimal GraphStore stub: an empty schema and no-op writes keep the
    chain-generated prompts deterministic for FakeLLM's exact-match queries."""

    @property
    def get_schema(self) -> str:
        """Returns the schema of the Graph database"""
        return ""

    @property
    def get_structured_schema(self) -> Dict[str, Any]:
        """Returns the schema of the Graph database"""
        return {}

    def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]:
        """Query the graph."""
        return []

    def refresh_schema(self) -> None:
        """Refreshes the graph schema information."""
        pass

    def add_graph_documents(
        self, graph_documents: List[GraphDocument], include_source: bool = False
    ) -> None:
        """Take GraphDocument as input and use it to construct a graph."""
        pass


def test_graph_cypher_qa_chain_prompt_selection_1() -> None:
# Pass prompts directly. No kwargs is specified.
qa_prompt_template = "QA Prompt"
cypher_prompt_template = "Cypher Prompt"
qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=[])
cypher_prompt = PromptTemplate(template=cypher_prompt_template, input_variables=[])
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
qa_prompt=qa_prompt,
cypher_prompt=cypher_prompt,
)
assert chain.qa_chain.prompt == qa_prompt
    assert chain.cypher_generation_chain.prompt == cypher_prompt


def test_graph_cypher_qa_chain_prompt_selection_2() -> None:
# Default case. Pass nothing
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
)
assert chain.qa_chain.prompt == CYPHER_QA_PROMPT
    assert chain.cypher_generation_chain.prompt == CYPHER_GENERATION_PROMPT


def test_graph_cypher_qa_chain_prompt_selection_3() -> None:
# Pass non-prompt args only to sub-chains via kwargs
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
cypher_llm_kwargs={"memory": readonlymemory},
qa_llm_kwargs={"memory": readonlymemory},
)
assert chain.qa_chain.prompt == CYPHER_QA_PROMPT
    assert chain.cypher_generation_chain.prompt == CYPHER_GENERATION_PROMPT


def test_graph_cypher_qa_chain_prompt_selection_4() -> None:
# Pass prompt, non-prompt args to subchains via kwargs
qa_prompt_template = "QA Prompt"
cypher_prompt_template = "Cypher Prompt"
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=[])
cypher_prompt = PromptTemplate(template=cypher_prompt_template, input_variables=[])
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
cypher_llm_kwargs={"prompt": cypher_prompt, "memory": readonlymemory},
qa_llm_kwargs={"prompt": qa_prompt, "memory": readonlymemory},
)
assert chain.qa_chain.prompt == qa_prompt
    assert chain.cypher_generation_chain.prompt == cypher_prompt


def test_graph_cypher_qa_chain_prompt_selection_5() -> None:
# Can't pass both prompt and kwargs at the same time
qa_prompt_template = "QA Prompt"
cypher_prompt_template = "Cypher Prompt"
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=[])
cypher_prompt = PromptTemplate(template=cypher_prompt_template, input_variables=[])
    with pytest.raises(ValueError):
        GraphCypherQAChain.from_llm(
            llm=FakeLLM(),
            graph=FakeGraphStore(),
            verbose=True,
            return_intermediate_steps=False,
            qa_prompt=qa_prompt,
            cypher_prompt=cypher_prompt,
            cypher_llm_kwargs={"memory": readonlymemory},
            qa_llm_kwargs={"memory": readonlymemory},
        )


def test_graph_cypher_qa_chain() -> None:
template = """You are a nice chatbot having a conversation with a human.
Schema:
{schema}
Previous conversation:
{chat_history}
New human question: {question}
Response:"""
prompt = PromptTemplate(
input_variables=["schema", "question", "chat_history"], template=template
)
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
prompt1 = (
"You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n \n\n New human question: "
"Test question\n Response:"
)
prompt2 = (
"You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n Human: Test question\nAI: foo\n\n "
"New human question: Test new question\n Response:"
)
llm = FakeLLM(queries={prompt1: "answer1", prompt2: "answer2"})
chain = GraphCypherQAChain.from_llm(
cypher_llm=llm,
qa_llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
cypher_llm_kwargs={"prompt": prompt, "memory": readonlymemory},
memory=memory,
)
    chain.run("Test question")
    chain.run("Test new question")
    # Reaching this point without a KeyError means memory was threaded
    # into the prompts correctly.


def test_no_backticks() -> None:
"""Test if there are no backticks, so the original text should be returned."""
query = "MATCH (n) RETURN n"
output = extract_cypher(query)
    assert output == query


def test_backticks() -> None:
"""Test if there are backticks. Query from within backticks should be returned."""
query = "You can use the following query: ```MATCH (n) RETURN n```"
output = extract_cypher(query)
assert output == "MATCH (n) RETURN n"
def test_exclude_types() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
exclude_types = ["Person", "DIRECTED"]
output = construct_schema(structured_schema, [], exclude_types)
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
    assert output == expected_schema


def test_include_types() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
include_types = ["Movie", "Actor", "ACTED_IN"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
    assert output == expected_schema


def test_include_types2() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
include_types = ["Movie", "Actor"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
)
    assert output == expected_schema


def test_include_types3() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
include_types = ["Movie", "Actor", "ACTED_IN"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
    assert output == expected_schema


HERE = pathlib.Path(__file__).parent
UNIT_TESTS_ROOT = HERE.parent


def test_validating_cypher_statements() -> None:
cypher_file = str(UNIT_TESTS_ROOT / "data/cypher_corrector.csv")
examples = pd.read_csv(cypher_file)
examples.fillna("", inplace=True)
for _, row in examples.iterrows():
schema = load_schemas(row["schema"])
corrector = CypherQueryCorrector(schema)
        assert corrector(row["statement"]) == row["correct_query"]


def load_schemas(str_schemas: str) -> List[Schema]:
    """Parse a comma-separated string of "(left, relation, right)" triples.

    Args:
        str_schemas: string of schemas
    """
values = str_schemas.replace("(", "").replace(")", "").split(",")
schemas = []
for i in range(len(values) // 3):
schemas.append(
Schema(
values[i * 3].strip(),
values[i * 3 + 1].strip(),
values[i * 3 + 2].strip(),
)
)
return schemas
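
For context, a hedged sketch of what `test_validating_cypher_statements`
checks, assuming `Schema` is a (left_node, relation, right_node) triple
as `load_schemas` implies:

```python
# The corrector fixes relationship directions that contradict the schema;
# the exact outputs are pinned by the CSV fixture above, not this sketch.
schemas = [Schema("Actor", "ACTED_IN", "Movie")]
corrector = CypherQueryCorrector(schemas)
fixed = corrector("MATCH (m:Movie)-[:ACTED_IN]->(a:Actor) RETURN a")
# Expect (:Actor)-[:ACTED_IN]->(:Movie), or an empty string if the query
# cannot be reconciled with the schema.
```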


@@ -0,0 +1,75 @@
"""Test LLM chain."""
from tempfile import TemporaryDirectory
from typing import Dict, List, Union
from unittest.mock import patch
import pytest
from langchain.chains.llm import LLMChain
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import PromptTemplate
from tests.unit_tests.llms.fake_llm import FakeLLM
class FakeOutputParser(BaseOutputParser):
"""Fake output parser class for testing."""
def parse(self, text: str) -> Union[str, List[str], Dict[str, str]]:
"""Parse by splitting."""
return text.split()
@pytest.fixture
def fake_llm_chain() -> LLMChain:
"""Fake LLM chain for testing purposes."""
prompt = PromptTemplate(input_variables=["bar"], template="This is a {bar}:")
    return LLMChain(prompt=prompt, llm=FakeLLM(), output_key="text1")


@patch(
"langchain_community.llms.loading.get_type_to_cls_dict",
lambda: {"fake": lambda: FakeLLM},
)
def test_serialization(fake_llm_chain: LLMChain) -> None:
"""Test serialization."""
from langchain.chains.loading import load_chain
with TemporaryDirectory() as temp_dir:
file = temp_dir + "/llm.json"
fake_llm_chain.save(file)
loaded_chain = load_chain(file)
    assert loaded_chain == fake_llm_chain


def test_missing_inputs(fake_llm_chain: LLMChain) -> None:
"""Test error is raised if inputs are missing."""
with pytest.raises(ValueError):
fake_llm_chain({"foo": "bar"})
def test_valid_call(fake_llm_chain: LLMChain) -> None:
"""Test valid call of LLM chain."""
output = fake_llm_chain({"bar": "baz"})
assert output == {"bar": "baz", "text1": "foo"}
# Test with stop words.
output = fake_llm_chain({"bar": "baz", "stop": ["foo"]})
# Response should be `bar` now.
assert output == {"bar": "baz", "stop": ["foo"], "text1": "bar"}
def test_predict_method(fake_llm_chain: LLMChain) -> None:
"""Test predict method works."""
output = fake_llm_chain.predict(bar="baz")
assert output == "foo"
def test_predict_and_parse() -> None:
"""Test parsing ability."""
prompt = PromptTemplate(
input_variables=["foo"], template="{foo}", output_parser=FakeOutputParser()
)
llm = FakeLLM(queries={"foo": "foo bar"})
chain = LLMChain(prompt=prompt, llm=llm)
output = chain.predict_and_parse(foo="foo")
assert output == ["foo", "bar"]
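
These assertions lean on the `FakeLLM` contract (defined in
`tests/unit_tests/llms/fake_llm.py`, not in this diff): it answers from
its `queries` mapping when one is provided, and otherwise returns "foo"
without stop words and "bar" with them, which is what `test_valid_call`
exercises. A minimal sketch of that assumption:

```python
llm = FakeLLM()
assert llm.invoke("anything") == "foo"  # no stop words
assert llm.invoke("anything", stop=["x"]) == "bar"  # stop words supplied
```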