multiple: langchain 0.2 in master (#21191)

0.2rc 

Migrations

- [x] Move memory
- [x] Move remaining retrievers
- [x] Move graph_qa chains
- [x] Address the evaluation code's potential dependency on math utils
- [x] Move openapi chain from `langchain.chains.api.openapi` to
`langchain_community.chains.openapi`
- [x] Migrate `langchain.chains.ernie_functions` to
`langchain_community.chains.ernie_functions`
- [x] Migrate `langchain/chains/llm_requests.py` to
`langchain_community.chains.llm_requests` (the shim pattern left behind
at old paths is sketched after this list)
- [x] Move `langchain_community.cross_encoders.base:BaseCrossEncoder`
->
`langchain.retrievers.document_compressors.cross_encoder:BaseCrossEncoder`
(namespace not ideal, but it needs to live in `langchain` to avoid
circular deps)
- [x] Unit tests (langchain): add `pytest.mark.community` to the unit
tests that will stay in langchain
- [x] Unit tests (community): move unit tests that depend on community
to community
- [x] Move integration tests that depend on community to community
- [x] Run mypy checks
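
Each module moved above leaves a shim at its old import path so existing
imports keep working while emitting a deprecation warning. A minimal
sketch of that pattern, assuming a `create_importer` helper in
`langchain._api` with a `deprecated_lookups` argument (helper name and
signature are assumptions, not shown in this diff):

```python
# langchain/chains/llm_requests.py -- hypothetical shim at the old path.
from typing import TYPE_CHECKING, Any

from langchain._api import create_importer  # assumed helper

if TYPE_CHECKING:
    from langchain_community.chains.llm_requests import LLMRequestsChain

# Map old attribute names to their new home in langchain-community.
DEPRECATED_LOOKUP = {"LLMRequestsChain": "langchain_community.chains.llm_requests"}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve attributes dynamically, warning on deprecated imports."""
    return _import_attribute(name)


__all__ = ["LLMRequestsChain"]
```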

Other todos

- [x] Make deprecation warnings less noisy (use `warn_deprecated` and
check that things are implemented properly) -- see the sketch after
this list
- [x] Update deprecation messages with a timeline for code removal
(likely we actually won't be removing things until the 0.4 release) --
this will give people more time to transition their code.
- [ ] Add information to the deprecation warning showing users how to
migrate their code base with `langchain-cli`
- [ ] Remove any unnecessary requirements in langchain (e.g., is
SQLAlchemy still required?)
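
For the deprecation-warning items above, a minimal sketch of the quieter
warning, assuming `warn_deprecated` from `langchain_core._api` accepts
`since`/`removal`/`alternative_import` arguments (the parameter names are
an assumption, not confirmed by this diff); the `langchain-cli` migration
command itself is not shown here:

```python
from langchain_core._api import warn_deprecated


def _warn_once() -> None:
    # One structured warning with a removal timeline, instead of a noisy
    # ad-hoc DeprecationWarning on every import.
    warn_deprecated(
        since="0.2.0",
        removal="0.4",  # matches the timeline promised above
        name="langchain.chains.llm_requests",
        alternative_import="langchain_community.chains.llm_requests",
    )
```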

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Eugene Yurtsev authored on 2024-05-08 16:46:52 -04:00 (committed by GitHub)
parent 6b392d6d12
commit f92006de3c
238 changed files with 7552 additions and 5899 deletions


@@ -0,0 +1,105 @@
"""Test LLM Math functionality."""
import json
from typing import Any
import pytest
from langchain.chains.api.base import APIChain
from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
from langchain.chains.llm import LLMChain
from langchain_community.utilities.requests import TextRequestsWrapper
from tests.unit_tests.llms.fake_llm import FakeLLM


class FakeRequestsChain(TextRequestsWrapper):
    """Fake requests chain just for testing purposes."""

    output: str

    def get(self, url: str, **kwargs: Any) -> str:
        """Just return the specified output."""
        return self.output


def get_test_api_data() -> dict:
"""Fake api data to use for testing."""
api_docs = """
This API endpoint will search the notes for a user.
Endpoint: https://thisapidoesntexist.com
GET /api/notes
Query parameters:
q | string | The search term for notes
"""
return {
"api_docs": api_docs,
"question": "Search for notes containing langchain",
"api_url": "https://thisapidoesntexist.com/api/notes?q=langchain",
"api_response": json.dumps(
{
"success": True,
"results": [{"id": 1, "content": "Langchain is awesome!"}],
}
),
"api_summary": "There is 1 note about langchain.",
    }


def get_api_chain(**kwargs: Any) -> APIChain:
"""Fake LLM API chain for testing."""
data = get_test_api_data()
test_api_docs = data["api_docs"]
test_question = data["question"]
test_url = data["api_url"]
test_api_response = data["api_response"]
test_api_summary = data["api_summary"]
api_url_query_prompt = API_URL_PROMPT.format(
api_docs=test_api_docs, question=test_question
)
api_response_prompt = API_RESPONSE_PROMPT.format(
api_docs=test_api_docs,
question=test_question,
api_url=test_url,
api_response=test_api_response,
)
queries = {api_url_query_prompt: test_url, api_response_prompt: test_api_summary}
fake_llm = FakeLLM(queries=queries)
api_request_chain = LLMChain(llm=fake_llm, prompt=API_URL_PROMPT)
api_answer_chain = LLMChain(llm=fake_llm, prompt=API_RESPONSE_PROMPT)
requests_wrapper = FakeRequestsChain(output=test_api_response)
return APIChain(
api_request_chain=api_request_chain,
api_answer_chain=api_answer_chain,
requests_wrapper=requests_wrapper,
api_docs=test_api_docs,
**kwargs,
    )


def test_api_question() -> None:
"""Test simple question that needs API access."""
with pytest.raises(ValueError):
get_api_chain()
with pytest.raises(ValueError):
get_api_chain(limit_to_domains=tuple())
# All domains allowed (not advised)
api_chain = get_api_chain(limit_to_domains=None)
data = get_test_api_data()
assert api_chain.run(data["question"]) == data["api_summary"]
# Use a domain that's allowed
api_chain = get_api_chain(
limit_to_domains=["https://thisapidoesntexist.com/api/notes?q=langchain"]
)
    # Make a request against the allowed domain
assert api_chain.run(data["question"]) == data["api_summary"]
# Use domains that are not valid
api_chain = get_api_chain(limit_to_domains=["h", "*"])
with pytest.raises(ValueError):
# Attempts to make a request against a domain that's not allowed
assert api_chain.run(data["question"]) == data["api_summary"]
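
A hedged sketch building on the helpers above, showing the failure mode
the final assertions exercise: with an allow-list in place, a generated
URL outside it raises `ValueError` before any network call (the
`demo_domain_guard` name is illustrative, not part of this diff):

```python
def demo_domain_guard() -> None:
    data = get_test_api_data()
    # Allow-list a different host than the one the fake LLM will produce.
    guarded = get_api_chain(limit_to_domains=["https://example.com"])
    try:
        guarded.run(data["question"])  # URL targets thisapidoesntexist.com
    except ValueError:
        print("blocked: URL not in the allowed domains")
```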


@@ -0,0 +1,335 @@
import pathlib
from typing import Any, Dict, List

import pandas as pd
import pytest
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, CYPHER_QA_PROMPT
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain_core.prompts import PromptTemplate

from langchain_community.chains.graph_qa.cypher import (
    GraphCypherQAChain,
    construct_schema,
    extract_cypher,
)
from langchain_community.chains.graph_qa.cypher_utils import (
    CypherQueryCorrector,
    Schema,
)
from langchain_community.graphs.graph_document import GraphDocument
from langchain_community.graphs.graph_store import GraphStore

from tests.unit_tests.llms.fake_llm import FakeLLM
class FakeGraphStore(GraphStore):
    """Minimal GraphStore stub: an empty schema and no-op writes keep the
    chain-generated prompts deterministic for FakeLLM's exact-match queries."""

    @property
    def get_schema(self) -> str:
        """Returns the schema of the Graph database"""
        return ""

    @property
    def get_structured_schema(self) -> Dict[str, Any]:
        """Returns the schema of the Graph database"""
        return {}

    def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]:
        """Query the graph."""
        return []

    def refresh_schema(self) -> None:
        """Refreshes the graph schema information."""
        pass

    def add_graph_documents(
        self, graph_documents: List[GraphDocument], include_source: bool = False
    ) -> None:
        """Take GraphDocument as input and use it to construct a graph."""
        pass


def test_graph_cypher_qa_chain_prompt_selection_1() -> None:
# Pass prompts directly. No kwargs is specified.
qa_prompt_template = "QA Prompt"
cypher_prompt_template = "Cypher Prompt"
qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=[])
cypher_prompt = PromptTemplate(template=cypher_prompt_template, input_variables=[])
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
qa_prompt=qa_prompt,
cypher_prompt=cypher_prompt,
)
assert chain.qa_chain.prompt == qa_prompt
    assert chain.cypher_generation_chain.prompt == cypher_prompt


def test_graph_cypher_qa_chain_prompt_selection_2() -> None:
# Default case. Pass nothing
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
)
assert chain.qa_chain.prompt == CYPHER_QA_PROMPT
    assert chain.cypher_generation_chain.prompt == CYPHER_GENERATION_PROMPT


def test_graph_cypher_qa_chain_prompt_selection_3() -> None:
# Pass non-prompt args only to sub-chains via kwargs
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
cypher_llm_kwargs={"memory": readonlymemory},
qa_llm_kwargs={"memory": readonlymemory},
)
assert chain.qa_chain.prompt == CYPHER_QA_PROMPT
    assert chain.cypher_generation_chain.prompt == CYPHER_GENERATION_PROMPT


def test_graph_cypher_qa_chain_prompt_selection_4() -> None:
# Pass prompt, non-prompt args to subchains via kwargs
qa_prompt_template = "QA Prompt"
cypher_prompt_template = "Cypher Prompt"
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=[])
cypher_prompt = PromptTemplate(template=cypher_prompt_template, input_variables=[])
chain = GraphCypherQAChain.from_llm(
llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
cypher_llm_kwargs={"prompt": cypher_prompt, "memory": readonlymemory},
qa_llm_kwargs={"prompt": qa_prompt, "memory": readonlymemory},
)
assert chain.qa_chain.prompt == qa_prompt
    assert chain.cypher_generation_chain.prompt == cypher_prompt


def test_graph_cypher_qa_chain_prompt_selection_5() -> None:
# Can't pass both prompt and kwargs at the same time
qa_prompt_template = "QA Prompt"
cypher_prompt_template = "Cypher Prompt"
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
qa_prompt = PromptTemplate(template=qa_prompt_template, input_variables=[])
cypher_prompt = PromptTemplate(template=cypher_prompt_template, input_variables=[])
    with pytest.raises(ValueError):
        GraphCypherQAChain.from_llm(
            llm=FakeLLM(),
            graph=FakeGraphStore(),
            verbose=True,
            return_intermediate_steps=False,
            qa_prompt=qa_prompt,
            cypher_prompt=cypher_prompt,
            cypher_llm_kwargs={"memory": readonlymemory},
            qa_llm_kwargs={"memory": readonlymemory},
        )


def test_graph_cypher_qa_chain() -> None:
template = """You are a nice chatbot having a conversation with a human.
Schema:
{schema}
Previous conversation:
{chat_history}
New human question: {question}
Response:"""
prompt = PromptTemplate(
input_variables=["schema", "question", "chat_history"], template=template
)
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)
prompt1 = (
"You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n \n\n New human question: "
"Test question\n Response:"
)
prompt2 = (
"You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n Human: Test question\nAI: foo\n\n "
"New human question: Test new question\n Response:"
)
llm = FakeLLM(queries={prompt1: "answer1", prompt2: "answer2"})
chain = GraphCypherQAChain.from_llm(
cypher_llm=llm,
qa_llm=FakeLLM(),
graph=FakeGraphStore(),
verbose=True,
return_intermediate_steps=False,
cypher_llm_kwargs={"prompt": prompt, "memory": readonlymemory},
memory=memory,
)
    chain.run("Test question")
    chain.run("Test new question")
    # Reaching this point without a KeyError means memory was threaded
    # into the prompts correctly.


def test_no_backticks() -> None:
"""Test if there are no backticks, so the original text should be returned."""
query = "MATCH (n) RETURN n"
output = extract_cypher(query)
    assert output == query


def test_backticks() -> None:
"""Test if there are backticks. Query from within backticks should be returned."""
query = "You can use the following query: ```MATCH (n) RETURN n```"
output = extract_cypher(query)
assert output == "MATCH (n) RETURN n"
def test_exclude_types() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
exclude_types = ["Person", "DIRECTED"]
output = construct_schema(structured_schema, [], exclude_types)
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
    assert output == expected_schema


def test_include_types() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
include_types = ["Movie", "Actor", "ACTED_IN"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
    assert output == expected_schema


def test_include_types2() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
include_types = ["Movie", "Actor"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
)
    assert output == expected_schema


def test_include_types3() -> None:
structured_schema = {
"node_props": {
"Movie": [{"property": "title", "type": "STRING"}],
"Actor": [{"property": "name", "type": "STRING"}],
"Person": [{"property": "name", "type": "STRING"}],
},
"rel_props": {},
"relationships": [
{"start": "Actor", "end": "Movie", "type": "ACTED_IN"},
{"start": "Person", "end": "Movie", "type": "DIRECTED"},
],
}
include_types = ["Movie", "Actor", "ACTED_IN"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
    assert output == expected_schema


HERE = pathlib.Path(__file__).parent
UNIT_TESTS_ROOT = HERE.parent


def test_validating_cypher_statements() -> None:
cypher_file = str(UNIT_TESTS_ROOT / "data/cypher_corrector.csv")
examples = pd.read_csv(cypher_file)
examples.fillna("", inplace=True)
for _, row in examples.iterrows():
schema = load_schemas(row["schema"])
corrector = CypherQueryCorrector(schema)
        assert corrector(row["statement"]) == row["correct_query"]


def load_schemas(str_schemas: str) -> List[Schema]:
    """Parse a comma-separated string of "(left, relation, right)" triples.

    Args:
        str_schemas: string of schemas
    """
values = str_schemas.replace("(", "").replace(")", "").split(",")
schemas = []
for i in range(len(values) // 3):
schemas.append(
Schema(
values[i * 3].strip(),
values[i * 3 + 1].strip(),
values[i * 3 + 2].strip(),
)
)
return schemas
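
For context, a hedged sketch of what `test_validating_cypher_statements`
checks, assuming `Schema` is a (left_node, relation, right_node) triple
as `load_schemas` implies:

```python
# The corrector fixes relationship directions that contradict the schema;
# the exact outputs are pinned by the CSV fixture above, not this sketch.
schemas = [Schema("Actor", "ACTED_IN", "Movie")]
corrector = CypherQueryCorrector(schemas)
fixed = corrector("MATCH (m:Movie)-[:ACTED_IN]->(a:Actor) RETURN a")
# Expect (:Actor)-[:ACTED_IN]->(:Movie), or an empty string if the query
# cannot be reconciled with the schema.
```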


@@ -0,0 +1,75 @@
"""Test LLM chain."""
from tempfile import TemporaryDirectory
from typing import Dict, List, Union
from unittest.mock import patch
import pytest
from langchain.chains.llm import LLMChain
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import PromptTemplate
from tests.unit_tests.llms.fake_llm import FakeLLM
class FakeOutputParser(BaseOutputParser):
"""Fake output parser class for testing."""
def parse(self, text: str) -> Union[str, List[str], Dict[str, str]]:
"""Parse by splitting."""
return text.split()
@pytest.fixture
def fake_llm_chain() -> LLMChain:
"""Fake LLM chain for testing purposes."""
prompt = PromptTemplate(input_variables=["bar"], template="This is a {bar}:")
    return LLMChain(prompt=prompt, llm=FakeLLM(), output_key="text1")


@patch(
"langchain_community.llms.loading.get_type_to_cls_dict",
lambda: {"fake": lambda: FakeLLM},
)
def test_serialization(fake_llm_chain: LLMChain) -> None:
"""Test serialization."""
from langchain.chains.loading import load_chain
with TemporaryDirectory() as temp_dir:
file = temp_dir + "/llm.json"
fake_llm_chain.save(file)
loaded_chain = load_chain(file)
    assert loaded_chain == fake_llm_chain


def test_missing_inputs(fake_llm_chain: LLMChain) -> None:
"""Test error is raised if inputs are missing."""
with pytest.raises(ValueError):
fake_llm_chain({"foo": "bar"})
def test_valid_call(fake_llm_chain: LLMChain) -> None:
"""Test valid call of LLM chain."""
output = fake_llm_chain({"bar": "baz"})
assert output == {"bar": "baz", "text1": "foo"}
# Test with stop words.
output = fake_llm_chain({"bar": "baz", "stop": ["foo"]})
# Response should be `bar` now.
assert output == {"bar": "baz", "stop": ["foo"], "text1": "bar"}
def test_predict_method(fake_llm_chain: LLMChain) -> None:
"""Test predict method works."""
output = fake_llm_chain.predict(bar="baz")
assert output == "foo"
def test_predict_and_parse() -> None:
"""Test parsing ability."""
prompt = PromptTemplate(
input_variables=["foo"], template="{foo}", output_parser=FakeOutputParser()
)
llm = FakeLLM(queries={"foo": "foo bar"})
chain = LLMChain(prompt=prompt, llm=llm)
output = chain.predict_and_parse(foo="foo")
assert output == ["foo", "bar"]
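
These assertions lean on the `FakeLLM` contract (defined in
`tests/unit_tests/llms/fake_llm.py`, not in this diff): it answers from
its `queries` mapping when one is provided, and otherwise returns "foo"
without stop words and "bar" with them, which is what `test_valid_call`
exercises. A minimal sketch of that assumption:

```python
llm = FakeLLM()
assert llm.invoke("anything") == "foo"  # no stop words
assert llm.invoke("anything", stop=["x"]) == "bar"  # stop words supplied
```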