mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-24 20:09:01 +00:00
multiple: langchain 0.2 in master (#21191)
0.2rc migrations - [x] Move memory - [x] Move remaining retrievers - [x] graph_qa chains - [x] some dependency from evaluation code potentially on math utils - [x] Move openapi chain from `langchain.chains.api.openapi` to `langchain_community.chains.openapi` - [x] Migrate `langchain.chains.ernie_functions` to `langchain_community.chains.ernie_functions` - [x] migrate `langchain/chains/llm_requests.py` to `langchain_community.chains.llm_requests` - [x] Moving `langchain_community.cross_encoders.base:BaseCrossEncoder` -> `langchain_community.retrievers.document_compressors.cross_encoder:BaseCrossEncoder` (namespace not ideal, but it needs to be moved to `langchain` to avoid circular deps) - [x] unit tests langchain -- add pytest.mark.community to some unit tests that will stay in langchain - [x] unit tests community -- move unit tests that depend on community to community - [x] mv integration tests that depend on community to community - [x] mypy checks Other todo - [x] Make deprecation warnings not noisy (need to use warn deprecated and check that things are implemented properly) - [x] Update deprecation messages with timeline for code removal (likely we actually won't be removing things until 0.4 release) -- will give people more time to transition their code. - [ ] Add information to deprecation warning to show users how to migrate their code base using langchain-cli - [ ] Remove any unnecessary requirements in langchain (e.g., is SQLAlchemy required?) --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
"""Integration test for compression pipelines."""
|
||||
from langchain.retrievers.document_compressors import (
|
||||
DocumentCompressorPipeline,
|
||||
EmbeddingsFilter,
|
||||
)
|
||||
from langchain_core.documents import Document
|
||||
from langchain_text_splitters.character import CharacterTextSplitter
|
||||
|
||||
from langchain_community.document_transformers import EmbeddingsRedundantFilter
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
|
||||
|
||||
def test_document_compressor_pipeline() -> None:
    """Run a splitter -> redundant-filter -> relevance-filter pipeline end to end.

    Three short sentences are joined with ". " into a single document, which is
    handed to the pipeline together with a farm-animal query. The test passes
    when exactly one document comes back and its content is one of the two
    cow sentences.
    """
    sentences = [
        "This sentence is about cows",
        "This sentence was about cows",
        "foo bar baz",
    ]
    source_docs = [Document(page_content=". ".join(sentences))]

    embedder = OpenAIEmbeddings()
    stages = [
        # Splits on the same ". " separator that was used to join the sentences.
        CharacterTextSplitter(chunk_size=20, chunk_overlap=0, separator=". "),
        EmbeddingsRedundantFilter(embeddings=embedder),
        EmbeddingsFilter(embeddings=embedder, similarity_threshold=0.8),
    ]
    pipeline = DocumentCompressorPipeline(transformers=stages)

    compressed = pipeline.compress_documents(
        source_docs, "Tell me about farm animals"
    )

    assert len(compressed) == 1
    cow_sentences = sentences[:2]
    assert compressed[0].page_content in cow_sentences
|
@@ -0,0 +1,45 @@
|
||||
"""Integration test for LLMChainExtractor."""
|
||||
from langchain.retrievers.document_compressors import LLMChainExtractor
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.chat_models import ChatOpenAI
|
||||
|
||||
|
||||
def test_llm_construction_with_kwargs() -> None:
    """Check that ``llm_chain_kwargs`` given to ``from_llm`` reach the chain.

    Builds an extractor with ``{"verbose": True}`` and asserts that the
    resulting ``llm_chain`` has ``verbose`` set to ``True``.
    """
    chain_options = {"verbose": True}
    chat_model = ChatOpenAI()
    extractor = LLMChainExtractor.from_llm(
        chat_model,
        llm_chain_kwargs=chain_options,
    )
    assert extractor.llm_chain.verbose is True
|
||||
|
||||
|
||||
def test_llm_chain_extractor() -> None:
    """Extract only the query-relevant sentences from a mixed document.

    The input alternates Roman Empire sentences (even indices) with unrelated
    food sentences (odd indices), joined by single spaces into one document.
    After compression against a Roman Empire query, the first returned
    document must contain every even-indexed sentence verbatim and none of the
    odd-indexed ones.
    """
    texts = [
        "The Roman Empire followed the Roman Republic.",
        "I love chocolate chip cookies—my mother makes great cookies.",
        "The first Roman emperor was Caesar Augustus.",
        "Don't you just love Caesar salad?",
        "The Roman Empire collapsed in 476 AD after the fall of Rome.",
        "Let's go to Olive Garden!",
    ]
    combined = Document(page_content=" ".join(texts))
    extractor = LLMChainExtractor.from_llm(ChatOpenAI())

    results = extractor.compress_documents(
        [combined], "Tell me about the Roman Empire"
    )
    extracted = results[0].page_content

    # Indices 0, 2, 4 are on topic; indices 1, 3, 5 are not.
    on_topic = texts[0::2]
    off_topic = texts[1::2]
    assert all(sentence in extracted for sentence in on_topic)
    assert all(sentence not in extracted for sentence in off_topic)
|
||||
|
||||
|
||||
def test_llm_chain_extractor_empty() -> None:
    """Expect no documents back when nothing in the input matches the query.

    The single input document contains only food-related sentences, while the
    query asks about the Roman Empire; the compressed result must be empty.
    """
    unrelated_sentences = [
        "I love chocolate chip cookies—my mother makes great cookies.",
        "Don't you just love Caesar salad?",
        "Let's go to Olive Garden!",
    ]
    combined = Document(page_content=" ".join(unrelated_sentences))
    extractor = LLMChainExtractor.from_llm(ChatOpenAI())

    results = extractor.compress_documents(
        [combined], "Tell me about the Roman Empire"
    )

    assert len(results) == 0
|
@@ -0,0 +1,18 @@
|
||||
"""Integration test for llm-based relevant doc filtering."""
|
||||
from langchain.retrievers.document_compressors import LLMChainFilter
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.chat_models import ChatOpenAI
|
||||
|
||||
|
||||
def test_llm_chain_filter() -> None:
    """Keep only the food-related documents when filtering with an LLM.

    Each text becomes its own document. Exactly two documents must survive the
    filter, and both of the first two texts must be among them.
    """
    texts = [
        "What happened to all of my cookies?",
        "I wish there were better Italian restaurants in my neighborhood.",
        "My favorite color is green",
    ]
    docs = []
    for text in texts:
        docs.append(Document(page_content=text))

    llm_filter = LLMChainFilter.from_llm(llm=ChatOpenAI())
    kept = llm_filter.compress_documents(docs, "Things I said related to food")

    assert len(kept) == 2
    kept_contents = {doc.page_content for doc in kept}
    # Both food-related texts must be present among the survivors.
    assert set(texts[:2]).issubset(kept_contents)
|
@@ -0,0 +1,43 @@
|
||||
"""Integration test for embedding-based relevant doc filtering."""
|
||||
import numpy as np
|
||||
from langchain.retrievers.document_compressors import EmbeddingsFilter
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_transformers.embeddings_redundant_filter import (
|
||||
_DocumentWithState,
|
||||
)
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
|
||||
|
||||
def test_embeddings_filter() -> None:
    """Keep only the food-related documents when filtering by embeddings.

    Each text becomes its own document and is filtered against a food query
    with a similarity threshold of 0.75. Exactly two documents must survive,
    and both of the first two texts must be among them.
    """
    texts = [
        "What happened to all of my cookies?",
        "I wish there were better Italian restaurants in my neighborhood.",
        "My favorite color is green",
    ]
    docs = []
    for text in texts:
        docs.append(Document(page_content=text))

    embedder = OpenAIEmbeddings()
    similarity_filter = EmbeddingsFilter(
        embeddings=embedder,
        similarity_threshold=0.75,
    )
    kept = similarity_filter.compress_documents(docs, "What did I say about food?")

    assert len(kept) == 2
    kept_contents = {doc.page_content for doc in kept}
    # Both food-related texts must be present among the survivors.
    assert set(texts[:2]).issubset(kept_contents)
|
||||
|
||||
|
||||
def test_embeddings_filter_with_state() -> None:
    """Filter documents that carry a pre-populated ``embedded_doc`` state.

    All documents are created with an all-zero ``embedded_doc`` vector, then
    the last document's state is replaced with the query's own embedding.
    The test expects that last document to be the only one returned.

    NOTE(review): the expected result only makes sense if the filter scores
    documents by their stored ``embedded_doc`` state instead of re-embedding
    the text -- confirm against ``EmbeddingsFilter``.
    """
    texts = [
        "What happened to all of my cookies?",
        "I wish there were better Italian restaurants in my neighborhood.",
        "My favorite color is green",
    ]
    query = "What did I say about food?"

    embedder = OpenAIEmbeddings()
    query_vector = embedder.embed_query(query)

    # One zero vector, the same length as the query embedding, is passed as
    # the initial state of every document.
    zero_state = {"embedded_doc": np.zeros(len(query_vector))}
    stateful_docs = [
        _DocumentWithState(page_content=text, state=zero_state) for text in texts
    ]
    # Only the final document is given the query embedding itself.
    stateful_docs[-1].state = {"embedded_doc": query_vector}

    similarity_filter = EmbeddingsFilter(  # type: ignore[call-arg]
        embeddings=embedder,
        similarity_threshold=0.75,
        return_similarity_scores=True,
    )
    kept = similarity_filter.compress_documents(stateful_docs, query)

    assert len(kept) == 1
    assert texts[-1] == kept[0].page_content
|
@@ -0,0 +1,26 @@
|
||||
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
||||
from langchain.retrievers.document_compressors import EmbeddingsFilter
|
||||
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import FAISS
|
||||
|
||||
|
||||
def test_contextual_compression_retriever_get_relevant_docs() -> None:
    """Retrieve through a FAISS retriever wrapped by an embeddings filter.

    The base retriever is configured with ``k`` equal to the number of texts;
    the compressor applies a 0.75 similarity threshold. For a Celtics query,
    exactly two documents must be returned and the movie sentence must not be
    one of them.
    """
    texts = [
        "This is a document about the Boston Celtics",
        "The Boston Celtics won the game by 20 points",
        "I simply love going to the movies",
    ]
    embedder = OpenAIEmbeddings()
    compressor = EmbeddingsFilter(embeddings=embedder, similarity_threshold=0.75)

    vector_store = FAISS.from_texts(texts, embedding=embedder)
    faiss_retriever = vector_store.as_retriever(search_kwargs={"k": len(texts)})

    retriever = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=faiss_retriever,
    )

    results = retriever.invoke("Tell me about the Celtics")
    returned_contents = [doc.page_content for doc in results]

    assert len(results) == 2
    assert texts[-1] not in returned_contents
|
@@ -0,0 +1,33 @@
|
||||
from langchain.retrievers.merger_retriever import MergerRetriever
|
||||
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import Chroma
|
||||
|
||||
|
||||
def test_merger_retriever_get_relevant_docs() -> None:
    """Merge the results of two single-hit Chroma retrievers.

    Two separate text groups are each indexed with ``Chroma.from_texts`` and
    exposed as retrievers limited to ``k=1``. A ``MergerRetriever`` over both
    is queried about the Celtics; the merged result must contain exactly two
    documents: the Celtics text from group A and the Celtics text from group B.
    """
    texts_group_a = [
        "This is a document about the Boston Celtics",
        # The comma after this entry was missing, so Python silently
        # concatenated it with the next literal into a single text.
        "Fly me to the moon is one of my favourite songs.",
        "I simply love going to the movies",
    ]
    texts_group_b = [
        "This is a document about the Poenix Suns",
        "The Boston Celtics won the game by 20 points",
        "Real stupidity beats artificial intelligence every time. TP",
    ]
    embeddings = OpenAIEmbeddings()
    retriever_a = Chroma.from_texts(texts_group_a, embedding=embeddings).as_retriever(
        search_kwargs={"k": 1}
    )
    retriever_b = Chroma.from_texts(texts_group_b, embedding=embeddings).as_retriever(
        search_kwargs={"k": 1}
    )

    # The Lord of the Retrievers.
    lotr = MergerRetriever(retrievers=[retriever_a, retriever_b])

    actual = lotr.invoke("Tell me about the Celtics")
    returned_contents = [d.page_content for d in actual]

    assert len(actual) == 2
    assert texts_group_a[0] in returned_contents
    assert texts_group_b[1] in returned_contents
|
Reference in New Issue
Block a user