multiple: langchain 0.2 in master (#21191)

0.2rc 

migrations

- [x] Move memory
- [x] Move remaining retrievers
- [x] graph_qa chains
- [x] some dependency from evaluation code potentially on math utils
- [x] Move openapi chain from `langchain.chains.api.openapi` to
`langchain_community.chains.openapi`
- [x] Migrate `langchain.chains.ernie_functions` to
`langchain_community.chains.ernie_functions`
- [x] migrate `langchain/chains/llm_requests.py` to
`langchain_community.chains.llm_requests`
- [x] Moving `langchain_community.cross_encoders.base:BaseCrossEncoder`
->
`langchain_community.retrievers.document_compressors.cross_encoder:BaseCrossEncoder`
(namespace not ideal, but it needs to be moved to `langchain` to avoid
circular deps)
- [x] unit tests langchain -- add pytest.mark.community to some unit
tests that will stay in langchain
- [x] unit tests community -- move unit tests that depend on community
to community
- [x] mv integration tests that depend on community to community
- [x] mypy checks

Other todo

- [x] Make deprecation warnings not noisy (need to use warn deprecated
and check that things are implemented properly)
- [x] Update deprecation messages with timeline for code removal (likely
we actually won't be removing things until 0.4 release) -- will give
people more time to transition their code.
- [ ] Add information to deprecation warning to show users how to
migrate their code base using langchain-cli
- [ ] Remove any unnecessary requirements in langchain (e.g., is
SQLAlchemy required?)

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Eugene Yurtsev
2024-05-08 16:46:52 -04:00
committed by GitHub
parent 6b392d6d12
commit f92006de3c
238 changed files with 7552 additions and 5899 deletions

View File

@@ -0,0 +1,29 @@
"""Integration test for compression pipelines."""
from langchain.retrievers.document_compressors import (
DocumentCompressorPipeline,
EmbeddingsFilter,
)
from langchain_core.documents import Document
from langchain_text_splitters.character import CharacterTextSplitter
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.embeddings import OpenAIEmbeddings
def test_document_compressor_pipeline() -> None:
    """Chain a splitter, a redundancy filter and a relevance filter.

    Three sentences are joined into one document, which the pipeline splits
    and filters. The test expects exactly one document to come back, and
    that document must be one of the two cow sentences.
    """
    sentences = [
        "This sentence is about cows",
        "This sentence was about cows",
        "foo bar baz",
    ]
    source_docs = [Document(page_content=". ".join(sentences))]

    # One embeddings instance is shared by both embedding-based stages.
    openai_embeddings = OpenAIEmbeddings()
    chunker = CharacterTextSplitter(chunk_size=20, chunk_overlap=0, separator=". ")
    dedup_stage = EmbeddingsRedundantFilter(embeddings=openai_embeddings)
    relevance_stage = EmbeddingsFilter(
        embeddings=openai_embeddings, similarity_threshold=0.8
    )
    pipeline = DocumentCompressorPipeline(
        transformers=[chunker, dedup_stage, relevance_stage]
    )

    compressed = pipeline.compress_documents(source_docs, "Tell me about farm animals")

    assert len(compressed) == 1
    assert compressed[0].page_content in sentences[:2]

View File

@@ -0,0 +1,45 @@
"""Integration test for LLMChainExtractor."""
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_core.documents import Document
from langchain_community.chat_models import ChatOpenAI
def test_llm_construction_with_kwargs() -> None:
    """Check that ``llm_chain_kwargs`` passed to ``from_llm`` reach the chain.

    The extractor is built with ``verbose=True`` in the chain kwargs and the
    resulting ``llm_chain`` is expected to report ``verbose is True``.
    """
    chain_options = {"verbose": True}
    extractor = LLMChainExtractor.from_llm(
        ChatOpenAI(),
        llm_chain_kwargs=chain_options,
    )
    assert extractor.llm_chain.verbose is True
def test_llm_chain_extractor() -> None:
    """Extract only the Roman Empire sentences from a mixed document.

    Sentences at even indices are about the Roman Empire; those at odd
    indices are unrelated. The extracted text must contain every even-index
    sentence and none of the odd-index ones.
    """
    sentences = [
        "The Roman Empire followed the Roman Republic.",
        "I love chocolate chip cookies—my mother makes great cookies.",
        "The first Roman emperor was Caesar Augustus.",
        "Don't you just love Caesar salad?",
        "The Roman Empire collapsed in 476 AD after the fall of Rome.",
        "Let's go to Olive Garden!",
    ]
    on_topic = (0, 2, 4)
    off_topic = (1, 3, 5)

    mixed_doc = Document(page_content=" ".join(sentences))
    extractor = LLMChainExtractor.from_llm(ChatOpenAI())
    compressed = extractor.compress_documents(
        [mixed_doc], "Tell me about the Roman Empire"
    )
    extracted_text = compressed[0].page_content

    assert all(sentences[idx] in extracted_text for idx in on_topic)
    assert all(sentences[idx] not in extracted_text for idx in off_topic)
def test_llm_chain_extractor_empty() -> None:
    """Expect no documents back when nothing in the input matches the query.

    None of the sentences mention the Roman Empire, so compressing against
    that query should yield an empty result.
    """
    unrelated = [
        "I love chocolate chip cookies—my mother makes great cookies.",
        "Don't you just love Caesar salad?",
        "Let's go to Olive Garden!",
    ]
    extractor = LLMChainExtractor.from_llm(ChatOpenAI())
    compressed = extractor.compress_documents(
        [Document(page_content=" ".join(unrelated))],
        "Tell me about the Roman Empire",
    )
    assert len(compressed) == 0

View File

@@ -0,0 +1,18 @@
"""Integration test for llm-based relevant doc filtering."""
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain_core.documents import Document
from langchain_community.chat_models import ChatOpenAI
def test_llm_chain_filter() -> None:
    """Keep only the food-related documents when filtering with an LLM.

    The first two texts relate to food and the third does not; the filter is
    expected to return exactly the first two.
    """
    statements = [
        "What happened to all of my cookies?",
        "I wish there were better Italian restaurants in my neighborhood.",
        "My favorite color is green",
    ]
    candidates = [Document(page_content=text) for text in statements]

    llm_filter = LLMChainFilter.from_llm(llm=ChatOpenAI())
    kept = llm_filter.compress_documents(candidates, "Things I said related to food")

    assert len(kept) == 2
    # Both food-related statements must be among the surviving documents.
    kept_contents = [doc.page_content for doc in kept]
    assert set(statements[:2]).issubset(kept_contents)

View File

@@ -0,0 +1,43 @@
"""Integration test for embedding-based relevant doc filtering."""
import numpy as np
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_core.documents import Document
from langchain_community.document_transformers.embeddings_redundant_filter import (
_DocumentWithState,
)
from langchain_community.embeddings import OpenAIEmbeddings
def test_embeddings_filter() -> None:
    """Keep only the food-related documents when filtering by embeddings.

    With a similarity threshold of 0.75 against a food question, the first
    two texts are expected to survive and the third to be dropped.
    """
    statements = [
        "What happened to all of my cookies?",
        "I wish there were better Italian restaurants in my neighborhood.",
        "My favorite color is green",
    ]
    candidates = [Document(page_content=text) for text in statements]

    similarity_filter = EmbeddingsFilter(
        embeddings=OpenAIEmbeddings(), similarity_threshold=0.75
    )
    kept = similarity_filter.compress_documents(
        candidates, "What did I say about food?"
    )

    assert len(kept) == 2
    # Both food-related statements must be among the surviving documents.
    kept_contents = [doc.page_content for doc in kept]
    assert set(statements[:2]).issubset(kept_contents)
def test_embeddings_filter_with_state() -> None:
    """Filter documents that carry a precomputed ``embedded_doc`` state.

    Every document starts with an all-zero vector as its ``embedded_doc``;
    the last document's state is then replaced with the embedding of the
    query itself. Only that last document is expected to pass the filter.

    NOTE(review): presumably the filter reads ``embedded_doc`` from the
    state instead of re-embedding the text -- confirm in EmbeddingsFilter.
    """
    statements = [
        "What happened to all of my cookies?",
        "I wish there were better Italian restaurants in my neighborhood.",
        "My favorite color is green",
    ]
    question = "What did I say about food?"

    openai_embeddings = OpenAIEmbeddings()
    question_vector = openai_embeddings.embed_query(question)

    # All documents receive the same zero-vector state to begin with.
    zero_state = {"embedded_doc": np.zeros(len(question_vector))}
    stateful_docs = [
        _DocumentWithState(page_content=text, state=zero_state)
        for text in statements
    ]
    # Give the final document the query's own embedding.
    stateful_docs[-1].state = {"embedded_doc": question_vector}

    similarity_filter = EmbeddingsFilter(  # type: ignore[call-arg]
        embeddings=openai_embeddings,
        similarity_threshold=0.75,
        return_similarity_scores=True,
    )
    kept = similarity_filter.compress_documents(stateful_docs, question)

    assert len(kept) == 1
    assert statements[-1] == kept[0].page_content

View File

@@ -0,0 +1,26 @@
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
def test_contextual_compression_retriever_get_relevant_docs() -> None:
    """Retrieve through a FAISS retriever wrapped with an embeddings filter.

    The base retriever returns all three texts (``k`` equals their count);
    the compressor is expected to drop the movie sentence and leave the two
    Celtics documents.
    """
    corpus = [
        "This is a document about the Boston Celtics",
        "The Boston Celtics won the game by 20 points",
        "I simply love going to the movies",
    ]
    openai_embeddings = OpenAIEmbeddings()

    vector_store = FAISS.from_texts(corpus, embedding=openai_embeddings)
    faiss_retriever = vector_store.as_retriever(search_kwargs={"k": len(corpus)})
    similarity_filter = EmbeddingsFilter(
        embeddings=openai_embeddings, similarity_threshold=0.75
    )
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=similarity_filter,
        base_retriever=faiss_retriever,
    )

    results = compression_retriever.invoke("Tell me about the Celtics")

    assert len(results) == 2
    assert corpus[-1] not in [doc.page_content for doc in results]

View File

@@ -0,0 +1,33 @@
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
def test_merger_retriever_get_relevant_docs() -> None:
    """Merge the top hit of two Chroma retrievers with ``MergerRetriever``.

    Each group of texts is indexed in its own Chroma store and exposed as a
    retriever with ``k=1``. Invoking the merged retriever with a Celtics
    query is expected to return two documents: the Celtics text from each
    group.
    """
    texts_group_a = [
        "This is a document about the Boston Celtics",
        # The trailing comma matters: without it Python concatenates this
        # literal with the next one and the group holds two texts, not three.
        "Fly me to the moon is one of my favourite songs.",
        "I simply love going to the movies",
    ]
    texts_group_b = [
        "This is a document about the Poenix Suns",
        "The Boston Celtics won the game by 20 points",
        "Real stupidity beats artificial intelligence every time. TP",
    ]
    embeddings = OpenAIEmbeddings()
    retriever_a = Chroma.from_texts(texts_group_a, embedding=embeddings).as_retriever(
        search_kwargs={"k": 1}
    )
    retriever_b = Chroma.from_texts(texts_group_b, embedding=embeddings).as_retriever(
        search_kwargs={"k": 1}
    )

    # The Lord of the Retrievers.
    lotr = MergerRetriever(retrievers=[retriever_a, retriever_b])

    actual = lotr.invoke("Tell me about the Celtics")
    contents = [d.page_content for d in actual]
    assert len(actual) == 2
    assert texts_group_a[0] in contents
    assert texts_group_b[1] in contents