From 7677ceea6015bfccd44759a128daedcd87c59433 Mon Sep 17 00:00:00 2001 From: Anirudh31415926535 Date: Sat, 13 Jul 2024 07:07:28 +0800 Subject: [PATCH] docs: model parameter mandatory for cohere embedding and rerank (#23349) Latest langchain-cohere sdk mandates passing in the model parameter into the Embeddings and Reranker inits. This PR is to update the docs to reflect these changes. --- docs/docs/how_to/embed_text.mdx | 7 ++++--- .../integrations/retrievers/cohere-reranker.ipynb | 11 ++++++----- docs/docs/integrations/text_embedding/cohere.ipynb | 4 +++- docs/docs/integrations/vectorstores/pgvector.ipynb | 2 +- .../cohere_librarian/blurb_matcher.py | 2 +- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/docs/how_to/embed_text.mdx b/docs/docs/how_to/embed_text.mdx index ed75a36a624..0cf48c24ec7 100644 --- a/docs/docs/how_to/embed_text.mdx +++ b/docs/docs/how_to/embed_text.mdx @@ -67,15 +67,16 @@ If you'd prefer not to set an environment variable you can pass the key in direc ```python from langchain_cohere import CohereEmbeddings -embeddings_model = CohereEmbeddings(cohere_api_key="...") +embeddings_model = CohereEmbeddings(cohere_api_key="...", model='embed-english-v3.0') ``` -Otherwise you can initialize without any params: +Otherwise you can initialize simply as shown below: ```python from langchain_cohere import CohereEmbeddings -embeddings_model = CohereEmbeddings() +embeddings_model = CohereEmbeddings(model='embed-english-v3.0') ``` +Do note that it is mandatory to pass the model parameter while initializing the CohereEmbeddings class. 
diff --git a/docs/docs/integrations/retrievers/cohere-reranker.ipynb b/docs/docs/integrations/retrievers/cohere-reranker.ipynb index 623869d02c9..4054aad8e75 100644 --- a/docs/docs/integrations/retrievers/cohere-reranker.ipynb +++ b/docs/docs/integrations/retrievers/cohere-reranker.ipynb @@ -309,9 +309,9 @@ "documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", "texts = text_splitter.split_documents(documents)\n", - "retriever = FAISS.from_documents(texts, CohereEmbeddings()).as_retriever(\n", - " search_kwargs={\"k\": 20}\n", - ")\n", + "retriever = FAISS.from_documents(\n", + " texts, CohereEmbeddings(model=\"embed-english-v3.0\")\n", + ").as_retriever(search_kwargs={\"k\": 20})\n", "\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", "docs = retriever.invoke(query)\n", @@ -324,7 +324,8 @@ "metadata": {}, "source": [ "## Doing reranking with CohereRerank\n", - "Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll add an `CohereRerank`, uses the Cohere rerank endpoint to rerank the returned results." + "Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll add a `CohereRerank` compressor, which uses the Cohere rerank endpoint to rerank the returned results.\n", + "Note that specifying the model name is mandatory when initializing `CohereRerank`!" 
] }, { @@ -339,7 +340,7 @@ "from langchain_community.llms import Cohere\n", "\n", "llm = Cohere(temperature=0)\n", - "compressor = CohereRerank()\n", + "compressor = CohereRerank(model=\"rerank-english-v3.0\")\n", "compression_retriever = ContextualCompressionRetriever(\n", " base_compressor=compressor, base_retriever=retriever\n", ")\n", diff --git a/docs/docs/integrations/text_embedding/cohere.ipynb b/docs/docs/integrations/text_embedding/cohere.ipynb index d8273163ca9..d13cd450b06 100644 --- a/docs/docs/integrations/text_embedding/cohere.ipynb +++ b/docs/docs/integrations/text_embedding/cohere.ipynb @@ -40,7 +40,9 @@ "metadata": {}, "outputs": [], "source": [ - "embeddings = CohereEmbeddings(model=\"embed-english-light-v3.0\")" + "embeddings = CohereEmbeddings(\n", + " model=\"embed-english-light-v3.0\"\n", + ") # It is mandatory to pass a model parameter to initialize the CohereEmbeddings object" ] }, { diff --git a/docs/docs/integrations/vectorstores/pgvector.ipynb b/docs/docs/integrations/vectorstores/pgvector.ipynb index 85bbc3bcf45..0a83ccb219b 100644 --- a/docs/docs/integrations/vectorstores/pgvector.ipynb +++ b/docs/docs/integrations/vectorstores/pgvector.ipynb @@ -78,7 +78,7 @@ "# See docker command above to launch a postgres instance with pgvector enabled.\n", "connection = \"postgresql+psycopg://langchain:langchain@localhost:6024/langchain\" # Uses psycopg3!\n", "collection_name = \"my_docs\"\n", - "embeddings = CohereEmbeddings()\n", + "embeddings = CohereEmbeddings(model=\"embed-english-v3.0\")\n", "\n", "vectorstore = PGVector(\n", " embeddings=embeddings,\n", diff --git a/templates/cohere-librarian/cohere_librarian/blurb_matcher.py b/templates/cohere-librarian/cohere_librarian/blurb_matcher.py index c029935aaff..50e63827a05 100644 --- a/templates/cohere-librarian/cohere_librarian/blurb_matcher.py +++ b/templates/cohere-librarian/cohere_librarian/blurb_matcher.py @@ -23,7 +23,7 @@ parsed_data = [ ] parsed_data[1] -embeddings = CohereEmbeddings() 
+embeddings = CohereEmbeddings(model="embed-english-v3.0") docsearch = Chroma.from_texts( [x["title"] for x in parsed_data], embeddings, metadatas=parsed_data