mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 17:08:47 +00:00
Marqo Vector Store Examples & Type Hints (#7326)
This PR improves the example notebook for the Marqo vectorstore implementation by adding a new RetrievalQAWithSourcesChain example. The `embedding` parameter of `from_documents` is now typed `Union[Embeddings, None]` and defaults to `None`, because Marqo ignores it. This PR also upgrades the Marqo dependency to 0.11.0 and removes the `marqo_device` parameter, which the Marqo client no longer accepts after a breaking API change. Related to #7068 @tomhamer @hwchase17 --------- Co-authored-by: Tom Hamer <tom@marqo.ai>
This commit is contained in:
parent
5809c3d29d
commit
3074306ae1
@ -28,4 +28,4 @@ To import this vectorstore:
|
||||
from langchain.vectorstores import Marqo
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Marqo wrapper and some of its unique features, see [this notebook](../modules/indexes/vectorstores/examples/marqo.ipynb)
|
||||
For a more detailed walkthrough of the Marqo wrapper and some of its unique features, see [this notebook](../modules/data_connection/vectorstores/integrations/marqo.ipynb)
|
||||
|
@ -8,7 +8,17 @@
|
||||
"source": [
|
||||
"# Marqo\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Marqo database."
|
||||
"This notebook shows how to use functionality related to the Marqo vectorstore.\n",
|
||||
"\n",
|
||||
">[Marqo](https://www.marqo.ai/) is an open-source vector search engine. Marqo allows you to store and query multimodal data such as text and images. Marqo creates the vectors for you using a huge selection of opensource models, you can also provide your own finetuned models and Marqo will handle the loading and inference for you.\n",
|
||||
"\n",
|
||||
"To run this notebook with our docker image please run the following commands first to get Marqo:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"docker pull marqoai/marqo:latest\n",
|
||||
"docker rm -f marqo\n",
|
||||
"docker run --name marqo -it --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -17,6 +27,16 @@
|
||||
"id": "aac9563e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install marqo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5d1489ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Marqo\n",
|
||||
@ -33,7 +53,7 @@
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
]
|
||||
},
|
||||
@ -78,6 +98,10 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
@ -98,6 +122,10 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
@ -146,12 +174,12 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'errors': False,\n",
|
||||
" 'processingTimeMs': 4675.6921890009835,\n",
|
||||
" 'processingTimeMs': 2090.2822139996715,\n",
|
||||
" 'index_name': 'langchain-multimodal-demo',\n",
|
||||
" 'items': [{'_id': '7af25f35-5d41-4ff5-95fa-ab6bd6755176',\n",
|
||||
" 'items': [{'_id': 'aa92fc1c-1fb2-4d86-b027-feb507c419f7',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201},\n",
|
||||
" {'_id': '70434d17-2680-4e33-b060-a37b9b8b6959',\n",
|
||||
" {'_id': '5142c258-ef9f-4bf2-a1a6-2307280173a0',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201}]}"
|
||||
]
|
||||
@ -248,12 +276,12 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'errors': False,\n",
|
||||
" 'processingTimeMs': 500.1302719992964,\n",
|
||||
" 'processingTimeMs': 139.2144540004665,\n",
|
||||
" 'index_name': 'langchain-byo-index-demo',\n",
|
||||
" 'items': [{'_id': 'cbad6f9e-a4ea-45c6-9a85-1b9c0a59827c',\n",
|
||||
" 'items': [{'_id': '27c05a1c-b8a9-49a5-ae73-fbf1eb51dc3f',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201},\n",
|
||||
" {'_id': 'c0be68cb-8847-4e95-a4c9-4791b54f772c',\n",
|
||||
" {'_id': '6889afe0-e600-43c1-aa3b-1d91bf6db274',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201}]}"
|
||||
]
|
||||
@ -275,6 +303,7 @@
|
||||
" print(f\"Creating {index_name}\")\n",
|
||||
"\n",
|
||||
"# This index could have been created by another system\n",
|
||||
"client.create_index(index_name)\n",
|
||||
"client.index(index_name).add_documents(\n",
|
||||
" [ \n",
|
||||
" {\n",
|
||||
@ -300,7 +329,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['484d8436-cb09-49f2-8f9d-39671c7ebfaa']"
|
||||
"['9986cc72-adcd-4080-9d74-265c173a9ec3']"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
@ -416,6 +445,110 @@
|
||||
"doc_results = docsearch.similarity_search(query)\n",
|
||||
"print(doc_results[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2d026aa0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Question Answering with Sources\n",
|
||||
"\n",
|
||||
"This section shows how to use Marqo as part of a `RetrievalQAWithSourcesChain`. Marqo will perform the searches for information in the sources."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "e4ca223c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key:········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQAWithSourcesChain\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "5c6e45f9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"../../../state_of_the_union.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "70a7f320",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Index langchain-qa-with-retrieval exists.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"index_name = \"langchain-qa-with-retrieval\"\n",
|
||||
"docsearch = Marqo.from_documents(docs, index_name=index_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "b3b008a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
|
||||
" OpenAI(temperature=0), chain_type=\"stuff\", retriever=docsearch.as_retriever()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e1457716",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'answer': ' The president honored Justice Breyer, thanking him for his service and noting that he is a retiring Justice of the United States Supreme Court.\\n',\n",
|
||||
" 'sources': '../../../state_of_the_union.txt'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain(\n",
|
||||
" {\"question\": \"What did the president say about Justice Breyer\"},\n",
|
||||
" return_only_outputs=True,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -434,7 +567,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -343,7 +343,7 @@ class Marqo(VectorStore):
|
||||
def from_documents(
|
||||
cls: Type[Marqo],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: Union[Embeddings, None] = None,
|
||||
**kwargs: Any,
|
||||
) -> Marqo:
|
||||
"""Return VectorStore initialized from documents. Note that Marqo does not
|
||||
@ -371,7 +371,6 @@ class Marqo(VectorStore):
|
||||
index_name: str = "",
|
||||
url: str = "http://localhost:8882",
|
||||
api_key: str = "",
|
||||
marqo_device: str = "cpu",
|
||||
add_documents_settings: Optional[Dict[str, Any]] = {},
|
||||
searchable_attributes: Optional[List[str]] = None,
|
||||
page_content_builder: Optional[Callable[[Dict[str, str]], str]] = None,
|
||||
@ -407,7 +406,6 @@ class Marqo(VectorStore):
|
||||
api_key (str, optional): The API key for Marqo. Defaults to "".
|
||||
metadatas (Optional[List[dict]], optional): A list of metadatas, to
|
||||
accompany the texts. Defaults to None.
|
||||
marqo_device (str, optional): The device for the marqo to use on the server,
|
||||
this is only used when a new index is being created. Defaults to "cpu". Can
|
||||
be "cpu" or "cuda".
|
||||
add_documents_settings (Optional[Dict[str, Any]], optional): Settings
|
||||
@ -433,7 +431,7 @@ class Marqo(VectorStore):
|
||||
if not index_name:
|
||||
index_name = str(uuid.uuid4())
|
||||
|
||||
client = marqo.Client(url=url, api_key=api_key, indexing_device=marqo_device)
|
||||
client = marqo.Client(url=url, api_key=api_key)
|
||||
|
||||
try:
|
||||
client.create_index(index_name, settings_dict=index_settings)
|
||||
|
561
poetry.lock
generated
561
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -38,7 +38,7 @@ pinecone-text = {version = "^0.4.2", optional = true}
|
||||
pymongo = {version = "^4.3.3", optional = true}
|
||||
clickhouse-connect = {version="^0.5.14", optional=true}
|
||||
weaviate-client = {version = "^3", optional = true}
|
||||
marqo = {version = "^0.9.1", optional=true}
|
||||
marqo = {version = "^0.11.0", optional=true}
|
||||
google-api-python-client = {version = "2.70.0", optional = true}
|
||||
google-auth = {version = "^2.18.1", optional = true}
|
||||
wolframalpha = {version = "5.0.0", optional = true}
|
||||
|
Loading…
Reference in New Issue
Block a user