From d972c6d6eab8ca44e81b100c7f81a0e6e1e9b25e Mon Sep 17 00:00:00 2001 From: "Chaunte W. Lacewell" Date: Thu, 20 Feb 2025 16:48:46 -0800 Subject: [PATCH] partners: add langchain-vdms (#29857) **Description:** Deprecate vdms in community, add integration langchain-vdms, and update any related files **Issue:** n/a **Dependencies:** langchain-vdms **Twitter handle:** n/a --------- Co-authored-by: Chester Curme --- cookbook/multi_modal_RAG_vdms.ipynb | 233 ++- cookbook/visual_RAG_vdms.ipynb | 45 +- docs/docs/integrations/providers/vdms.mdx | 52 +- .../docs/integrations/vectorstores/vdms.ipynb | 1757 +++++++---------- .../langchain_community/vectorstores/vdms.py | 2 + libs/packages.yml | 4 + 6 files changed, 969 insertions(+), 1124 deletions(-) diff --git a/cookbook/multi_modal_RAG_vdms.ipynb b/cookbook/multi_modal_RAG_vdms.ipynb index 20a19810cf2..28833eecc29 100644 --- a/cookbook/multi_modal_RAG_vdms.ipynb +++ b/cookbook/multi_modal_RAG_vdms.ipynb @@ -21,40 +21,6 @@ "* Passing raw images and text chunks to a multimodal LLM for answer synthesis " ] }, - { - "cell_type": "markdown", - "id": "6a6b6e73", - "metadata": {}, - "source": [ - "## Start VDMS Server\n", - "\n", - "Let's start a VDMS docker using port 55559 instead of default 55555. \n", - "Keep note of the port and hostname as this is needed for the vector store as it uses the VDMS Python client to connect to the server." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "5f483872", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a1b9206b08ef626e15b356bf9e031171f7c7eb8f956a2733f196f0109246fe2b\n" - ] - } - ], - "source": [ - "! 
docker run --rm -d -p 55559:55555 --name vdms_rag_nb intellabs/vdms:latest\n", - "\n", - "# Connect to VDMS Vector Store\n", - "from langchain_community.vectorstores.vdms import VDMS_Client\n", - "\n", - "vdms_client = VDMS_Client(port=55559)" - ] - }, { "cell_type": "markdown", "id": "2498a0a1", @@ -67,20 +33,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "febbc459-ebba-4c1a-a52b-fed7731593f8", "metadata": {}, "outputs": [], "source": [ - "! pip install --quiet -U vdms langchain-experimental\n", + "! pip install --quiet -U langchain-vdms langchain-experimental langchain-ollama\n", "\n", "# lock to 0.10.19 due to a persistent bug in more recent versions\n", - "! pip install --quiet pdf2image \"unstructured[all-docs]==0.10.19\" pillow pydantic lxml open_clip_torch" + "! pip install --quiet pdf2image \"unstructured[all-docs]==0.10.19\" \"onnxruntime==1.17.0\" pillow pydantic lxml open_clip_torch" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "78ac6543", "metadata": {}, "outputs": [], @@ -89,6 +55,40 @@ "# load_dotenv(find_dotenv(), override=True);" ] }, + { + "cell_type": "markdown", + "id": "e5c8916e", + "metadata": {}, + "source": [ + "## Start VDMS Server\n", + "\n", + "Let's start a VDMS docker using port 55559 instead of default 55555. \n", + "Keep note of the port and hostname as this is needed for the vector store as it uses the VDMS Python client to connect to the server." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1e6e2c15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a701e5ac3523006e9540b5355e2d872d5d78383eab61562a675d5b9ac21fde65\n" + ] + } + ], + "source": [ + "! 
docker run --rm -d -p 55559:55555 --name vdms_rag_nb intellabs/vdms:latest\n", + "\n", + "# Connect to VDMS Vector Store\n", + "from langchain_vdms.vectorstores import VDMS_Client\n", + "\n", + "vdms_client = VDMS_Client(port=55559)" + ] + }, { "cell_type": "markdown", "id": "1e94b3fb-8e3e-4736-be0a-ad881626c7bd", @@ -115,11 +115,12 @@ "import requests\n", "\n", "# Folder to store pdf and extracted images\n", - "datapath = Path(\"./data/multimodal_files\").resolve()\n", + "base_datapath = Path(\"./data/multimodal_files\").resolve()\n", + "datapath = base_datapath / \"images\"\n", "datapath.mkdir(parents=True, exist_ok=True)\n", "\n", "pdf_url = \"https://www.loc.gov/lcm/pdf/LCM_2020_1112.pdf\"\n", - "pdf_path = str(datapath / pdf_url.split(\"/\")[-1])\n", + "pdf_path = str(base_datapath / pdf_url.split(\"/\")[-1])\n", "with open(pdf_path, \"wb\") as f:\n", " f.write(requests.get(pdf_url).content)" ] @@ -185,8 +186,8 @@ "source": [ "import os\n", "\n", - "from langchain_community.vectorstores import VDMS\n", "from langchain_experimental.open_clip import OpenCLIPEmbeddings\n", + "from langchain_vdms import VDMS\n", "\n", "# Create VDMS\n", "vectorstore = VDMS(\n", @@ -312,10 +313,10 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.llms.ollama import Ollama\n", - "from langchain_core.messages import HumanMessage\n", + "from langchain_core.messages import HumanMessage, SystemMessage\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n", + "from langchain_ollama.llms import OllamaLLM\n", "\n", "\n", "def prompt_func(data_dict):\n", @@ -340,8 +341,8 @@ " \"As an expert art critic and historian, your task is to analyze and interpret images, \"\n", " \"considering their historical and cultural significance. Alongside the images, you will be \"\n", " \"provided with related text to offer context. 
Both will be retrieved from a vectorstore based \"\n", - " \"on user-input keywords. Please convert answers to english and use your extensive knowledge \"\n", - " \"and analytical skills to provide a comprehensive summary that includes:\\n\"\n", + " \"on user-input keywords. Please use your extensive knowledge and analytical skills to provide a \"\n", + " \"comprehensive summary that includes:\\n\"\n", " \"- A detailed description of the visual elements in the image.\\n\"\n", " \"- The historical and cultural context of the image.\\n\"\n", " \"- An interpretation of the image's symbolism and meaning.\\n\"\n", @@ -359,7 +360,7 @@ " \"\"\"Multi-modal RAG chain\"\"\"\n", "\n", " # Multi-modal LLM\n", - " llm_model = Ollama(\n", + " llm_model = OllamaLLM(\n", " verbose=True, temperature=0.5, model=\"llava\", base_url=\"http://localhost:11434\"\n", " )\n", "\n", @@ -419,6 +420,121 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "© 2017 LARRY D. MOORE\n", + "\n", + "contemporary criticism of the less-than- thoughtful circumstances under which Lange photographed Thomson, the picture’s power to engage has not diminished. Artists in other countries have appropriated the image, changing the mother’s features into those of other ethnicities, but keeping her expression and the positions of her clinging children. Long after anyone could help the Thompson family, this picture has resonance in another time of national crisis, unemployment and food shortages.\n", + "\n", + "A striking, but very different picture is a 1900 portrait of the legendary Hin-mah-too-yah- lat-kekt (Chief Joseph) of the Nez Percé people. The Bureau of American Ethnology in Washington, D.C., regularly arranged for its photographer, De Lancey Gill, to photograph Native American delegations that came to the capital to confer with officials about tribal needs and concerns. 
Although Gill described Chief Joseph as having “an air of gentleness and quiet reserve,” the delegate skeptically appraises the photographer, which is not surprising given that the United States broke five treaties with Chief Joseph and his father between 1855 and 1885.\n", + "\n", + "More than a glance, second looks may reveal new knowledge into complex histories.\n", + "\n", + "Anne Wilkes Tucker is the photography curator emeritus of the Museum of Fine Arts, Houston and curator of the “Not an Ostrich” exhibition.\n", + "\n", + "28\n", + "\n", + "28 LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "LIBRARY OF CONGRESS MAGAZINE\n", + "THEYRE WILLING TO HAVE MEENTERTAIN THEM DURING THE DAY,BUT AS SOON AS IT STARTSGETTING DARK, THEY ALLGO OFF, AND LEAVE ME! \n", + "ROSA PARKS: IN HER OWN WORDS\n", + "\n", + "COMIC ART: 120 YEARS OF PANELS AND PAGES\n", + "\n", + "SHALL NOT BE DENIED: WOMEN FIGHT FOR THE VOTE\n", + "\n", + "More information loc.gov/exhibits\n", + "Nuestra Sefiora de las Iguanas\n", + "\n", + "Graciela Iturbide’s 1979 portrait of Zobeida Díaz in the town of Juchitán in southeastern Mexico conveys the strength of women and reflects their important contributions to the economy. Díaz, a merchant, was selling iguanas to cook and eat, carrying them on her head, as is customary.\n", + "\n", + "GRACIELA ITURBIDE. “NUESTRA SEÑORA DE LAS IGUANAS.” 1979. GELATIN SILVER PRINT. © GRACIELA ITURBIDE, USED BY PERMISSION. PRINTS AND PHOTOGRAPHS DIVISION.\n", + "\n", + "Iturbide requested permission to take a photograph, but this proved challenging because the iguanas were constantly moving, causing Díaz to laugh. The result, however, was a brilliant portrait that the inhabitants of Juchitán claimed with pride. They have reproduced it on posters and erected a statue honoring Díaz and her iguanas. 
The photo now appears throughout the world, inspiring supporters of feminism, women’s rights and gender equality.\n", + "\n", + "—Adam Silvia is a curator in the Prints and Photographs Division.\n", + "\n", + "6\n", + "\n", + "6 LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "‘Migrant Mother’ is Florence Owens Thompson\n", + "\n", + "The iconic portrait that became the face of the Great Depression is also the most famous photograph in the collections of the Library of Congress.\n", + "\n", + "The Library holds the original source of the photo — a nitrate negative measuring 4 by 5 inches. Do you see a faint thumb in the bottom right? The photographer, Dorothea Lange, found the thumb distracting and after a few years had the negative altered to make the thumb almost invisible. Lange’s boss at the Farm Security Administration, Roy Stryker, criticized her action because altering a negative undermines the credibility of a documentary photo.\n", + "Shrimp Picker\n", + "\n", + "The photos and evocative captions of Lewis Hine served as source material for National Child Labor Committee reports and exhibits exposing abusive child labor practices in the United States in the first decades of the 20th century.\n", + "\n", + "LEWIS WICKES HINE. “MANUEL, THE YOUNG SHRIMP-PICKER, FIVE YEARS OLD, AND A MOUNTAIN OF CHILD-LABOR OYSTER SHELLS BEHIND HIM. HE WORKED LAST YEAR. UNDERSTANDS NOT A WORD OF ENGLISH. DUNBAR, LOPEZ, DUKATE COMPANY. LOCATION: BILOXI, MISSISSIPPI.” FEBRUARY 1911. NATIONAL CHILD LABOR COMMITTEE COLLECTION. PRINTS AND PHOTOGRAPHS DIVISION.\n", + "\n", + "For 15 years, Hine\n", + "\n", + "crisscrossed the country, documenting the practices of the worst offenders. 
His effective use of photography made him one of the committee's greatest publicists in the campaign for legislation to ban child labor.\n", + "\n", + "Hine was a master at taking photos that catch attention and convey a message and, in this photo, he framed Manuel in a setting that drove home the boy’s small size and unsafe environment.\n", + "\n", + "Captions on photos of other shrimp pickers emphasized their long working hours as well as one hazard of the job: The acid from the shrimp made pickers’ hands sore and “eats the shoes off your feet.”\n", + "\n", + "Such images alerted viewers to all that workers, their families and the nation sacrificed when children were part of the labor force. The Library holds paper records of the National Child Labor Committee as well as over 5,000 photographs.\n", + "\n", + "—Barbara Natanson is head of the Reference Section in the Prints and Photographs Division.\n", + "\n", + "8\n", + "\n", + "LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "Intergenerational Portrait\n", + "\n", + "Raised on the Apsáalooke (Crow) reservation in Montana, photographer Wendy Red Star created her “Apsáalooke Feminist” self-portrait series with her daughter Beatrice. With a dash of wry humor, mother and daughter are their own first-person narrators.\n", + "\n", + "Red Star explains the significance of their appearance: “The dress has power: You feel strong and regal wearing it. In my art, the elk tooth dress specifically symbolizes Crow womanhood and the matrilineal line connecting me to my ancestors. As a mother, I spend hours searching for the perfect elk tooth dress materials to make a prized dress for my daughter.”\n", + "\n", + "In a world that struggles with cultural identities, this photograph shows us the power and beauty of blending traditional and contemporary styles.\n", + "‘American Gothic’ Product #216040262 Price: $24\n", + "\n", + "U.S. 
Capitol at Night Product #216040052 Price: $24\n", + "\n", + "Good Reading Ahead Product #21606142 Price: $24\n", + "\n", + "Gordon Parks created an iconic image with this 1942 photograph of cleaning woman Ella Watson.\n", + "\n", + "Snow blankets the U.S. Capitol in this classic image by Ernest L. Crandall.\n", + "\n", + "Start your new year out right with a poster promising good reading for months to come.\n", + "\n", + "▪ Order online: loc.gov/shop ▪ Order by phone: 888.682.3557\n", + "\n", + "26\n", + "\n", + "LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "LIBRARY OF CONGRESS MAGAZINE\n", + "\n", + "SUPPORT\n", + "\n", + "A PICTURE OF PHILANTHROPY Annenberg Foundation Gives $1 Million and a Photographic Collection to the Library.\n", + "\n", + "A major gift by Wallis Annenberg and the Annenberg Foundation in Los Angeles will support the effort to reimagine the visitor experience at the Library of Congress. The foundation also is donating 1,000 photographic prints from its Annenberg Space for Photography exhibitions to the Library.\n", + "\n", + "The Library is pursuing a multiyear plan to transform the experience of its nearly 2 million annual visitors, share more of its treasures with the public and show how Library collections connect with visitors’ own creativity and research. The project is part of a strategic plan established by Librarian of Congress Carla Hayden to make the Library more user-centered for Congress, creators and learners of all ages.\n", + "\n", + "A 2018 exhibition at the Annenberg Space for Photography in Los Angeles featured over 400 photographs from the Library. 
The Library is planning a future photography exhibition, based on the Annenberg-curated show, along with a documentary film on the Library and its history, produced by the Annenberg Space for Photography.\n", + "\n", + "“The nation’s library is honored to have the strong support of Wallis Annenberg and the Annenberg Foundation as we enhance the experience for our visitors,” Hayden said. “We know that visitors will find new connections to the Library through the incredible photography collections and countless other treasures held here to document our nation’s history and creativity.”\n", + "\n", + "To enhance the Library’s holdings, the foundation is giving the Library photographic prints for long-term preservation from 10 other exhibitions hosted at the Annenberg Space for Photography. The Library holds one of the world’s largest photography collections, with about 14 million photos and over 1 million images digitized and available online.\n", + "18 LIBRARY OF CONGRESS MAGAZINE\n" + ] } ], "source": [ @@ -461,10 +577,17 @@ "name": "stdout", "output_type": "stream", "text": [ - " The image depicts a woman with several children. The woman appears to be of Cherokee heritage, as suggested by the text provided. The image is described as having been initially regretted by the subject, Florence Owens Thompson, due to her feeling that it did not accurately represent her leadership qualities.\n", - "The historical and cultural context of the image is tied to the Great Depression and the Dust Bowl, both of which affected the Cherokee people in Oklahoma. The photograph was taken during this period, and its subject, Florence Owens Thompson, was a leader within her community who worked tirelessly to help those affected by these crises.\n", - "The image's symbolism and meaning can be interpreted as a representation of resilience and strength in the face of adversity. 
The woman is depicted with multiple children, which could signify her role as a caregiver and protector during difficult times.\n", - "Connections between the image and the related text include Florence Owens Thompson's leadership qualities and her regretted feelings about the photograph. Additionally, the mention of Dorothea Lange, the photographer who took this photo, ties the image to its historical context and the broader narrative of the Great Depression and Dust Bowl in Oklahoma. \n" + " The image is a black and white photograph by Dorothea Lange titled \"Destitute Pea Pickers in California. Mother of Seven Children. Age Thirty-Two. Nipomo, California.\" It was taken in March 1936 as part of the Farm Security Administration-Office of War Information Collection.\n", + "\n", + "The photograph features a woman with seven children, who appear to be in a state of poverty and hardship. The woman is seated, looking directly at the camera, while three of her children are standing behind her. They all seem to be dressed in ragged clothing, indicative of their impoverished condition.\n", + "\n", + "The historical context of this image is related to the Great Depression, which was a period of economic hardship in the United States that lasted from 1929 to 1939. During this time, many people struggled to make ends meet, and poverty was widespread. This photograph captures the plight of one such family during this difficult period.\n", + "\n", + "The symbolism of the image is multifaceted. The woman's direct gaze at the camera can be seen as a plea for help or an expression of desperation. The ragged clothing of the children serves as a stark reminder of the poverty and hardship experienced by many during this time.\n", + "\n", + "In terms of connections to the related text, it is mentioned that Florence Owens Thompson, the woman in the photograph, initially regretted having her picture taken. 
However, she later came to appreciate the importance of the image as a representation of the struggles faced by many during the Great Depression. The mention of Helena Zinkham suggests that she may have played a role in the creation or distribution of this photograph.\n", + "\n", + "Overall, this image is a powerful depiction of poverty and hardship during the Great Depression, capturing the resilience and struggles of one family amidst difficult times. \n" ] } ], @@ -491,11 +614,17 @@ "source": [ "! docker kill vdms_rag_nb" ] + }, + { + "cell_type": "markdown", + "id": "fe4a98ee", + "metadata": {}, + "source": [] } ], "metadata": { "kernelspec": { - "display_name": ".langchain-venv", + "display_name": ".test-venv", "language": "python", "name": "python3" }, @@ -509,7 +638,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/cookbook/visual_RAG_vdms.ipynb b/cookbook/visual_RAG_vdms.ipynb index d0d87185d3a..740831c8adb 100644 --- a/cookbook/visual_RAG_vdms.ipynb +++ b/cookbook/visual_RAG_vdms.ipynb @@ -26,7 +26,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2e44b44201c8778b462342ac97f5ccf05a4e02aa8a04505ecde97bf20dcc4cbb\n" + "76e78b89cee4d6d31154823f93592315df79c28410dfbfc87c9f70cbfdfa648b\n" ] } ], @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install --quiet -U vdms langchain-experimental sentence-transformers opencv-python open_clip_torch torch accelerate" + "! pip install --quiet -U langchain-vdms langchain-experimental sentence-transformers opencv-python open_clip_torch torch accelerate" ] }, { @@ -63,7 +63,16 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/data1/cwlacewe/apps/cwlacewe_langchain/.langchain-venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. 
Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import json\n", "import os\n", @@ -80,10 +89,10 @@ "from langchain_community.embeddings.sentence_transformer import (\n", " SentenceTransformerEmbeddings,\n", ")\n", - "from langchain_community.vectorstores.vdms import VDMS, VDMS_Client\n", "from langchain_core.callbacks.manager import CallbackManagerForLLMRun\n", "from langchain_core.runnables import ConfigurableField\n", "from langchain_experimental.open_clip import OpenCLIPEmbeddings\n", + "from langchain_vdms.vectorstores import VDMS, VDMS_Client\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", @@ -363,7 +372,7 @@ "\t\tThere are 2 shoppers in this video. Shopper 1 is wearing a plaid shirt and a spectacle. Shopper 2 who is not completely captured in the frame seems to wear a black shirt and is moving away with his back turned towards the camera. There is a shelf towards the right of the camera frame. Shopper 2 is hanging an item back to a hanger and then quickly walks away in a similar fashion as shopper 2. Contents of the nearer side of the shelf with respect to camera seems to be camping lanterns and cleansing agents, arranged at the top. In the middle part of the shelf, various tools including grommets, a pocket saw, candles, and other helpful camping items can be observed. Midway through the shelf contains items which appear to be steel containers and items made up of plastic with red, green, orange, and yellow colors, while those at the bottom are packed in cardboard boxes. 
Contents at the farther part of the shelf are well stocked and organized but are not glaringly visible.\n", "\n", "\tMetadata:\n", - "\t\t{'fps': 24.0, 'id': 'c6e5f894-b905-46f5-ac9e-4487a9235561', 'total_frames': 120.0, 'video': 'clip16.mp4'}\n", + "\t\t{'fps': 24.0, 'total_frames': 120.0, 'video': 'clip16.mp4'}\n", "Retrieved Top matching video!\n", "\n", "\n" @@ -392,18 +401,12 @@ "metadata": {}, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3edf8783e114487ca490d8dec5c46884", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/2 [00:00\n" + "I see a person standing in front of a well-stocked shelf, they are wearing a light-colored shirt and glasses, and they have a red shopping basket in their left hand. They are leaning forward and picking up an item from the shelf with their right hand. The item is packaged in a blue-green box. Based on the available information, I cannot confirm whether the basket is empty or contains items. 
However, the rest of the\n" ] } ], @@ -655,7 +658,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": ".langchain-venv", "language": "python", "name": "python3" }, @@ -669,7 +672,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/vdms.mdx b/docs/docs/integrations/providers/vdms.mdx index 2ed0ea4455d..b1b02217741 100644 --- a/docs/docs/integrations/providers/vdms.mdx +++ b/docs/docs/integrations/providers/vdms.mdx @@ -9,53 +9,35 @@ ### Install Client ```bash -pip install vdms +pip install langchain-vdms ``` ### Install Database There are two ways to get started with VDMS: -#### Install VDMS on your local machine via docker -```bash - docker run -d -p 55555:55555 intellabs/vdms:latest -``` - -#### Install VDMS directly on your local machine -Please see [installation instructions](https://github.com/IntelLabs/vdms/blob/master/INSTALL.md). +1. Install VDMS on your local machine via docker + ```bash + docker run -d -p 55555:55555 intellabs/vdms:latest + ``` +2. Install VDMS directly on your local machine. Please see +[installation instructions](https://github.com/IntelLabs/vdms/blob/master/INSTALL.md). ## VectorStore -The vector store is a simple wrapper around VDMS. It provides a simple interface to store and retrieve data. 
+To import this vectorstore: ```python -from langchain_community.document_loaders import TextLoader -from langchain.text_splitter import CharacterTextSplitter - -loader = TextLoader("./state_of_the_union.txt") -documents = loader.load() -text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0) -docs = text_splitter.split_documents(documents) - -from langchain_community.vectorstores import VDMS -from langchain_community.vectorstores.vdms import VDMS_Client -from langchain_huggingface import HuggingFaceEmbeddings - -client = VDMS_Client("localhost", 55555) -model_name = "sentence-transformers/all-mpnet-base-v2" -vectorstore = VDMS.from_documents( - docs, - client=client, - collection_name="langchain-demo", - embedding_function=HuggingFaceEmbeddings(model_name=model_name), - engine="FaissFlat" - distance_strategy="L2", -) - -query = "What did the president say about Ketanji Brown Jackson" -results = vectorstore.similarity_search(query) +from langchain_vdms import VDMS +from langchain_vdms.vectorstores import VDMS ``` -For a more detailed walkthrough of the VDMS wrapper, see [this notebook](/docs/integrations/vectorstores/vdms) +To import the VDMS Client connector: + +```python +from langchain_vdms.vectorstores import VDMS_Client +``` + +For a more detailed walkthrough of the VDMS wrapper, see [this guide](/docs/integrations/vectorstores/vdms). 
diff --git a/docs/docs/integrations/vectorstores/vdms.ipynb b/docs/docs/integrations/vectorstores/vdms.ipynb index dd3adceab35..f8fe1b69c01 100644 --- a/docs/docs/integrations/vectorstores/vdms.ipynb +++ b/docs/docs/integrations/vectorstores/vdms.ipynb @@ -1,1156 +1,590 @@ { "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: VDMS\n", + "---" + ] + }, { "cell_type": "markdown", - "id": "683953b3", + "id": "ef1f0986", "metadata": {}, "source": [ "# Intel's Visual Data Management System (VDMS)\n", "\n", - ">Intel's [VDMS](https://github.com/IntelLabs/vdms) is a storage solution for efficient access of big-”visual”-data that aims to achieve cloud scale by searching for relevant visual data via visual metadata stored as a graph and enabling machine friendly enhancements to visual data for faster access. VDMS is licensed under MIT.\n", + "This notebook covers how to get started with VDMS as a vector store.\n", + "\n", + ">Intel's [Visual Data Management System (VDMS)](https://github.com/IntelLabs/vdms) is a storage solution for efficient access of big-”visual”-data that aims to achieve cloud scale by searching for relevant visual data via visual metadata stored as a graph and enabling machine friendly enhancements to visual data for faster access. VDMS is licensed under MIT. 
For more information on `VDMS`, visit [this page](https://github.com/IntelLabs/vdms/wiki), and find the LangChain API reference [here](https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.vdms.VDMS.html).\n", "\n", "VDMS supports:\n", "* K nearest neighbor search\n", "* Euclidean distance (L2) and inner product (IP)\n", - "* Libraries for indexing and computing distances: TileDBDense, TileDBSparse, FaissFlat (Default), FaissIVFFlat, Flinng\n", + "* Libraries for indexing and computing distances: FaissFlat (Default), FaissHNSWFlat, FaissIVFFlat, Flinng, TileDBDense, TileDBSparse\n", "* Embeddings for text, images, and video\n", - "* Vector and metadata searches\n", + "* Vector and metadata searches" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", "\n", - "VDMS has server and client components. To setup the server, see the [installation instructions](https://github.com/IntelLabs/vdms/blob/master/INSTALL.md) or use the [docker image](https://hub.docker.com/r/intellabs/vdms).\n", - "\n", - "This notebook shows how to use VDMS as a vector store using the docker image.\n", - "\n", - "You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration\n", - "\n", - "To begin, install the Python packages for the VDMS client and Sentence Transformers:" + "To access VDMS vector stores you'll need to install the `langchain-vdms` integration package and deploy a VDMS server via the publicly available Docker image.\n", + "For simplicity, this notebook will deploy a VDMS server on local host using port 55555." 
] }, { "cell_type": "code", "execution_count": 1, - "id": "2167badd", - "metadata": {}, + "id": "64e28aa6", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Note: you may need to restart the kernel to use updated packages.\n" + "Note: you may need to restart the kernel to use updated packages.\n", + "c464076e292613df27241765184a673b00c775cecb7792ef058591c2cbf0bde8\n" ] } ], "source": [ - "# Pip install necessary package\n", - "%pip install --upgrade --quiet pip vdms sentence-transformers langchain-huggingface > /dev/null" + "%pip install -qU \"langchain-vdms>=0.1.3\"\n", + "!docker run --no-healthcheck --rm -d -p 55555:55555 --name vdms_vs_test_nb intellabs/vdms:latest\n", + "!sleep 5" ] }, { "cell_type": "markdown", - "id": "af2b4512", + "id": "9695dee7", "metadata": {}, "source": [ - "## Start VDMS Server\n", - "Here we start the VDMS server with port 55555." + "### Credentials\n" + ] + }, + { + "cell_type": "markdown", + "id": "7f98392b", + "metadata": {}, + "source": [ + "You can use `VDMS` without any credentials.\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" ] }, { "cell_type": "code", "execution_count": 2, - "id": "4b1537c7", + "id": "e7b6a6e0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "b26917ffac236673ef1d035ab9c91fe999e29c9eb24aa6c7103d7baa6bf2f72d\n" - ] - } - ], + "outputs": [], "source": [ - "!docker run --rm -d -p 55555:55555 --name vdms_vs_test_nb intellabs/vdms:latest" + "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n", + "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"" ] }, { "cell_type": "markdown", - "id": "2b5ffbf8", + "id": "93df377e", "metadata": {}, "source": [ - "## Basic Example (using the Docker Container)\n", + "## 
Initialization\n", + "Use the VDMS Client to connect to a VDMS vectorstore using FAISS IndexFlat indexing (default) and Euclidean distance (default) as the distance metric for similarity search.\n", "\n", - "In this basic example, we demonstrate adding documents into VDMS and using it as a vector database.\n", + "import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n", "\n", - "You can run the VDMS Server in a Docker container separately to use with LangChain which connects to the server via the VDMS Python Client. \n", - "\n", - "VDMS has the ability to handle multiple collections of documents, but the LangChain interface expects one, so we need to specify the name of the collection . The default collection name used by LangChain is \"langchain\".\n" + "" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "5201ba0c", + "execution_count": null, + "id": "01d25284", "metadata": {}, "outputs": [], "source": [ - "import time\n", - "import warnings\n", + "# | output: false\n", + "# | echo: false\n", "\n", - "warnings.filterwarnings(\"ignore\")\n", - "\n", - "from langchain_community.document_loaders.text import TextLoader\n", - "from langchain_community.vectorstores import VDMS\n", - "from langchain_community.vectorstores.vdms import VDMS_Client\n", + "! pip install -qU langchain-huggingface\n", "from langchain_huggingface import HuggingFaceEmbeddings\n", - "from langchain_text_splitters.character import CharacterTextSplitter\n", "\n", - "time.sleep(2)\n", - "DELIMITER = \"-\" * 50\n", - "\n", - "# Connect to VDMS Vector Store\n", - "vdms_client = VDMS_Client(host=\"localhost\", port=55555)" - ] - }, - { - "cell_type": "markdown", - "id": "935069bc", - "metadata": {}, - "source": [ - "Here are some helper functions for printing results." 
+ "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")" ] }, { "cell_type": "code", "execution_count": 4, - "id": "e78814eb", - "metadata": {}, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "def print_document_details(doc):\n", - " print(f\"Content:\\n\\t{doc.page_content}\\n\")\n", - " print(\"Metadata:\")\n", - " for key, value in doc.metadata.items():\n", - " if value != \"Missing property\":\n", - " print(f\"\\t{key}:\\t{value}\")\n", + "from langchain_vdms.vectorstores import VDMS, VDMS_Client\n", "\n", + "collection_name = \"test_collection_faiss_L2\"\n", "\n", - "def print_results(similarity_results, score=True):\n", - " print(f\"{DELIMITER}\\n\")\n", - " if score:\n", - " for doc, score in similarity_results:\n", - " print(f\"Score:\\t{score}\\n\")\n", - " print_document_details(doc)\n", - " print(f\"{DELIMITER}\\n\")\n", - " else:\n", - " for doc in similarity_results:\n", - " print_document_details(doc)\n", - " print(f\"{DELIMITER}\\n\")\n", + "vdms_client = VDMS_Client(host=\"localhost\", port=55555)\n", "\n", - "\n", - "def print_response(list_of_entities):\n", - " for ent in list_of_entities:\n", - " for key, value in ent.items():\n", - " if value != \"Missing property\":\n", - " print(f\"\\n{key}:\\n\\t{value}\")\n", - " print(f\"{DELIMITER}\\n\")" + "vector_store = VDMS(\n", + " client=vdms_client,\n", + " embedding=embeddings,\n", + " collection_name=collection_name,\n", + " engine=\"FaissFlat\",\n", + " distance_strategy=\"L2\",\n", + ")" ] }, { "cell_type": "markdown", - "id": "88229867", + "id": "ac6071d4", "metadata": {}, "source": [ - "### Load Document and Obtain Embedding Function\n", - "Here we load the most recent State of the Union Address and split the document into chunks. \n", + "## Manage vector store\n", "\n", - "LangChain vector stores use a string/keyword `id` for bookkeeping documents. 
By default, `id` is a uuid but here we're defining it as an integer cast as a string. Additional metadata is also provided with the documents and the HuggingFaceEmbeddings are used for this example as the embedding function." + "### Add items to vector store" ] }, { "cell_type": "code", "execution_count": 5, - "id": "2ebfc16c", + "id": "17f5efc0", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Documents: 42\n", - "# Embedding Dimensions: 768\n" - ] + "data": { + "text/plain": [ + "['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# load the document and split it into chunks\n", - "document_path = \"../../how_to/state_of_the_union.txt\"\n", - "raw_documents = TextLoader(document_path).load()\n", + "import logging\n", "\n", - "# split it into chunks\n", - "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", - "docs = text_splitter.split_documents(raw_documents)\n", - "ids = []\n", - "for doc_idx, doc in enumerate(docs):\n", - " ids.append(str(doc_idx + 1))\n", - " docs[doc_idx].metadata[\"id\"] = str(doc_idx + 1)\n", - " docs[doc_idx].metadata[\"page_number\"] = int(doc_idx + 1)\n", - " docs[doc_idx].metadata[\"president_included\"] = (\n", - " \"president\" in doc.page_content.lower()\n", - " )\n", - "print(f\"# Documents: {len(docs)}\")\n", + "logging.basicConfig()\n", + "logging.getLogger(\"langchain_vdms.vectorstores\").setLevel(logging.INFO)\n", "\n", + "from langchain_core.documents import Document\n", "\n", - "# create the open-source embedding function\n", - "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n", - "embedding = HuggingFaceEmbeddings(model_name=model_name)\n", - "print(\n", - " f\"# Embedding Dimensions: {len(embedding.embed_query('This is a test document.'))}\"\n", - ")" + "document_1 = Document(\n", + " page_content=\"I had chocolate chip pancakes and 
scrambled eggs for breakfast this morning.\",\n", + " metadata={\"source\": \"tweet\"},\n", + " id=1,\n", + ")\n", + "\n", + "document_2 = Document(\n", + " page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n", + " metadata={\"source\": \"news\"},\n", + " id=2,\n", + ")\n", + "\n", + "document_3 = Document(\n", + " page_content=\"Building an exciting new project with LangChain - come check it out!\",\n", + " metadata={\"source\": \"tweet\"},\n", + " id=3,\n", + ")\n", + "\n", + "document_4 = Document(\n", + " page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n", + " metadata={\"source\": \"news\"},\n", + " id=4,\n", + ")\n", + "\n", + "document_5 = Document(\n", + " page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n", + " metadata={\"source\": \"tweet\"},\n", + " id=5,\n", + ")\n", + "\n", + "document_6 = Document(\n", + " page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n", + " metadata={\"source\": \"website\"},\n", + " id=6,\n", + ")\n", + "\n", + "document_7 = Document(\n", + " page_content=\"The top 10 soccer players in the world right now.\",\n", + " metadata={\"source\": \"website\"},\n", + " id=7,\n", + ")\n", + "\n", + "document_8 = Document(\n", + " page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n", + " metadata={\"source\": \"tweet\"},\n", + " id=8,\n", + ")\n", + "\n", + "document_9 = Document(\n", + " page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n", + " metadata={\"source\": \"news\"},\n", + " id=9,\n", + ")\n", + "\n", + "document_10 = Document(\n", + " page_content=\"I have a bad feeling I am going to get deleted :(\",\n", + " metadata={\"source\": \"tweet\"},\n", + " id=10,\n", + ")\n", + "\n", + "documents = [\n", + " document_1,\n", + " document_2,\n", + " document_3,\n", + " document_4,\n", + " document_5,\n", 
+ " document_6,\n", + " document_7,\n", + " document_8,\n", + " document_9,\n", + " document_10,\n", + "]\n", + "\n", + "doc_ids = [str(i) for i in range(1, 11)]\n", + "vector_store.add_documents(documents=documents, ids=doc_ids)" ] }, { "cell_type": "markdown", - "id": "a6a596f0", + "id": "37f4ad56", "metadata": {}, "source": [ - "### Similarity Search using Faiss Flat and Euclidean Distance (Default)\n", - "\n", - "In this section, we add the documents to VDMS using FAISS IndexFlat indexing (default) and Euclidena distance (default) as the distance metric for simiarity search. We search for three documents (`k=3`) related to the query `What did the president say about Ketanji Brown Jackson`." + "If an id is provided multiple times, `add_documents` does not check whether the ids are unique. For this reason, use `upsert` to delete existing id entries prior to adding." ] }, { "cell_type": "code", "execution_count": 6, - "id": "1f3f43d4", + "id": "cb6a9f86", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------\n", - "\n", - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. 
One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Content:\n", - "\tAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit. \n", - "\n", - "It’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children. \n", - "\n", - "And let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care. \n", - "\n", - "Third, support our veterans. \n", - "\n", - "Veterans are the best of us. \n", - "\n", - "I’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home. \n", - "\n", - "My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \n", - "\n", - "Our troops in Iraq and Afghanistan faced many dangers.\n", - "\n", - "Metadata:\n", - "\tid:\t37\n", - "\tpage_number:\t37\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Content:\n", - "\tA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. 
\n", - "\n", - "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", - "\n", - "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", - "\n", - "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", - "\n", - "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n", - "\n", - "We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n", - "\n", - "Metadata:\n", - "\tid:\t33\n", - "\tpage_number:\t33\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] + "data": { + "text/plain": [ + "{'succeeded': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],\n", + " 'failed': []}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# add data\n", - "collection_name = \"my_collection_faiss_L2\"\n", - "db_FaissFlat = VDMS.from_documents(\n", - " docs,\n", - " client=vdms_client,\n", - " ids=ids,\n", - " collection_name=collection_name,\n", - " embedding=embedding,\n", - ")\n", - "\n", - "# Query (No metadata filtering)\n", - "k = 3\n", - "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "returned_docs = db_FaissFlat.similarity_search(query, k=k, filter=None)\n", - "print_results(returned_docs, score=False)" + "vector_store.upsert(documents, ids=doc_ids)" + ] + }, + { + "cell_type": "markdown", + "id": "c738c3e0", + "metadata": {}, + "source": [ + "### Update items in vector store\n" ] }, { "cell_type": "code", "execution_count": 7, - "id": "c2e36c18", + "id": "f0aa8b71", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"--------------------------------------------------\n", - "\n", - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Content:\n", - "\tAnd for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n", - "\n", - "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n", - "\n", - "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n", - "\n", - "And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. 
\n", - "\n", - "So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n", - "\n", - "First, beat the opioid epidemic.\n", - "\n", - "Metadata:\n", - "\tid:\t35\n", - "\tpage_number:\t35\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Content:\n", - "\tLast month, I announced our plan to supercharge \n", - "the Cancer Moonshot that President Obama asked me to lead six years ago. \n", - "\n", - "Our goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \n", - "\n", - "More support for patients and families. \n", - "\n", - "To get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \n", - "\n", - "It’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \n", - "\n", - "ARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n", - "\n", - "A unity agenda for the nation. \n", - "\n", - "We can do this. \n", - "\n", - "My fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \n", - "\n", - "In this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. 
\n", - "\n", - "We have fought for freedom, expanded liberty, defeated totalitarianism and terror.\n", - "\n", - "Metadata:\n", - "\tid:\t40\n", - "\tpage_number:\t40\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "# Query (with filtering)\n", - "k = 3\n", - "constraints = {\"page_number\": [\">\", 30], \"president_included\": [\"==\", True]}\n", - "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "returned_docs = db_FaissFlat.similarity_search(query, k=k, filter=constraints)\n", - "print_results(returned_docs, score=False)" - ] - }, - { - "cell_type": "markdown", - "id": "92ab3370", - "metadata": {}, - "source": [ - "### Similarity Search using Faiss IVFFlat and Inner Product (IP) Distance\n", - "\n", - "In this section, we add the documents to VDMS using Faiss IndexIVFFlat indexing and IP as the distance metric for similarity search. We search for three documents (`k=3`) related to the query `What did the president say about Ketanji Brown Jackson` and also return the score along with the document.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "78f502cf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------\n", - "\n", - "Score:\t1.2032090425\n", - "\n", - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 
\n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.4952471256\n", - "\n", - "Content:\n", - "\tAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit. \n", - "\n", - "It’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children. \n", - "\n", - "And let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care. \n", - "\n", - "Third, support our veterans. \n", - "\n", - "Veterans are the best of us. \n", - "\n", - "I’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home. \n", - "\n", - "My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \n", - "\n", - "Our troops in Iraq and Afghanistan faced many dangers.\n", - "\n", - "Metadata:\n", - "\tid:\t37\n", - "\tpage_number:\t37\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.5008399487\n", - "\n", - "Content:\n", - "\tA former top litigator in private practice. 
A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", - "\n", - "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", - "\n", - "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", - "\n", - "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", - "\n", - "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n", - "\n", - "We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n", - "\n", - "Metadata:\n", - "\tid:\t33\n", - "\tpage_number:\t33\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], - "source": [ - "db_FaissIVFFlat = VDMS.from_documents(\n", - " docs,\n", - " client=vdms_client,\n", - " ids=ids,\n", - " collection_name=\"my_collection_FaissIVFFlat_IP\",\n", - " embedding=embedding,\n", - " engine=\"FaissIVFFlat\",\n", - " distance_strategy=\"IP\",\n", - ")\n", - "# Query\n", - "k = 3\n", - "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "docs_with_score = db_FaissIVFFlat.similarity_search_with_score(query, k=k, filter=None)\n", - "print_results(docs_with_score)" - ] - }, - { - "cell_type": "markdown", - "id": "e66d9125", - "metadata": {}, - "source": [ - "### Similarity Search using FLINNG and IP Distance\n", - "\n", - "In this section, we add the documents to VDMS using Filters to Identify Near-Neighbor Groups (FLINNG) indexing and IP as the distance metric 
for similarity search. We search for three documents (`k=3`) related to the query `What did the president say about Ketanji Brown Jackson` and also return the score along with the document." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "add81beb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------\n", - "\n", - "Score:\t1.2032090425\n", - "\n", - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.4952471256\n", - "\n", - "Content:\n", - "\tAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit. \n", - "\n", - "It’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children. 
\n", - "\n", - "And let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care. \n", - "\n", - "Third, support our veterans. \n", - "\n", - "Veterans are the best of us. \n", - "\n", - "I’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home. \n", - "\n", - "My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \n", - "\n", - "Our troops in Iraq and Afghanistan faced many dangers.\n", - "\n", - "Metadata:\n", - "\tid:\t37\n", - "\tpage_number:\t37\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.5008399487\n", - "\n", - "Content:\n", - "\tA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", - "\n", - "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", - "\n", - "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", - "\n", - "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", - "\n", - "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. 
\n", - "\n", - "We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n", - "\n", - "Metadata:\n", - "\tid:\t33\n", - "\tpage_number:\t33\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], - "source": [ - "db_Flinng = VDMS.from_documents(\n", - " docs,\n", - " client=vdms_client,\n", - " ids=ids,\n", - " collection_name=\"my_collection_Flinng_IP\",\n", - " embedding=embedding,\n", - " engine=\"Flinng\",\n", - " distance_strategy=\"IP\",\n", - ")\n", - "# Query\n", - "k = 3\n", - "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "docs_with_score = db_Flinng.similarity_search_with_score(query, k=k, filter=None)\n", - "print_results(docs_with_score)" - ] - }, - { - "cell_type": "markdown", - "id": "a5984766", - "metadata": {}, - "source": [ - "### Similarity Search using TileDBDense and Euclidean Distance\n", - "\n", - "In this section, we add the documents to VDMS using TileDB Dense indexing and L2 as the distance metric for similarity search. We search for three documents (`k=3`) related to the query `What did the president say about Ketanji Brown Jackson` and also return the score along with the document.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "3001ba6e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------\n", - "\n", - "Score:\t1.2032090425\n", - "\n", - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 
\n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.4952471256\n", - "\n", - "Content:\n", - "\tAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit. \n", - "\n", - "It’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children. \n", - "\n", - "And let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care. \n", - "\n", - "Third, support our veterans. \n", - "\n", - "Veterans are the best of us. \n", - "\n", - "I’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home. \n", - "\n", - "My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. 
\n", - "\n", - "Our troops in Iraq and Afghanistan faced many dangers.\n", - "\n", - "Metadata:\n", - "\tid:\t37\n", - "\tpage_number:\t37\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.5008399487\n", - "\n", - "Content:\n", - "\tA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", - "\n", - "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", - "\n", - "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", - "\n", - "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", - "\n", - "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. 
\n", - "\n", - "We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n", - "\n", - "Metadata:\n", - "\tid:\t33\n", - "\tpage_number:\t33\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], - "source": [ - "db_tiledbD = VDMS.from_documents(\n", - " docs,\n", - " client=vdms_client,\n", - " ids=ids,\n", - " collection_name=\"my_collection_tiledbD_L2\",\n", - " embedding=embedding,\n", - " engine=\"TileDBDense\",\n", - " distance_strategy=\"L2\",\n", + "updated_document_1 = Document(\n", + " page_content=\"I had chocolate chip pancakes and fried eggs for breakfast this morning.\",\n", + " metadata={\"source\": \"tweet\"},\n", + " id=1,\n", ")\n", "\n", - "k = 3\n", - "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "docs_with_score = db_tiledbD.similarity_search_with_score(query, k=k, filter=None)\n", - "print_results(docs_with_score)" - ] - }, - { - "cell_type": "markdown", - "id": "9ed3ec50", - "metadata": {}, - "source": [ - "### Update and Delete\n", - "\n", - "While building toward a real application, you want to go beyond adding data, and also update and delete data.\n", - "\n", - "Here is a basic example showing how to do so. First, we will update the metadata for the document most relevant to the query by adding a date. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "81a02810", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original metadata: \n", - "\t{'id': '32', 'page_number': 32, 'president_included': True, 'source': '../../how_to/state_of_the_union.txt'}\n", - "new metadata: \n", - "\t{'id': '32', 'page_number': 32, 'president_included': True, 'source': '../../how_to/state_of_the_union.txt', 'last_date_read': {'_date': '2024-05-01T14:30:00'}}\n", - "--------------------------------------------------\n", - "\n", - "UPDATED ENTRY (id=32):\n", - "\n", - "content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. 
One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "id:\n", - "\t32\n", - "\n", - "last_date_read:\n", - "\t2024-05-01T14:30:00+00:00\n", - "\n", - "page_number:\n", - "\t32\n", - "\n", - "president_included:\n", - "\tTrue\n", - "\n", - "source:\n", - "\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], - "source": [ - "from datetime import datetime\n", - "\n", - "doc = db_FaissFlat.similarity_search(query)[0]\n", - "print(f\"Original metadata: \\n\\t{doc.metadata}\")\n", - "\n", - "# Update the metadata for a document by adding last datetime document read\n", - "datetime_str = datetime(2024, 5, 1, 14, 30, 0).isoformat()\n", - "doc.metadata[\"last_date_read\"] = {\"_date\": datetime_str}\n", - "print(f\"new metadata: \\n\\t{doc.metadata}\")\n", - "print(f\"{DELIMITER}\\n\")\n", - "\n", - "# Update document in VDMS\n", - "id_to_update = doc.metadata[\"id\"]\n", - "db_FaissFlat.update_document(collection_name, id_to_update, doc)\n", - "response, response_array = db_FaissFlat.get(\n", - " collection_name,\n", - " constraints={\n", - " \"id\": [\"==\", id_to_update],\n", - " \"last_date_read\": [\">=\", {\"_date\": \"2024-05-01T00:00:00\"}],\n", - " },\n", + "updated_document_2 = Document(\n", + " page_content=\"The weather forecast for tomorrow is sunny and warm, with a high of 82 degrees.\",\n", + " metadata={\"source\": \"news\"},\n", + " id=2,\n", ")\n", "\n", - "# Display Results\n", - "print(f\"UPDATED ENTRY (id={id_to_update}):\")\n", - "print_response([response[0][\"FindDescriptor\"][\"entities\"][0]])" - ] - }, - { - "cell_type": "markdown", - "id": "872a7dff", - "metadata": {}, - "source": [ - "Next we will delete the last document by ID (id=42)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "95537fe8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Documents before deletion: 42\n", - "Documents after deletion (id=42): 41\n" - ] - } - ], - "source": [ - "print(\"Documents before deletion: \", db_FaissFlat.count(collection_name))\n", - "\n", - "id_to_remove = ids[-1]\n", - "db_FaissFlat.delete(collection_name=collection_name, ids=[id_to_remove])\n", - "print(\n", - " f\"Documents after deletion (id={id_to_remove}): {db_FaissFlat.count(collection_name)}\"\n", + "vector_store.update_documents(\n", + " ids=doc_ids[:2],\n", + " documents=[updated_document_1, updated_document_2],\n", + " batch_size=2,\n", ")" ] }, { "cell_type": "markdown", - "id": "18152965", + "id": "dcf1b905", "metadata": {}, "source": [ - "## Other Information\n", - "VDMS supports various types of visual data and operations. Some of the capabilities are integrated in the LangChain interface but additional workflow improvements will be added as VDMS is under continuous development.\n", + "### Delete items from vector store\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ef61e188", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vector_store.delete(ids=doc_ids[-1])" + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", "\n", - "Addtional capabilities integrated into LangChain are below.\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n", "\n", - "### Similarity search by vector\n", - "Instead of searching by string query, you can also search by embedding/vector." 
+ "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0063 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* ID=3: Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n", + "* ID=8: LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n" + ] + } + ], + "source": [ + "results = vector_store.similarity_search(\n", + " \"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter={\"source\": [\"==\", \"tweet\"]},\n", + ")\n", + "for doc in results:\n", + " print(f\"* ID={doc.id}: {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "If you want to execute a similarity search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0460 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.753577] The weather forecast for tomorrow is sunny and warm, with a high of 82 degrees. 
[{'source': 'news'}]\n" + ] + } + ], + "source": [ + "results = vector_store.similarity_search_with_score(\n", + " \"Will it be hot tomorrow?\", k=1, filter={\"source\": [\"==\", \"news\"]}\n", + ")\n", + "for doc, score in results:\n", + " print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "496501e8", + "metadata": {}, + "source": [ + "If you want to execute a similarity search using an embedding you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "dfa010e5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0044 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The weather forecast for tomorrow is sunny and warm, with a high of 82 degrees. [{'source': 'news'}]\n" + ] + } + ], + "source": [ + "results = vector_store.similarity_search_by_vector(\n", + " embedding=embeddings.embed_query(\"I love green eggs and ham!\"), k=1\n", + ")\n", + "for doc in results:\n", + " print(f\"* {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a retriever for easier usage in your chains. " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "bf66cf31", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0042 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Robbers broke into the city bank and stole $1 million in cash. [{'source': 'news'}]\n", + "* The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]\n", + "* Is the new iPhone worth the price? 
Read this review to find out. [{'source': 'website'}]\n" + ] + } + ], + "source": [ + "retriever = vector_store.as_retriever(\n", + " search_type=\"similarity\",\n", + " search_kwargs={\"k\": 3},\n", + ")\n", + "results = retriever.invoke(\"Stealing from the bank is a crime\")\n", + "for doc in results:\n", + " print(f\"* {doc.page_content} [{doc.metadata}]\")" ] }, { "cell_type": "code", "execution_count": 13, - "id": "1db4d6ed", + "id": "f3460093", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0042 seconds\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tlast_date_read:\t2024-05-01T14:30:00+00:00\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n" + "* Robbers broke into the city bank and stole $1 million in cash. 
[{'source': 'news'}]\n" ] } ], "source": [ - "embedding_vector = embedding.embed_query(query)\n", - "returned_docs = db_FaissFlat.similarity_search_by_vector(embedding_vector)\n", - "\n", - "# Print Results\n", - "print_document_details(returned_docs[0])" - ] - }, - { - "cell_type": "markdown", - "id": "daf718b2", - "metadata": {}, - "source": [ - "### Filtering on metadata\n", - "\n", - "It can be helpful to narrow down the collection before working with it.\n", - "\n", - "For example, collections can be filtered on metadata using the get method. A dictionary is used to filter metadata. Here we retrieve the document where `id = 2` and remove it from the vector store." + "retriever = vector_store.as_retriever(\n", + " search_type=\"similarity_score_threshold\",\n", + " search_kwargs={\n", + " \"k\": 1,\n", + " \"score_threshold\": 0.0, # >= score_threshold\n", + " },\n", + ")\n", + "results = retriever.invoke(\"Stealing from the bank is a crime\")\n", + "for doc in results:\n", + " print(f\"* {doc.page_content} [{doc.metadata}]\")" ] }, { "cell_type": "code", "execution_count": 14, - "id": "2bc0313b", + "id": "6e971ae8", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:VDMS mmr search took 0.0042 secs\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Deleted entry:\n", - "\n", - "blob:\n", - "\tTrue\n", - "\n", - "content:\n", - "\tGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n", - "\n", - "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \n", - "\n", - "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. 
\n", - "\n", - "Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \n", - "\n", - "Throughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \n", - "\n", - "They keep moving. \n", - "\n", - "And the costs and the threats to America and the world keep rising. \n", - "\n", - "That’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \n", - "\n", - "The United States is a member along with 29 other nations. \n", - "\n", - "It matters. American diplomacy matters. American resolve matters.\n", - "\n", - "id:\n", - "\t2\n", - "\n", - "page_number:\n", - "\t2\n", - "\n", - "president_included:\n", - "\tTrue\n", - "\n", - "source:\n", - "\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" + "* Robbers broke into the city bank and stole $1 million in cash. [{'source': 'news'}]\n" ] } ], "source": [ - "response, response_array = db_FaissFlat.get(\n", - " collection_name,\n", - " limit=1,\n", - " include=[\"metadata\", \"embeddings\"],\n", - " constraints={\"id\": [\"==\", \"2\"]},\n", + "retriever = vector_store.as_retriever(\n", + " search_type=\"mmr\",\n", + " search_kwargs={\"k\": 1, \"fetch_k\": 10},\n", ")\n", - "\n", - "# Delete id=2\n", - "db_FaissFlat.delete(collection_name=collection_name, ids=[\"2\"])\n", - "\n", - "print(\"Deleted entry:\")\n", - "print_response([response[0][\"FindDescriptor\"][\"entities\"][0]])" + "results = retriever.invoke(\n", + " \"Stealing from the bank is a crime\", filter={\"source\": [\"==\", \"news\"]}\n", + ")\n", + "for doc in results:\n", + " print(f\"* {doc.page_content} [{doc.metadata}]\")" ] }, { "cell_type": "markdown", - "id": "794a7552", - "metadata": {}, - "source": [ - "### Retriever options\n", - "\n", - "This section goes over different options for how to use VDMS as a retriever.\n", - "\n", - "\n", 
- "#### Simiarity Search\n", - "\n", - "Here we use similarity search in the retriever object.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "120f55eb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tlast_date_read:\t2024-05-01T14:30:00+00:00\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n" - ] - } - ], - "source": [ - "retriever = db_FaissFlat.as_retriever()\n", - "relevant_docs = retriever.invoke(query)[0]\n", - "\n", - "print_document_details(relevant_docs)" - ] - }, - { - "cell_type": "markdown", - "id": "e8c0fb24", - "metadata": {}, - "source": [ - "#### Maximal Marginal Relevance Search (MMR)\n", - "\n", - "In addition to using similarity search in the retriever object, you can also use `mmr`." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "f00be6d0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Content:\n", - "\tTonight. 
I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tlast_date_read:\t2024-05-01T14:30:00+00:00\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n" - ] - } - ], - "source": [ - "retriever = db_FaissFlat.as_retriever(search_type=\"mmr\")\n", - "relevant_docs = retriever.invoke(query)[0]\n", - "\n", - "print_document_details(relevant_docs)" - ] - }, - { - "cell_type": "markdown", - "id": "ffadbafc", - "metadata": {}, - "source": [ - "We can also use MMR directly." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "ab911470", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------\n", - "\n", - "Score:\t1.2032091618\n", - "\n", - "Content:\n", - "\tTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 
\n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "\n", - "Metadata:\n", - "\tid:\t32\n", - "\tlast_date_read:\t2024-05-01T14:30:00+00:00\n", - "\tpage_number:\t32\n", - "\tpresident_included:\tTrue\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n", - "Score:\t1.50705266\n", - "\n", - "Content:\n", - "\tBut cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. \n", - "\n", - "Danielle says Heath was a fighter to the very end. \n", - "\n", - "He didn’t know how to stop fighting, and neither did she. \n", - "\n", - "Through her pain she found purpose to demand we do better. \n", - "\n", - "Tonight, Danielle—we are. \n", - "\n", - "The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \n", - "\n", - "And tonight, I’m announcing we’re expanding eligibility to veterans suffering from nine respiratory cancers. \n", - "\n", - "I’m also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve. \n", - "\n", - "And fourth, let’s end cancer as we know it. \n", - "\n", - "This is personal to me and Jill, to Kamala, and to so many of you. 
\n", - "\n", - "Cancer is the #2 cause of death in America–second only to heart disease.\n", - "\n", - "Metadata:\n", - "\tid:\t39\n", - "\tpage_number:\t39\n", - "\tpresident_included:\tFalse\n", - "\tsource:\t../../how_to/state_of_the_union.txt\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], - "source": [ - "mmr_resp = db_FaissFlat.max_marginal_relevance_search_with_score(query, k=2, fetch_k=10)\n", - "print_results(mmr_resp)" - ] - }, - { - "cell_type": "markdown", - "id": "190bc4b5", + "id": "5f790d1b", "metadata": {}, "source": [ "### Delete collection\n", @@ -1159,30 +593,323 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "874e7af9", + "execution_count": 15, + "id": "4bfac767", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Documents before deletion: 40\n", + "Documents before deletion: 10\n", "Documents after deletion: 0\n" ] } ], "source": [ - "print(\"Documents before deletion: \", db_FaissFlat.count(collection_name))\n", + "print(\"Documents before deletion: \", vector_store.count())\n", "\n", - "db_FaissFlat.delete(collection_name=collection_name)\n", + "vector_store.delete(collection_name=collection_name)\n", "\n", - "print(\"Documents after deletion: \", db_FaissFlat.count(collection_name))" + "print(\"Documents after deletion: \", vector_store.count())" ] }, { "cell_type": "markdown", - "id": "68b7a400", + "id": "901c75dc", + "metadata": {}, + "source": [ + "## Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Multi-modal RAG using VDMS](https://github.com/langchain-ai/langchain/blob/master/cookbook/multi_modal_RAG_vdms.ipynb)\n", + "- [Visual RAG using VDMS](https://github.com/langchain-ai/langchain/blob/master/cookbook/visual_RAG_vdms.ipynb)\n", + "- [Tutorials](/docs/tutorials/)\n", + "- [How-to: Question and answer with 
RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": "069f1b5f", + "metadata": {}, + "source": [ + "## Similarity Search using other engines\n", + "\n", + "VDMS supports various libraries for indexing and computing distances: FaissFlat (Default), FaissHNSWFlat, FaissIVFFlat, Flinng, TileDBDense, and TileDBSparse.\n", + "By default, the vectorstore uses FaissFlat. Below we show a few examples using the other engines." + ] + }, + { + "cell_type": "markdown", + "id": "68ab4d5d", + "metadata": {}, + "source": [ + "### Similarity Search using Faiss HNSWFlat and Euclidean Distance\n", + "\n", + "Here, we add the documents to VDMS using Faiss IndexHNSWFlat indexing and L2 as the distance metric for similarity search. We search for three documents (`k=3`) related to a query and also return the score along with the document." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "75af55fa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:Descriptor set my_collection_FaissHNSWFlat_L2 created\n", + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.1272 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.716791] Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n", + "* [SIM=0.936718] LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n", + "* [SIM=1.834110] Is the new iPhone worth the price? Read this review to find out. 
[{'source': 'website'}]\n" + ] + } + ], + "source": [ + "db_FaissHNSWFlat = VDMS.from_documents(\n", + " documents,\n", + " client=vdms_client,\n", + " ids=doc_ids,\n", + " collection_name=\"my_collection_FaissHNSWFlat_L2\",\n", + " embedding=embeddings,\n", + " engine=\"FaissHNSWFlat\",\n", + " distance_strategy=\"L2\",\n", + ")\n", + "# Query\n", + "k = 3\n", + "query = \"LangChain provides abstractions to make working with LLMs easy\"\n", + "docs_with_score = db_FaissHNSWFlat.similarity_search_with_score(query, k=k, filter=None)\n", + "\n", + "for res, score in docs_with_score:\n", + " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "37f62525", + "metadata": {}, + "source": [ + "### Similarity Search using Faiss IVFFlat and Inner Product (IP) Distance\n", + "\n", + "We add the documents to VDMS using Faiss IndexIVFFlat indexing and IP as the distance metric for similarity search. We search for three documents (`k=3`) related to a query and also return the score along with the document." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b6c07d2f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:Descriptor set my_collection_FaissIVFFlat_IP created\n", + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0052 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.641605] Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n", + "* [SIM=0.531641] LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n", + "* [SIM=0.082945] Is the new iPhone worth the price? Read this review to find out. 
[{'source': 'website'}]\n" + ] + } + ], + "source": [ + "db_FaissIVFFlat = VDMS.from_documents(\n", + " documents,\n", + " client=vdms_client,\n", + " ids=doc_ids,\n", + " collection_name=\"my_collection_FaissIVFFlat_IP\",\n", + " embedding=embeddings,\n", + " engine=\"FaissIVFFlat\",\n", + " distance_strategy=\"IP\",\n", + ")\n", + "\n", + "k = 3\n", + "query = \"LangChain provides abstractions to make working with LLMs easy\"\n", + "docs_with_score = db_FaissIVFFlat.similarity_search_with_score(query, k=k, filter=None)\n", + "for res, score in docs_with_score:\n", + " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "f8efce5d", + "metadata": {}, + "source": [ + "### Similarity Search using FLINNG and IP Distance\n", + "\n", + "In this section, we add the documents to VDMS using Filters to Identify Near-Neighbor Groups (FLINNG) indexing and IP as the distance metric for similarity search. We search for three documents (`k=3`) related to a query and also return the score along with the document." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "69154f31", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain_vdms.vectorstores:Descriptor set my_collection_Flinng_IP created\n", + "INFO:langchain_vdms.vectorstores:VDMS similarity search took 0.0042 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.000000] I had chocolate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet'}]\n", + "* [SIM=0.000000] I had chocolate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet'}]\n", + "* [SIM=0.000000] I had chocolate chip pancakes and scrambled eggs for breakfast this morning. 
[{'source': 'tweet'}]\n" + ] + } + ], + "source": [ + "db_Flinng = VDMS.from_documents(\n", + " documents,\n", + " client=vdms_client,\n", + " ids=doc_ids,\n", + " collection_name=\"my_collection_Flinng_IP\",\n", + " embedding=embeddings,\n", + " engine=\"Flinng\",\n", + " distance_strategy=\"IP\",\n", + ")\n", + "# Query\n", + "k = 3\n", + "query = \"LangChain provides abstractions to make working with LLMs easy\"\n", + "docs_with_score = db_Flinng.similarity_search_with_score(query, k=k, filter=None)\n", + "for res, score in docs_with_score:\n", + " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "375a9eef", + "metadata": {}, + "source": [ + "## Filtering on metadata\n", + "\n", + "It can be helpful to narrow down the collection before working with it.\n", + "\n", + "For example, collections can be filtered on metadata using the `get_by_constraints` method. A dictionary is used to filter metadata. Here we retrieve the document where `langchain_id = \"2\"` and remove it from the vector store. \n", + "\n", + "***NOTE:*** `id` was generated as additional metadata as an integer while `langchain_id` (the internal ID) is a unique string for each entry. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "fea51565", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleted entry:\n", + "* ID=2: The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. 
[{'source': 'news'}]\n" + ] + } + ], + "source": [ + "response, response_array = db_FaissIVFFlat.get_by_constraints(\n", + " db_FaissIVFFlat.collection_name,\n", + " limit=1,\n", + " include=[\"metadata\", \"embeddings\"],\n", + " constraints={\"langchain_id\": [\"==\", \"2\"]},\n", + ")\n", + "\n", + "# Delete id=2\n", + "db_FaissIVFFlat.delete(collection_name=db_FaissIVFFlat.collection_name, ids=[\"2\"])\n", + "\n", + "print(\"Deleted entry:\")\n", + "for doc in response:\n", + " print(f\"* ID={doc.id}: {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "af7bffc1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* ID=10: I have a bad feeling I am going to get deleted :( [{'source': 'tweet'}]\n", + "* ID=9: The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]\n", + "* ID=8: LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n", + "* ID=7: The top 10 soccer players in the world right now. [{'source': 'website'}]\n", + "* ID=6: Is the new iPhone worth the price? Read this review to find out. [{'source': 'website'}]\n", + "* ID=5: Wow! That was an amazing movie. I can't wait to see it again. [{'source': 'tweet'}]\n", + "* ID=4: Robbers broke into the city bank and stole $1 million in cash. [{'source': 'news'}]\n", + "* ID=3: Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n", + "* ID=1: I had chocolate chip pancakes and scrambled eggs for breakfast this morning. 
[{'source': 'tweet'}]\n" + ] + } + ], + "source": [ + "response, response_array = db_FaissIVFFlat.get_by_constraints(\n", + " db_FaissIVFFlat.collection_name,\n", + " include=[\"metadata\"],\n", + ")\n", + "for doc in response:\n", + " print(f\"* ID={doc.id}: {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "3c3edde4", + "metadata": {}, + "source": [ + "Here we filter on the `source` metadata field to retrieve only the entries whose source is `news`." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "6cacfcc6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* ID=9: The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]\n", + "* ID=4: Robbers broke into the city bank and stole $1 million in cash. [{'source': 'news'}]\n" + ] + } + ], + "source": [ + "response, response_array = db_FaissIVFFlat.get_by_constraints(\n", + " db_FaissIVFFlat.collection_name,\n", + " include=[\"metadata\", \"embeddings\"],\n", + " constraints={\"source\": [\"==\", \"news\"]},\n", + ")\n", + "for doc in response:\n", + " print(f\"* ID={doc.id}: {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "f42c279e", + "metadata": {}, + "source": [ + "## Stop VDMS Server" @@ -1190,20 +917,10 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "08931796", + "execution_count": 22, + "id": "a838c50b", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -1217,17 +934,25 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "a60725a6", + "cell_type": "markdown", + "id": "8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "TODO: add API reference" + ] + }, + { + "cell_type": "markdown", + "id": "af4a2189", "metadata": {}, - "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".langchain-venv", "language": "python", "name": "python3" }, @@ -1241,7 +966,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/vectorstores/vdms.py b/libs/community/langchain_community/vectorstores/vdms.py index b77c7ff2979..9c010c7c364 100644 --- a/libs/community/langchain_community/vectorstores/vdms.py +++ b/libs/community/langchain_community/vectorstores/vdms.py @@ -22,6 +22,7 @@ from typing import ( ) import numpy as np +from langchain_core._api.deprecation import deprecated from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore @@ -135,6 +136,7 @@ def VDMS_Client(host: str = "localhost", port: int = 55555) -> vdms.vdms: return client +@deprecated(since="0.3.18", removal="1.0.0", alternative_import="langchain_vdms.VDMS") class VDMS(VectorStore): """Intel Lab's VDMS for vector-store workloads. 
diff --git a/libs/packages.yml b/libs/packages.yml index 4181c928d15..cc4e0f6a770 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -445,6 +445,10 @@ packages: repo: Shikenso-Analytics/langchain-discord downloads: 1 downloads_updated_at: '2025-02-15T16:00:00.000000+00:00' +- name: langchain-vdms + repo: IntelLabs/langchain-vdms + path: . + name_title: VDMS - name: langchain-deeplake path: . repo: activeloopai/langchain-deeplake