From 1a1a1a883f0f76b6b9535eec87712735e6c77fae Mon Sep 17 00:00:00 2001 From: Andrew Zhou <44193474+adrwz@users.noreply.github.com> Date: Fri, 10 Nov 2023 17:53:57 -0500 Subject: [PATCH] fleet_context docs update (#13221) - **Description:** Changed the fleet_context documentation to use `context.download_embeddings()` from the latest release from our package. More details here: https://github.com/fleet-ai/context/tree/main#api - **Issue:** n/a - **Dependencies:** n/a - **Tag maintainer:** @baskaryan - **Twitter handle:** @andrewthezhou --- .../integrations/retrievers/fleet_context.ipynb | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/docs/integrations/retrievers/fleet_context.ipynb b/docs/docs/integrations/retrievers/fleet_context.ipynb index 10f4145b7c1..4a57d3f7953 100644 --- a/docs/docs/integrations/retrievers/fleet_context.ipynb +++ b/docs/docs/integrations/retrievers/fleet_context.ipynb @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install langchain openai pandas faiss-cpu # faiss-gpu for CUDA supported GPU" + "!pip install langchain fleet-context openai pandas faiss-cpu # faiss-gpu for CUDA supported GPU" ] }, { @@ -43,13 +43,12 @@ "\n", "\n", "def load_fleet_retriever(\n", - " url: str,\n", + " df: pd.DataFrame,\n", " *,\n", " vectorstore_cls: Type[VectorStore] = FAISS,\n", " docstore: Optional[BaseStore] = None,\n", " **kwargs: Any,\n", "):\n", - " df = pd.read_parquet(url)\n", " vectorstore = _populate_vectorstore(df, vectorstore_cls)\n", " if docstore is None:\n", " return vectorstore.as_retriever(**kwargs)\n", @@ -106,7 +105,10 @@ "source": [ "## Retriever chunks\n", "\n", - "As part of their embedding process, the Fleet AI team first chunked long documents before embedding them. This means the vectors correspond to sections of pages in the LangChain docs, not entire pages. 
By default, when we spin up a retriever from these embeddings, we'll be retrieving these embedded chunks:" + "As part of their embedding process, the Fleet AI team first chunked long documents before embedding them. This means the vectors correspond to sections of pages in the LangChain docs, not entire pages. By default, when we spin up a retriever from these embeddings, we'll be retrieving these embedded chunks.", + "\n", + "\n", + "We will be using Fleet Context's `download_embeddings()` to grab LangChain's documentation embeddings. You can view all supported libraries' documentation at https://fleet.so/context." ] }, { @@ -116,9 +118,10 @@ "metadata": {}, "outputs": [], "source": [ - "vecstore_retriever = load_fleet_retriever(\n", - " \"https://www.dropbox.com/scl/fi/4rescpkrg9970s3huz47l/libraries_langchain_release.parquet?rlkey=283knw4wamezfwiidgpgptkep&dl=1\",\n", - ")" + "from context import download_embeddings\n", + "\n", + "df = download_embeddings(\"langchain\")\n", + "vecstore_retriever = load_fleet_retriever(df)" ] }, {