FEATURE: Add retriever for Outline (#13889)

- **Description:** Added a retriever for the Outline API to ask
questions on knowledge base
  - **Issue:** resolves #11814
  - **Dependencies:** None
  - **Tag maintainer:** @baskaryan
This commit is contained in:
Yusuf Khan 2023-11-26 21:56:12 -05:00 committed by GitHub
parent f2af82058f
commit 935f78c944
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 427 additions and 0 deletions

View File

@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Outline\n",
"\n",
">[Outline](https://www.getoutline.com/) is an open-source collaborative knowledge base platform designed for team information sharing.\n",
"\n",
"This notebook shows how to retrieve documents from your Outline instance into the Document format that is used downstream."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You first need to [create an api key](https://www.getoutline.com/developers#section/Authentication) for your Outline instance. Then you need to set the following environment variables:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OUTLINE_API_KEY\"] = \"xxx\"\n",
"os.environ[\"OUTLINE_INSTANCE_URL\"] = \"https://app.getoutline.com\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`OutlineRetriever` has these arguments:\n",
"- optional `top_k_results`: default=3. Use it to limit number of documents retrieved.\n",
"- optional `load_all_available_meta`: default=False. By default only the most important fields retrieved: `title`, `source` (the url of the document). If True, other fields also retrieved.\n",
"- optional `doc_content_chars_max` default=4000. Use it to limit the number of characters for each document retrieved.\n",
"\n",
"`get_relevant_documents()` has one argument, `query`: free text which used to find documents in your Outline instance."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Examples"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running retriever"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import OutlineRetriever"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"retriever = OutlineRetriever()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='This walkthrough demonstrates how to use an agent optimized for conversation. Other agents are often optimized for using tools to figure out the best response, which is not ideal in a conversational setting where you may want the agent to be able to chat with the user as well.\\n\\nIf we compare it to the standard ReAct agent, the main difference is the prompt. We want it to be much more conversational.\\n\\nfrom langchain.agents import AgentType, Tool, initialize_agent\\n\\nfrom langchain.llms import OpenAI\\n\\nfrom langchain.memory import ConversationBufferMemory\\n\\nfrom langchain.utilities import SerpAPIWrapper\\n\\nsearch = SerpAPIWrapper() tools = \\\\[ Tool( name=\"Current Search\", func=search.run, description=\"useful for when you need to answer questions about current events or the current state of the world\", ), \\\\]\\n\\n\\\\\\nllm = OpenAI(temperature=0)\\n\\nUsing LCEL\\n\\nWe will first show how to create this agent using LCEL\\n\\nfrom langchain import hub\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\n\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\n\\nfrom langchain.tools.render import render_text_description\\n\\nprompt = hub.pull(\"hwchase17/react-chat\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nllm_with_stop = llm.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nagent = ( { \"input\": lambda x: x\\\\[\"input\"\\\\], \"agent_scratchpad\": lambda x: format_log_to_str(x\\\\[\"intermediate_steps\"\\\\]), \"chat_history\": lambda x: x\\\\[\"chat_history\"\\\\], } | prompt | llm_with_stop | ReActSingleInputOutputParser() )\\n\\nfrom langchain.agents import AgentExecutor\\n\\nmemory = ConversationBufferMemory(memory_key=\"chat_history\") agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=memory)\\n\\nagent_executor.invoke({\"input\": \"hi, i am bob\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Hi Bob, nice to meet you! How can I help you today?\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Hi Bob, nice to meet you! How can I help you today?\\'\\n\\nagent_executor.invoke({\"input\": \"whats my name?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Your name is Bob.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Your name is Bob.\\'\\n\\nagent_executor.invoke({\"input\": \"what are some movies showing 9/21/2023?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? Yes\\nAction: Current Search\\nAction Input: Movies showing 9/21/2023[\\'September 2023 Movies: The Creator • Dumb Money • Expend4bles • The Kill Room • The Inventor • The Equalizer 3 • PAW Patrol: The Mighty Movie, ...\\'] Do I need to use a tool? No\\nFinal Answer: According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\'\\n\\n\\\\\\nUse the off-the-shelf agent\\n\\nWe can also create this agent using the off-the-shelf agent class\\n\\nagent_executor = initialize_agent( tools, llm, agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory, )\\n\\nUse a chat model\\n\\nWe can also use a chat model here. The main difference here is in the prompts used.\\n\\nfrom langchain import hub\\n\\nfrom langchain.chat_models import ChatOpenAI\\n\\nprompt = hub.pull(\"hwchase17/react-chat-json\") chat_model = ChatOpenAI(temperature=0, model=\"gpt-4\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nchat_model_with_stop = chat_model.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_messages\\n\\nfrom langchain.agents.output_parsers import JSONAgentOutputParser\\n\\n# We need some extra steering, or the c', metadata={'title': 'Conversational', 'source': 'https://d01.getoutline.com/doc/conversational-B5dBkUgQ4b'}),\n",
" Document(page_content='Quickstart\\n\\nIn this quickstart we\\'ll show you how to:\\n\\nGet setup with LangChain, LangSmith and LangServe\\n\\nUse the most basic and common components of LangChain: prompt templates, models, and output parsers\\n\\nUse LangChain Expression Language, the protocol that LangChain is built on and which facilitates component chaining\\n\\nBuild a simple application with LangChain\\n\\nTrace your application with LangSmith\\n\\nServe your application with LangServe\\n\\nThat\\'s a fair amount to cover! Let\\'s dive in.\\n\\nSetup\\n\\nInstallation\\n\\nTo install LangChain run:\\n\\nPip\\n\\nConda\\n\\npip install langchain\\n\\nFor more details, see our Installation guide.\\n\\nEnvironment\\n\\nUsing LangChain will usually require integrations with one or more model providers, data stores, APIs, etc. For this example, we\\'ll use OpenAI\\'s model APIs.\\n\\nFirst we\\'ll need to install their Python package:\\n\\npip install openai\\n\\nAccessing the API requires an API key, which you can get by creating an account and heading here. Once we have a key we\\'ll want to set it as an environment variable by running:\\n\\nexport OPENAI_API_KEY=\"...\"\\n\\nIf you\\'d prefer not to set an environment variable you can pass the key in directly via the openai_api_key named parameter when initiating the OpenAI LLM class:\\n\\nfrom langchain.chat_models import ChatOpenAI\\n\\nllm = ChatOpenAI(openai_api_key=\"...\")\\n\\nLangSmith\\n\\nMany of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.\\n\\nNote that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\\nexport LANGCHAIN_TRACING_V2=\"true\" export LANGCHAIN_API_KEY=...\\n\\nLangServe\\n\\nLangServe helps developers deploy LangChain chains as a REST API. You do not need to use LangServe to use LangChain, but in this guide we\\'ll show how you can deploy your app with LangServe.\\n\\nInstall with:\\n\\npip install \"langserve\\\\[all\\\\]\"\\n\\nBuilding with LangChain\\n\\nLangChain provides many modules that can be used to build language model applications. Modules can be used as standalones in simple applications and they can be composed for more complex use cases. Composition is powered by LangChain Expression Language (LCEL), which defines a unified Runnable interface that many modules implement, making it possible to seamlessly chain components.\\n\\nThe simplest and most common chain contains three things:\\n\\nLLM/Chat Model: The language model is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of language models and how to work with them. Prompt Template: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial. Output Parser: These translate the raw response from the language model to a more workable format, making it easy to use the output downstream. In this guide we\\'ll cover those three components individually, and then go over how to combine them. Understanding these concepts will set you up well for being able to use and customize LangChain applications. Most LangChain applications allow you to configure the model and/or the prompt, so knowing how to take advantage of this will be a big enabler.\\n\\nLLM / Chat Model\\n\\nThere are two types of language models:\\n\\nLLM: underlying model takes a string as input and returns a string\\n\\nChatModel: underlying model takes a list of messages as input and returns a message\\n\\nStrings are simple, but what exactly are messages? The base message interface is defined by BaseMessage, which has two required attributes:\\n\\ncontent: The content of the message. Usually a string. role: The entity from which the BaseMessage is coming. LangChain provides several ob', metadata={'title': 'Quick Start', 'source': 'https://d01.getoutline.com/doc/quick-start-jGuGGGOTuL'}),\n",
" Document(page_content='This walkthrough showcases using an agent to implement the [ReAct](https://react-lm.github.io/) logic.\\n\\n```javascript\\nfrom langchain.agents import AgentType, initialize_agent, load_tools\\nfrom langchain.llms import OpenAI\\n```\\n\\nFirst, let\\'s load the language model we\\'re going to use to control the agent.\\n\\n```javascript\\nllm = OpenAI(temperature=0)\\n```\\n\\nNext, let\\'s load some tools to use. Note that the llm-math tool uses an LLM, so we need to pass that in.\\n\\n```javascript\\ntools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\\n```\\n\\n## Using LCEL[\\u200b](https://python.langchain.com/docs/modules/agents/agent_types/react#using-lcel \"Direct link to Using LCEL\")\\n\\nWe will first show how to create the agent using LCEL\\n\\n```javascript\\nfrom langchain import hub\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\nfrom langchain.tools.render import render_text_description\\n```\\n\\n```javascript\\nprompt = hub.pull(\"hwchase17/react\")\\nprompt = prompt.partial(\\n tools=render_text_description(tools),\\n tool_names=\", \".join([t.name for t in tools]),\\n)\\n```\\n\\n```javascript\\nllm_with_stop = llm.bind(stop=[\"\\\\nObservation\"])\\n```\\n\\n```javascript\\nagent = (\\n {\\n \"input\": lambda x: x[\"input\"],\\n \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\\n }\\n | prompt\\n | llm_with_stop\\n | ReActSingleInputOutputParser()\\n)\\n```\\n\\n```javascript\\nfrom langchain.agents import AgentExecutor\\n```\\n\\n```javascript\\nagent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"model Vittoria Ceretti I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"25 years I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43Answer: 3.991298452658078 I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\",\\n \\'output\\': \"Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\"}\\n```\\n\\n## Using ZeroShotReactAgent[\\u200b](https://python.langchain.com/docs/modules/agents/agent_types/react#using-zeroshotreactagent \"Direct link to Using ZeroShotReactAgent\")\\n\\nWe will now show how to use the agent with an off-the-shelf agent implementation\\n\\n```javascript\\nagent_executor = initialize_agent(\\n tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\\n)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"\\n Observation: model Vittoria Ceretti\\n Thought: I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"\\n Observation: 25 years\\n Thought: I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43\\n Observation: Answer: 3.991298452658078\\n Thought: I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is L', metadata={'title': 'ReAct', 'source': 'https://d01.getoutline.com/doc/react-d6rxRS1MHk'})]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_documents(query=\"LangChain\", doc_content_chars_max=100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Answering Questions on Outline Documents"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
"qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'what is langchain?',\n",
" 'chat_history': {},\n",
" 'answer': \"LangChain is a framework for developing applications powered by language models. It provides a set of libraries and tools that enable developers to build context-aware and reasoning-based applications. LangChain allows you to connect language models to various sources of context, such as prompt instructions, few-shot examples, and content, to enhance the model's responses. It also supports the composition of multiple language model components using LangChain Expression Language (LCEL). Additionally, LangChain offers off-the-shelf chains, templates, and integrations for easy application development. LangChain can be used in conjunction with LangSmith for debugging and monitoring chains, and with LangServe for deploying applications as a REST API.\"}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"qa({\"question\": \"what is langchain?\", \"chat_history\": {}})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -50,6 +50,7 @@ from langchain.retrievers.metal import MetalRetriever
from langchain.retrievers.milvus import MilvusRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.retrievers.outline import OutlineRetriever
from langchain.retrievers.parent_document_retriever import ParentDocumentRetriever
from langchain.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever
from langchain.retrievers.pubmed import PubMedRetriever
@ -92,6 +93,7 @@ __all__ = [
"MetalRetriever",
"MilvusRetriever",
"MultiQueryRetriever",
"OutlineRetriever",
"PineconeHybridSearchRetriever",
"PubMedRetriever",
"RemoteLangChainRetriever",

View File

@ -0,0 +1,20 @@
from typing import List
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.utilities.outline import OutlineAPIWrapper
class OutlineRetriever(BaseRetriever, OutlineAPIWrapper):
"""Retriever for Outline API.
It wraps run() to get_relevant_documents().
It uses all OutlineAPIWrapper arguments without any change.
"""
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
return self.run(query=query)

View File

@ -122,6 +122,12 @@ def _import_openweathermap() -> Any:
return OpenWeatherMapAPIWrapper
def _import_outline() -> Any:
from langchain.utilities.outline import OutlineAPIWrapper
return OutlineAPIWrapper
def _import_portkey() -> Any:
from langchain.utilities.portkey import Portkey
@ -251,6 +257,8 @@ def __getattr__(name: str) -> Any:
return _import_metaphor_search()
elif name == "OpenWeatherMapAPIWrapper":
return _import_openweathermap()
elif name == "OutlineAPIWrapper":
return _import_outline()
elif name == "Portkey":
return _import_portkey()
elif name == "PowerBIDataset":
@ -305,6 +313,7 @@ __all__ = [
"MaxComputeAPIWrapper",
"MetaphorSearchAPIWrapper",
"OpenWeatherMapAPIWrapper",
"OutlineAPIWrapper",
"Portkey",
"PowerBIDataset",
"PubMedAPIWrapper",

View File

@ -0,0 +1,96 @@
"""Util that calls Outline."""
import logging
from typing import Any, Dict, List, Optional
import requests
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain.utils import get_from_dict_or_env
logger = logging.getLogger(__name__)
OUTLINE_MAX_QUERY_LENGTH = 300
class OutlineAPIWrapper(BaseModel):
"""Wrapper around OutlineAPI.
This wrapper will use the Outline API to query the documents of your instance.
By default it will return the document content of the top-k results.
It limits the document content by doc_content_chars_max.
"""
top_k_results: int = 3
load_all_available_meta: bool = False
doc_content_chars_max: int = 4000
outline_instance_url: Optional[str] = None
outline_api_key: Optional[str] = None
outline_search_endpoint: str = "/api/documents.search"
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that instance url and api key exists in environment."""
outline_instance_url = get_from_dict_or_env(
values, "outline_instance_url", "OUTLINE_INSTANCE_URL"
)
values["outline_instance_url"] = outline_instance_url
outline_api_key = get_from_dict_or_env(
values, "outline_api_key", "OUTLINE_API_KEY"
)
values["outline_api_key"] = outline_api_key
return values
def _result_to_document(self, outline_res: Any) -> Document:
main_meta = {
"title": outline_res["document"]["title"],
"source": self.outline_instance_url + outline_res["document"]["url"],
}
add_meta = (
{
"id": outline_res["document"]["id"],
"ranking": outline_res["ranking"],
"collection_id": outline_res["document"]["collectionId"],
"parent_document_id": outline_res["document"]["parentDocumentId"],
"revision": outline_res["document"]["revision"],
"created_by": outline_res["document"]["createdBy"]["name"],
}
if self.load_all_available_meta
else {}
)
doc = Document(
page_content=outline_res["document"]["text"][: self.doc_content_chars_max],
metadata={
**main_meta,
**add_meta,
},
)
return doc
def _outline_api_query(self, query: str) -> List:
raw_result = requests.post(
f"{self.outline_instance_url}{self.outline_search_endpoint}",
data={"query": query, "limit": self.top_k_results},
headers={"Authorization": f"Bearer {self.outline_api_key}"},
)
if not raw_result.ok:
raise ValueError("Outline API returned an error: ", raw_result.text)
return raw_result.json()["data"]
def run(self, query: str) -> List[Document]:
"""
Run Outline search and get the document content plus the meta information.
Returns: a list of documents.
"""
results = self._outline_api_query(query[:OUTLINE_MAX_QUERY_LENGTH])
docs = []
for result in results[: self.top_k_results]:
if doc := self._result_to_document(result):
docs.append(doc)
return docs

View File

@ -0,0 +1,116 @@
"""Integration test for Outline API Wrapper."""
from typing import List
import pytest
import responses
from langchain_core.documents import Document
from langchain.utilities import OutlineAPIWrapper
OUTLINE_INSTANCE_TEST_URL = "https://app.getoutline.com"
OUTLINE_SUCCESS_RESPONSE = {
"data": [
{
"ranking": 0.3911583,
"context": "Testing Context",
"document": {
"id": "abb2bf15-a597-4255-8b19-b742e3d037bf",
"url": "/doc/quick-start-jGuGGGOTuL",
"title": "Test Title",
"text": "Testing Content",
"createdBy": {"name": "John Doe"},
"revision": 3,
"collectionId": "93f182a4-a591-4d47-83f0-752e7bb2065c",
"parentDocumentId": None,
},
},
],
"status": 200,
"ok": True,
}
OUTLINE_EMPTY_RESPONSE = {
"data": [],
"status": 200,
"ok": True,
}
OUTLINE_ERROR_RESPONSE = {
"ok": False,
"error": "authentication_required",
"status": 401,
"message": "Authentication error",
}
@pytest.fixture
def api_client() -> OutlineAPIWrapper:
return OutlineAPIWrapper(
outline_api_key="api_key", outline_instance_url=OUTLINE_INSTANCE_TEST_URL
)
def assert_docs(docs: List[Document], all_meta: bool = False) -> None:
for doc in docs:
assert doc.page_content
assert doc.metadata
main_meta = {"title", "source"}
assert set(doc.metadata).issuperset(main_meta)
if all_meta:
assert len(set(doc.metadata)) > len(main_meta)
else:
assert len(set(doc.metadata)) == len(main_meta)
@responses.activate
def test_run_success(api_client: OutlineAPIWrapper) -> None:
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_SUCCESS_RESPONSE,
status=200,
)
docs = api_client.run("Testing")
assert_docs(docs, all_meta=False)
@responses.activate
def test_run_success_all_meta(api_client: OutlineAPIWrapper) -> None:
api_client.load_all_available_meta = True
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_SUCCESS_RESPONSE,
status=200,
)
docs = api_client.run("Testing")
assert_docs(docs, all_meta=True)
@responses.activate
def test_run_no_result(api_client: OutlineAPIWrapper) -> None:
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_EMPTY_RESPONSE,
status=200,
)
docs = api_client.run("No Result Test")
assert not docs
@responses.activate
def test_run_error(api_client: OutlineAPIWrapper) -> None:
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_ERROR_RESPONSE,
status=401,
)
try:
api_client.run("Testing")
except Exception as e:
assert "Outline API returned an error:" in str(e)

View File

@ -23,6 +23,7 @@ EXPECTED_ALL = [
"MetalRetriever",
"MilvusRetriever",
"MultiQueryRetriever",
"OutlineRetriever",
"PineconeHybridSearchRetriever",
"PubMedRetriever",
"RemoteLangChainRetriever",

View File

@ -20,6 +20,7 @@ EXPECTED_ALL = [
"MaxComputeAPIWrapper",
"MetaphorSearchAPIWrapper",
"OpenWeatherMapAPIWrapper",
"OutlineAPIWrapper",
"Portkey",
"PowerBIDataset",
"PubMedAPIWrapper",