mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 03:38:06 +00:00
Harrison/databerry (#2688)
Co-authored-by: Georges Petrov <georgesm.petrov@gmail.com>
This commit is contained in:
parent
b286d0e63f
commit
ad3c5dd186
BIN
docs/_static/DataberryDashboard.png
vendored
Normal file
BIN
docs/_static/DataberryDashboard.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 157 KiB |
25
docs/ecosystem/databerry.md
Normal file
25
docs/ecosystem/databerry.md
Normal file
@ -0,0 +1,25 @@
|
||||
# Databerry
|
||||
|
||||
This page covers how to use [Databerry](https://databerry.ai) within LangChain.
|
||||
|
||||
## What is Databerry?
|
||||
|
||||
Databerry is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
||||
|
||||

|
||||
|
||||
## Quick start
|
||||
|
||||
Retrieving documents stored in Databerry from LangChain is very easy!
|
||||
|
||||
```python
|
||||
from langchain.retrievers import DataberryRetriever
|
||||
|
||||
retriever = DataberryRetriever(
|
||||
datastore_url="https://api.databerry.ai/query/clg1xg2h80000l708dymr0fxc",
|
||||
# api_key="DATABERRY_API_KEY", # optional if datastore is public
|
||||
# top_k=10 # optional
|
||||
)
|
||||
|
||||
docs = retriever.get_relevant_documents("What's Databerry?")
|
||||
```
|
95
docs/modules/indexes/retrievers/examples/databerry.ipynb
Normal file
95
docs/modules/indexes/retrievers/examples/databerry.ipynb
Normal file
@ -0,0 +1,95 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9fc6205b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Databerry\n",
|
||||
"\n",
|
||||
"This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
|
||||
"\n",
|
||||
"First, you will need to sign up for Databerry, create a datastore, add some data, and get your datastore API endpoint URL."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "944e172b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query\n",
|
||||
"\n",
|
||||
"Now that our index is set up, we can set up a retriever and start querying it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "d0e6f506",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import DataberryRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "f381f642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = DataberryRetriever(\n",
|
||||
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
|
||||
" # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
|
||||
" # top_k=10 # optional\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "20ae1a74",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramGetting StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!DaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord', metadata={'source': 'https:/daftpage.com/help/getting-started', 'score': 0.8697265}),\n",
|
||||
" Document(page_content=\"✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramHelp CenterWelcome to Daftpage’s help center—the one-stop shop for learning everything about building websites with Daftpage.Daftpage is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.86570895}),\n",
|
||||
" Document(page_content=\" is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.8645384})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"What is Daftpage?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
|
||||
from langchain.retrievers.databerry import DataberryRetriever
|
||||
from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
|
||||
from langchain.retrievers.metal import MetalRetriever
|
||||
from langchain.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever
|
||||
@ -14,4 +15,5 @@ __all__ = [
|
||||
"ElasticSearchBM25Retriever",
|
||||
"TFIDFRetriever",
|
||||
"WeaviateHybridSearchRetriever",
|
||||
"DataberryRetriever",
|
||||
]
|
||||
|
74
langchain/retrievers/databerry.py
Normal file
74
langchain/retrievers/databerry.py
Normal file
@ -0,0 +1,74 @@
|
||||
from typing import List, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
class DataberryRetriever(BaseRetriever):
    """Retriever that queries a Databerry datastore over HTTP.

    The query is POSTed as JSON to ``datastore_url``; every entry of the
    ``"results"`` list in the JSON response is turned into a ``Document``.
    """

    # URL that queries are POSTed to.
    datastore_url: str
    # Sent as ``topK`` in the request body; left out of the body when None.
    top_k: Optional[int]
    # Sent as a ``Bearer`` token in the Authorization header; left out when None.
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the endpoint URL and the optional ``top_k`` / ``api_key``."""
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _request_payload(self, query: str) -> dict:
        """Build the JSON body; ``topK`` is included only when ``top_k`` is set."""
        payload = {"query": query}
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _request_headers(self) -> dict:
        """Build the headers; Authorization is included only when ``api_key`` is set."""
        headers = {"Content-Type": "application/json"}
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_results(data: dict) -> List[Document]:
        """Convert the decoded response body into a list of ``Document`` objects."""
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore synchronously.

        Args:
            query: Text sent as the ``"query"`` field of the request body.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list,
            with the entry's ``source`` and ``score`` stored in the metadata.

        Raises:
            requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        """
        response = requests.post(
            self.datastore_url,
            json=self._request_payload(query),
            headers=self._request_headers(),
        )
        # Fail with the real HTTP error instead of a confusing KeyError /
        # JSON decode error further down when the request was rejected.
        response.raise_for_status()
        return self._parse_results(response.json())

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore asynchronously.

        Args:
            query: Text sent as the ``"query"`` field of the request body.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list,
            with the entry's ``source`` and ``score`` stored in the metadata.

        Raises:
            aiohttp.ClientResponseError: If the endpoint answers with a
                4xx/5xx status.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._request_payload(query),
                headers=self._request_headers(),
            ) as response:
                # Same rationale as the sync path: surface HTTP failures directly.
                response.raise_for_status()
                data = await response.json()
        return self._parse_results(data)
|
@ -12,13 +12,20 @@ class RemoteLangChainRetriever(BaseRetriever, BaseModel):
|
||||
headers: Optional[dict] = None
|
||||
input_key: str = "message"
|
||||
response_key: str = "response"
|
||||
page_content_key: str = "page_content"
|
||||
metadata_key: str = "metadata"
|
||||
|
||||
def get_relevant_documents(self, query: str) -> List[Document]:
|
||||
response = requests.post(
|
||||
self.url, json={self.input_key: query}, headers=self.headers
|
||||
)
|
||||
result = response.json()
|
||||
return [Document(**r) for r in result[self.response_key]]
|
||||
return [
|
||||
Document(
|
||||
page_content=r[self.page_content_key], metadata=r[self.metadata_key]
|
||||
)
|
||||
for r in result[self.response_key]
|
||||
]
|
||||
|
||||
async def aget_relevant_documents(self, query: str) -> List[Document]:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
@ -26,4 +33,9 @@ class RemoteLangChainRetriever(BaseRetriever, BaseModel):
|
||||
"POST", self.url, headers=self.headers, json={self.input_key: query}
|
||||
) as response:
|
||||
result = await response.json()
|
||||
return [Document(**r) for r in result[self.response_key]]
|
||||
return [
|
||||
Document(
|
||||
page_content=r[self.page_content_key], metadata=r[self.metadata_key]
|
||||
)
|
||||
for r in result[self.response_key]
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user