Harrison/bookend ai (#14258)

Co-authored-by: stvhu-bookend <142813359+stvhu-bookend@users.noreply.github.com>
This commit is contained in:
Harrison Chase 2023-12-04 19:42:15 -08:00 committed by GitHub
parent 0d47d15a9f
commit 2213fc9711
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 210 additions and 0 deletions

View File

@ -0,0 +1,89 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2c591a6a42ac7f0",
"metadata": {},
"source": [
"# Bookend AI\n",
"\n",
"Let's load the Bookend AI Embeddings class."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d94c62b4",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import BookendEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "523a09e3",
"metadata": {},
"outputs": [],
"source": [
"embeddings = BookendEmbeddings(\n",
" domain=\"your_domain\",\n",
" api_token=\"your_api_token\",\n",
" model_id=\"your_embeddings_model_id\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b212bd5a",
"metadata": {},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57db66bd",
"metadata": {},
"outputs": [],
"source": [
"query_result = embeddings.embed_query(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b790fd09",
"metadata": {},
"outputs": [],
"source": [
"doc_result = embeddings.embed_documents([text])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -22,6 +22,7 @@ from langchain.embeddings.awa import AwaEmbeddings
from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings
from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint
from langchain.embeddings.bedrock import BedrockEmbeddings
from langchain.embeddings.bookend import BookendEmbeddings
from langchain.embeddings.cache import CacheBackedEmbeddings
from langchain.embeddings.clarifai import ClarifaiEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
@ -127,6 +128,7 @@ __all__ = [
"QianfanEmbeddingsEndpoint", "QianfanEmbeddingsEndpoint",
"JohnSnowLabsEmbeddings", "JohnSnowLabsEmbeddings",
"VoyageEmbeddings", "VoyageEmbeddings",
"BookendEmbeddings",
] ]

View File

@ -0,0 +1,91 @@
"""Wrapper around Bookend AI embedding models."""
import json
from typing import Any, List
import requests
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema.embeddings import Embeddings
# Base URL of the Bookend AI API; the configured domain and PATH are appended
# to it to form the request URL.
API_URL = "https://api.bookend.ai/"
# Value sent as the "task" query parameter on each prediction request.
DEFAULT_TASK = "embeddings"
# Endpoint path appended after the domain when building the request URL.
PATH = "/models/predict"
class BookendEmbeddings(BaseModel, Embeddings):
    """Bookend AI sentence_transformers embedding models.

    Each text is embedded with its own POST request to
    ``API_URL + domain + PATH``; the API token is sent in a ``Basic``
    ``Authorization`` header.

    Example:
        .. code-block:: python

            from langchain.embeddings import BookendEmbeddings

            bookend = BookendEmbeddings(
                domain={domain},
                api_token={api_token},
                model_id={model_id},
            )
            bookend.embed_documents([
                "Please put on these earmuffs because I can't you hear.",
                "Baby wipes are made of chocolate stardust.",
            ])
            bookend.embed_query(
                "She only paints with bold colors; she does not like pastels."
            )
    """

    domain: str
    """Bookend domain used in the request URL; request one at https://bookend.ai/."""
    api_token: str
    """API token sent in the Authorization header; request one at https://bookend.ai/."""
    model_id: str
    """Embeddings model ID to use."""
    auth_header: dict = Field(default_factory=dict)
    """Authorization header; always rebuilt from ``api_token`` in ``__init__``."""

    def __init__(self, **kwargs: Any):
        """Validate the fields and derive ``auth_header`` from ``api_token``.

        Args:
            **kwargs: Field values (``domain``, ``api_token``, ``model_id``).
                Any ``auth_header`` passed in is overwritten.
        """
        super().__init__(**kwargs)
        self.auth_header = {"Authorization": "Basic {}".format(self.api_token)}

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using a Bookend deployed embeddings model.

        Args:
            texts: The list of texts to embed. One request is sent per text.

        Returns:
            List of embeddings, one for each text, in input order. An empty
            input yields an empty list without any request being made.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        # Build the headers on a copy so that ``self.auth_header`` is not
        # mutated as a side effect of embedding.
        headers = {
            **self.auth_header,
            "Content-Type": "application/json; charset=utf-8",
        }
        params = {
            "model_id": self.model_id,
            "task": DEFAULT_TASK,
        }
        # The URL does not depend on the text, so compute it once.
        url = API_URL + self.domain + PATH

        result = []
        for text in texts:
            data = json.dumps(
                {"text": text, "question": None, "context": None, "instruction": None}
            )
            r = requests.request(
                "POST",
                url,
                headers=headers,
                params=params,
                data=data,
            )
            # Fail with the HTTP error itself rather than an opaque
            # KeyError/TypeError while indexing an error payload.
            r.raise_for_status()
            result.append(r.json()[0]["data"])
        return result

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a Bookend deployed embeddings model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]

View File

@ -0,0 +1,27 @@
"""Test Bookend AI embeddings."""
from langchain.embeddings.bookend import BookendEmbeddings
def test_bookend_embedding_documents() -> None:
    """Embed two short documents and check the count and vector length."""
    # The angle-bracket values look like placeholders to be replaced with
    # real credentials before running against the service.
    settings = {
        "domain": "<bookend_domain>",
        "api_token": "<bookend_api_token>",
        "model_id": "<bookend_embeddings_model_id>",
    }
    client = BookendEmbeddings(**settings)
    vectors = client.embed_documents(["foo bar", "bar foo"])
    assert len(vectors) == 2
    first_vector = vectors[0]
    assert len(first_vector) == 768
def test_bookend_embedding_query() -> None:
    """Embed a single query string and check the vector length."""
    # The angle-bracket values look like placeholders to be replaced with
    # real credentials before running against the service.
    settings = {
        "domain": "<bookend_domain>",
        "api_token": "<bookend_api_token>",
        "model_id": "<bookend_embeddings_model_id>",
    }
    client = BookendEmbeddings(**settings)
    vector = client.embed_query("foo bar")
    assert len(vector) == 768

View File

@ -53,6 +53,7 @@ EXPECTED_ALL = [
"QianfanEmbeddingsEndpoint", "QianfanEmbeddingsEndpoint",
"JohnSnowLabsEmbeddings", "JohnSnowLabsEmbeddings",
"VoyageEmbeddings", "VoyageEmbeddings",
"BookendEmbeddings",
] ]