mirror of https://github.com/hwchase17/langchain.git (synced 2025-09-06 13:33:37 +00:00)

Merge branch 'master' into deepsense/text-to-speech
@@ -18,7 +18,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": null,
+    "execution_count": 1,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -93,8 +93,22 @@
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "langchain",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.5"
   },
   "orig_nbformat": 4
  },
@@ -31,11 +31,16 @@
    "outputs": [],
    "source": [
     "# get new tokens: https://app.banana.dev/\n",
-    "# We need two tokens, not just an `api_key`: `BANANA_API_KEY` and `YOUR_MODEL_KEY`\n",
+    "# We need three parameters to make a Banana.dev API call:\n",
+    "# * a team api key\n",
+    "# * the model's unique key\n",
+    "# * the model's url slug\n",
     "\n",
     "import os\n",
     "from getpass import getpass\n",
     "\n",
+    "# You can get this from the main dashboard\n",
+    "# at https://app.banana.dev\n",
     "os.environ[\"BANANA_API_KEY\"] = \"YOUR_API_KEY\"\n",
     "# OR\n",
     "# BANANA_API_KEY = getpass()"
@@ -70,7 +75,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "llm = Banana(model_key=\"YOUR_MODEL_KEY\")"
+    "# Both of these are found in your model's \n",
+    "# detail page in https://app.banana.dev\n",
+    "llm = Banana(model_key=\"YOUR_MODEL_KEY\", model_url_slug=\"YOUR_MODEL_URL_SLUG\")"
    ]
   },
   {
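Taken together, the updated cells reduce to the following sketch. This is a reconstruction for reference, not part of the diff; the key and slug values are placeholders from the Banana dashboard.

```python
import os

from langchain.llms import Banana

# Placeholder team API key from the main dashboard at https://app.banana.dev
os.environ["BANANA_API_KEY"] = "YOUR_API_KEY"

# Both values are found on the model's details page in the dashboard.
llm = Banana(model_key="YOUR_MODEL_KEY", model_url_slug="YOUR_MODEL_URL_SLUG")

# The wrapper then behaves like any other LangChain LLM.
print(llm("Tell me a joke"))
```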
@@ -236,7 +236,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "llm_oss = VertexAIModelGarden(\n",
+    "llm = VertexAIModelGarden(\n",
     "    project=\"YOUR PROJECT\",\n",
     "    endpoint_id=\"YOUR ENDPOINT_ID\"\n",
     ")"
@@ -248,14 +248,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "llm_oss(\"What is the meaning of life?\")"
+    "llm(\"What is the meaning of life?\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "You can also use it as a chain:"
+    "Like all LLMs, we can then compose it with other components:"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.prompts import PromptTemplate\n",
+    "\n",
+    "prompt = PromptTemplate.from_template(\"What is the meaning of {thing}?\")"
+   ]
+  },
   {
@@ -264,17 +275,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "llm_oss_chain = LLMChain(prompt=prompt, llm=llm_oss(\"What is the meaning of life?\")\n",
-    ")\n",
-    "llm_oss_chain.run(question)"
+    "llm_oss_chain = prompt | llm\n",
+    "\n",
+    "llm_oss_chain.invoke({\"thing\": \"life\"})"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "poetry-venv",
    "language": "python",
-   "name": "python3"
+   "name": "poetry-venv"
   },
   "language_info": {
    "codemirror_mode": {
@@ -286,7 +297,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.9.1"
   },
   "vscode": {
    "interpreter": {
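Pulled out of the notebook JSON above, the new cells compose to roughly this pipeline. A sketch with placeholder project and endpoint values, assuming `VertexAIModelGarden` is importable from `langchain.llms` as in the notebook's context:

```python
from langchain.llms import VertexAIModelGarden
from langchain.prompts import PromptTemplate

# Placeholders for your GCP project and deployed Model Garden endpoint.
llm = VertexAIModelGarden(project="YOUR PROJECT", endpoint_id="YOUR ENDPOINT_ID")

prompt = PromptTemplate.from_template("What is the meaning of {thing}?")

# LCEL pipe syntax: the rendered prompt feeds straight into the LLM.
llm_oss_chain = prompt | llm
print(llm_oss_chain.invoke({"thing": "life"}))
```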
@@ -1,79 +1,72 @@
 # Banana

-This page covers how to use the Banana ecosystem within LangChain.
-It is broken into two parts: installation and setup, and then references to specific Banana wrappers.
+Banana provides serverless GPU inference for AI models, including a CI/CD build pipeline and a simple Python framework (Potassium) to serve your models.
+
+This page covers how to use the [Banana](https://www.banana.dev) ecosystem within LangChain.
+
+It is broken into two parts:
+* installation and setup,
+* and then references to specific Banana wrappers.

 ## Installation and Setup

 - Install with `pip install banana-dev`
-- Get an Banana api key and set it as an environment variable (`BANANA_API_KEY`)
+- Get a Banana API key from the [Banana.dev dashboard](https://app.banana.dev) and set it as an environment variable (`BANANA_API_KEY`)
+- Get your model's key and url slug from the model's details page

 ## Define your Banana Template

-If you want to use an available language model template you can find one [here](https://app.banana.dev/templates/conceptofmind/serverless-template-palmyra-base).
-This template uses the Palmyra-Base model by [Writer](https://writer.com/product/api/).
-You can check out an example Banana repository [here](https://github.com/conceptofmind/serverless-template-palmyra-base).
+You'll need to set up a Github repo for your Banana app. You can get started in 5 minutes using [this guide](https://docs.banana.dev/banana-docs/).
+
+Alternatively, for a ready-to-go LLM example, you can check out Banana's [CodeLlama-7B-Instruct-GPTQ](https://github.com/bananaml/demo-codellama-7b-instruct-gptq) GitHub repository. Just fork it and deploy it within Banana.
+
+Other starter repos are available [here](https://github.com/orgs/bananaml/repositories?q=demo-&type=all&language=&sort=).

 ## Build the Banana app

-Banana Apps must include the "output" key in the return json.
-There is a rigid response structure.
+To use Banana apps within LangChain, they must include the `outputs` key
+in the returned json, and the value must be a string.

 ```python
 # Return the results as a dictionary
-result = {'output': result}
+result = {'outputs': result}
 ```

 An example inference function would be:

 ```python
-def inference(model_inputs:dict) -> dict:
-    global model
-    global tokenizer
-
-    # Parse out your arguments
-    prompt = model_inputs.get('prompt', None)
-    if prompt == None:
-        return {'message': "No prompt provided"}
-
-    # Run the model
-    input_ids = tokenizer.encode(prompt, return_tensors='pt').cuda()
-    output = model.generate(
-        input_ids,
-        max_length=100,
-        do_sample=True,
-        top_k=50,
-        top_p=0.95,
-        num_return_sequences=1,
-        temperature=0.9,
-        early_stopping=True,
-        no_repeat_ngram_size=3,
-        num_beams=5,
-        length_penalty=1.5,
-        repetition_penalty=1.5,
-        bad_words_ids=[[tokenizer.encode(' ', add_prefix_space=True)[0]]]
-    )
-
-    result = tokenizer.decode(output[0], skip_special_tokens=True)
-    # Return the results as a dictionary
-    result = {'output': result}
-    return result
+@app.handler("/")
+def handler(context: dict, request: Request) -> Response:
+    """Handle a request to generate code from a prompt."""
+    model = context.get("model")
+    tokenizer = context.get("tokenizer")
+    max_new_tokens = request.json.get("max_new_tokens", 512)
+    temperature = request.json.get("temperature", 0.7)
+    prompt = request.json.get("prompt")
+    prompt_template = f'''[INST] Write code to solve the following coding problem that obeys the constraints and passes the example test cases. Please wrap your code answer using ```:
+    {prompt}
+    [/INST]
+    '''
+    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+    output = model.generate(inputs=input_ids, temperature=temperature, max_new_tokens=max_new_tokens)
+    result = tokenizer.decode(output[0])
+    return Response(json={"outputs": result}, status=200)
 ```

-You can find a full example of a Banana app [here](https://github.com/conceptofmind/serverless-template-palmyra-base/blob/main/app.py).
+This example is from the `app.py` file in [CodeLlama-7B-Instruct-GPTQ](https://github.com/bananaml/demo-codellama-7b-instruct-gptq).

 ## Wrappers

 ### LLM

-There exists an Banana LLM wrapper, which you can access with
+Within LangChain, there exists a Banana LLM wrapper, which you can access with

 ```python
 from langchain.llms import Banana
 ```

-You need to provide a model key located in the dashboard:
+You need to provide a model key and model url slug, which you can get from the model's details page in the [Banana.dev dashboard](https://app.banana.dev).

 ```python
-llm = Banana(model_key="YOUR_MODEL_KEY")
+llm = Banana(model_key="YOUR_MODEL_KEY", model_url_slug="YOUR_MODEL_URL_SLUG")
 ```
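For a quick end-to-end check of the rewritten page, the wrapper composes with a prompt the same way as the other LLMs touched by this commit. A sketch only; the credentials are placeholders and the prompt is illustrative:

```python
from langchain.llms import Banana
from langchain.prompts import PromptTemplate

# Placeholder values from the model's details page in the Banana dashboard.
llm = Banana(model_key="YOUR_MODEL_KEY", model_url_slug="YOUR_MODEL_URL_SLUG")

prompt = PromptTemplate.from_template("Tell me a joke about {topic}.")
chain = prompt | llm

# The string under the Banana app's `outputs` key comes back as the completion.
print(chain.invoke({"topic": "bananas"}))
```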
@@ -5,13 +5,23 @@
    "id": "ed47bb62",
    "metadata": {},
    "source": [
-    "# Hugging Face Hub\n",
+    "# Hugging Face\n",
     "Let's load the Hugging Face Embedding class."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
+   "id": "16b20335-da1d-46ba-aa23-fbf3e2c6fe60",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install langchain sentence_transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
    "id": "861521a9",
    "metadata": {},
    "outputs": [],
@@ -21,7 +31,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 3,
    "id": "ff9be586",
    "metadata": {},
    "outputs": [],
@@ -31,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 3,
    "id": "d0a98ae9",
    "metadata": {},
    "outputs": [],
@@ -41,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 5,
    "id": "5d6c682b",
    "metadata": {},
    "outputs": [],
@@ -51,7 +61,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 6,
    "id": "b57b8ce9-ef7d-4e63-979e-aa8763d1f9a8",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.04895168915390968, -0.03986193612217903, -0.021562768146395683]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result[:3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "bb5e74c0",
+   "metadata": {},
+   "outputs": [],
@@ -60,19 +91,71 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "aaad49f8",
+   "cell_type": "markdown",
+   "id": "92019ef1-5d30-4985-b4e6-c0d98bdfe265",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "## Hugging Face Inference API\n",
+    "We can also access embedding models via the Hugging Face Inference API, which does not require us to install ``sentence_transformers`` and download models locally."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "66f5c6ba-1446-43e1-b012-800d17cef300",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "Enter your HF Inference API Key:\n",
+      "\n",
+      " ········\n"
+     ]
+    }
+   ],
+   "source": [
+    "import getpass\n",
+    "\n",
+    "inference_api_key = getpass.getpass(\"Enter your HF Inference API Key:\\n\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d0623c1f-cd82-4862-9bce-3655cb9b66ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.038338541984558105, 0.1234646737575531, -0.028642963618040085]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings\n",
+    "\n",
+    "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
+    "    api_key=inference_api_key,\n",
+    "    model_name=\"sentence-transformers/all-MiniLM-l6-v2\"\n",
+    ")\n",
+    "\n",
+    "query_result = embeddings.embed_query(text)\n",
+    "query_result[:3]"
+   ]
+  }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "poetry-venv",
    "language": "python",
-   "name": "python3"
+   "name": "poetry-venv"
   },
   "language_info": {
    "codemirror_mode": {
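The local-embeddings cells whose bodies fall outside these hunks presumably follow the standard pattern below. A sketch, with the model name and the sample `text` assumed rather than taken from the diff:

```python
from langchain.embeddings import HuggingFaceEmbeddings

# Assumed model; the actual cell bodies sit outside the diff's context lines.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-l6-v2")

text = "This is a test document."  # assumed sample input
query_result = embeddings.embed_query(text)

# embed_documents embeds a batch of texts in one call.
doc_result = embeddings.embed_documents([text])
print(query_result[:3])
```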
docs/extras/integrations/vectorstores/nucliadb.ipynb (new file, 126 lines)
@@ -0,0 +1,126 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NucliaDB\n",
    "\n",
    "You can use a local NucliaDB instance or use [Nuclia Cloud](https://nuclia.cloud).\n",
    "\n",
    "When using a local instance, you need a Nuclia Understanding API key, so your texts are properly vectorized and indexed. You can get a key by creating a free account at [https://nuclia.cloud](https://nuclia.cloud), and then [create a NUA key](https://docs.nuclia.dev/docs/docs/using/understanding/intro)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install langchain nuclia"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usage with nuclia.cloud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.vectorstores.nucliadb import NucliaDB\n",
    "API_KEY = \"YOUR_API_KEY\"\n",
    "\n",
    "ndb = NucliaDB(knowledge_box=\"YOUR_KB_ID\", local=False, api_key=API_KEY)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usage with a local instance\n",
    "\n",
    "Note: By default `backend` is set to `http://localhost:8080`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.vectorstores.nucliadb import NucliaDB\n",
    "\n",
    "ndb = NucliaDB(knowledge_box=\"YOUR_KB_ID\", local=True, backend=\"http://my-local-server\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add and delete texts to your Knowledge Box"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ids = ndb.add_texts([\"This is a new test\", \"This is a second test\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ndb.delete(ids=ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Search in your Knowledge Box"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
"results = ndb.similarity_search(\"Who was inspired by Ada Lovelace?\")\n",
|
||||
"print(res.page_content)"
|
||||
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
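Condensed from the cells above, the whole NucliaDB flow fits in one sketch; the key and knowledge-box id are placeholders:

```python
from langchain.vectorstores.nucliadb import NucliaDB

# Placeholder credentials; create a key and knowledge box at https://nuclia.cloud
ndb = NucliaDB(knowledge_box="YOUR_KB_ID", local=False, api_key="YOUR_API_KEY")

# Index a couple of texts; add_texts returns their ids.
ids = ndb.add_texts(["This is a new test", "This is a second test"])

results = ndb.similarity_search("Who was inspired by Ada Lovelace?")
for doc in results:
    print(doc.page_content)

# Remove the texts again by id.
ndb.delete(ids=ids)
```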
docs/extras/integrations/vectorstores/sqlitevss.ipynb (new file, 207 lines)
@@ -0,0 +1,207 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# sqlite-vss\n",
    "\n",
    ">[sqlite-vss](https://alexgarcia.xyz/sqlite-vss/) is an SQLite extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the Faiss library, it offers efficient similarity search and clustering capabilities.\n",
    "\n",
    "This notebook shows how to use the `SQLiteVSS` vector database."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# You need to install sqlite-vss as a dependency.\n",
    "%pip install sqlite-vss"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Quickstart"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain.vectorstores import SQLiteVSS\n",
    "from langchain.document_loaders import TextLoader\n",
    "\n",
    "# load the document and split it into chunks\n",
    "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
    "documents = loader.load()\n",
    "\n",
    "# split it into chunks\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "docs = text_splitter.split_documents(documents)\n",
    "texts = [doc.page_content for doc in docs]\n",
    "\n",
    "\n",
    "# create the open-source embedding function\n",
    "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
    "\n",
    "\n",
    "# load it in sqlite-vss in a table named state_union.\n",
    "# the db_file parameter is the name of the file you want\n",
    "# as your sqlite database.\n",
    "db = SQLiteVSS.from_texts(\n",
    "    texts=texts,\n",
    "    embedding=embedding_function,\n",
    "    table=\"state_union\",\n",
    "    db_file=\"/tmp/vss.db\"\n",
    ")\n",
    "\n",
    "# query it\n",
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "data = db.similarity_search(query)\n",
    "\n",
    "# print results\n",
    "data[0].page_content"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-09-06T14:55:55.370351Z",
     "start_time": "2023-09-06T14:55:53.547755Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Using existing sqlite connection"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "data": {
      "text/plain": "'Ketanji Brown Jackson is awesome'"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain.vectorstores import SQLiteVSS\n",
    "from langchain.document_loaders import TextLoader\n",
    "\n",
    "# load the document and split it into chunks\n",
    "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
    "documents = loader.load()\n",
    "\n",
    "# split it into chunks\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "docs = text_splitter.split_documents(documents)\n",
    "texts = [doc.page_content for doc in docs]\n",
    "\n",
    "\n",
    "# create the open-source embedding function\n",
    "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
    "connection = SQLiteVSS.create_connection(db_file=\"/tmp/vss.db\")\n",
    "\n",
    "db1 = SQLiteVSS(\n",
    "    table=\"state_union\",\n",
    "    embedding=embedding_function,\n",
    "    connection=connection\n",
    ")\n",
    "\n",
    "db1.add_texts([\"Ketanji Brown Jackson is awesome\"])\n",
    "# query it again\n",
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "data = db1.similarity_search(query)\n",
    "\n",
    "# print results\n",
    "data[0].page_content"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-09-06T14:59:22.086252Z",
     "start_time": "2023-09-06T14:59:21.693237Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [],
   "source": [
    "# Cleaning up\n",
    "import os\n",
    "os.remove(\"/tmp/vss.db\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-09-06T15:01:15.550318Z",
     "start_time": "2023-09-06T15:01:15.546428Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
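Beyond the notebook's own cells, the store plugs into the rest of LangChain through the generic `VectorStore` interface. A sketch reusing the quickstart's table, with `as_retriever` coming from the base class rather than anything sqlite-vss specific:

```python
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import SQLiteVSS

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Reuse the table the quickstart created in /tmp/vss.db.
connection = SQLiteVSS.create_connection(db_file="/tmp/vss.db")
db = SQLiteVSS(table="state_union", embedding=embedding_function, connection=connection)

# Any LangChain vector store can be exposed as a retriever.
retriever = db.as_retriever(search_kwargs={"k": 2})
docs = retriever.get_relevant_documents(
    "What did the president say about Ketanji Brown Jackson"
)
print(docs[0].page_content)
```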
@@ -167,7 +167,7 @@
     "Tables necessary to determine the places of the planets are not less\r\n",
     "necessary than those for the sun, moon, and stars. Some notion of the\r\n",
     "number and complexity of these tables may be formed, when we state that\r\n",
-    "the positions of the two principal planets, (and these the most\r\n",
+    "the positions of the two principal planets, (and these are the most\r\n",
     "necessary for the navigator,) Jupiter and Saturn, require each not less\r\n",
     "than one hundred and sixteen tables. Yet it is not only necessary to\r\n",
     "predict the position of these bodies, but it is likewise expedient to -> 0.8998482592744614 \n",