Mirror of https://github.com/hwchase17/langchain.git (synced 2025-04-27 11:41:51 +00:00)
text-splitters[minor], langchain[minor], community[patch], templates, docs: langchain-text-splitters 0.0.1 (#18346)
parent 7891934173
commit 5efb5c099f
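
The change this commit makes is mechanical across the repository: every `from langchain.text_splitter import ...` becomes `from langchain_text_splitters import ...`, with the splitters now shipped in the standalone `langchain-text-splitters` package. A minimal sketch of the resulting usage (the input file name and chunk sizes are illustrative, mirroring values that appear in the updated notebooks):

```python
# pip install langchain-text-splitters
from langchain_text_splitters import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)

# Illustrative input; any long string works.
with open("state_of_the_union.txt") as f:
    text = f.read()

# Fixed-separator splitting, as in many of the updated notebooks.
char_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = char_splitter.split_text(text)

# Recursive splitting falls back through separators to respect chunk_size.
recursive_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
smaller_chunks = recursive_splitter.split_text(text)
```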
.github/scripts/check_diff.py (vendored, 1 line changed)
@@ -5,6 +5,7 @@ from typing import Dict

 LANGCHAIN_DIRS = [
     "libs/core",
+    "libs/text-splitters",
     "libs/community",
     "libs/langchain",
     "libs/experimental",
.github/scripts/get_min_versions.py (vendored, 2 lines changed)
@@ -4,7 +4,7 @@ import tomllib
 from packaging.version import parse as parse_version
 import re

-MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain"]
+MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]


 def get_min_version(version: str) -> str:
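
The hunk above shows only the signature of `get_min_version`; its body is not part of this diff. As a hedged sketch consistent with the file's imports (`re` and `packaging.version.parse`), a minimal implementation might look like the following; the body is an assumption for illustration, not the repository's actual code:

```python
import re

from packaging.version import parse as parse_version


def get_min_version(version: str) -> str:
    # Assumed behavior: return the lowest version a specifier allows.
    # Caret ranges such as "^0.1.33" pin the minimum to the stated version.
    if version.startswith("^"):
        return str(parse_version(version[1:]))
    # Ranges such as ">=0.1.7,<0.2" contribute their lower bound.
    match = re.search(r">=\s*([0-9]+(?:\.[0-9]+)*)", version)
    if match:
        return match.group(1)
    # Otherwise treat the specifier as an exact pin.
    return str(parse_version(version))
```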
@@ -116,7 +116,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "from unstructured.partition.pdf import partition_pdf\n",
 "\n",
 "\n",
@@ -68,7 +68,7 @@
 "pdf_pages = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
 "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
@@ -28,9 +28,9 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "llm = OpenAI(temperature=0)"
 ]
@@ -227,8 +227,8 @@
 " BaseCombineDocumentsChain,\n",
 " load_qa_with_sources_chain,\n",
 ")\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "from pydantic import Field\n",
 "\n",
 "\n",
@@ -24,7 +24,7 @@
 "source": [
 "1. Prepare data:\n",
 " 1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
-" 2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
+" 2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
 " 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
 "2. Question-Answering:\n",
 " 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
@@ -621,7 +621,7 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 "texts = text_splitter.split_documents(docs)\n",
@@ -52,12 +52,12 @@
 "import os\n",
 "\n",
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import (\n",
+"from langchain_community.vectorstores import DeepLake\n",
+"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import (\n",
 " CharacterTextSplitter,\n",
 " RecursiveCharacterTextSplitter,\n",
 ")\n",
-"from langchain_community.vectorstores import DeepLake\n",
-"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
 "\n",
 "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
 "activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",
@@ -132,7 +132,7 @@
 "data = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
 "all_splits = text_splitter.split_documents(data)\n",
@@ -170,8 +170,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "with open(\"../../state_of_the_union.txt\") as f:\n",
 " state_of_the_union = f.read()\n",
@@ -124,7 +124,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
 " chunk_size=7500, chunk_overlap=100\n",
@@ -20,10 +20,10 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Chroma\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -59,13 +59,13 @@
 "from baidubce.auth.bce_credentials import BceCredentials\n",
 "from baidubce.bce_client_configuration import BceClientConfiguration\n",
 "from langchain.chains.retrieval_qa import RetrievalQA\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders.baiducloud_bos_directory import (\n",
 " BaiduBOSDirectoryLoader,\n",
 ")\n",
 "from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
 "from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
-"from langchain_community.vectorstores import BESVectorStore"
+"from langchain_community.vectorstores import BESVectorStore\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -36,9 +36,6 @@
 "from bs4 import BeautifulSoup as Soup\n",
 "from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
 "from langchain.storage import InMemoryByteStore, LocalFileStore\n",
-"\n",
-"# For our example, we'll load docs from the web\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter # noqa\n",
 "from langchain_community.document_loaders.recursive_url_loader import (\n",
 " RecursiveUrlLoader,\n",
 ")\n",
@@ -46,6 +43,9 @@
 "# noqa\n",
 "from langchain_community.vectorstores import Chroma\n",
 "\n",
+"# For our example, we'll load docs from the web\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter # noqa\n",
+"\n",
 "DOCSTORE_DIR = \".\"\n",
 "DOCSTORE_ID_KEY = \"doc_id\""
 ]
@@ -51,11 +51,11 @@
 "from langchain.chains.base import Chain\n",
 "from langchain.prompts import PromptTemplate\n",
 "from langchain.prompts.base import StringPromptTemplate\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.llms import BaseLLM\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_core.agents import AgentAction, AgentFinish\n",
 "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "from pydantic import BaseModel, Field"
 ]
 },
@@ -39,7 +39,7 @@
 "data = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
 "all_splits = text_splitter.split_documents(data)\n",
@@ -2610,7 +2610,7 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 "texts = text_splitter.split_documents(docs)"
@@ -281,7 +281,7 @@ Then we can build our index:

 ```python
 from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter


 text_splitter = RecursiveCharacterTextSplitter()
@@ -531,7 +531,7 @@ from langchain_openai import ChatOpenAI
 from langchain_community.document_loaders import WebBaseLoader
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.tools.retriever import create_retriever_tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_openai import ChatOpenAI
@@ -643,9 +643,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "# 2. Load the data: In our case data's already loaded\n",
 "# 3. Anonymize the data before indexing\n",
@@ -215,10 +215,10 @@
 "source": [
 "import requests\n",
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n",
 "\n",
@@ -78,9 +78,9 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQAWithSourcesChain\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
-"from langchain_openai import OpenAI, OpenAIEmbeddings"
+"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -62,9 +62,9 @@
 "warnings.filterwarnings(\"ignore\")\n",
 "from pprint import pprint\n",
 "\n",
-"from langchain.text_splitter import Language\n",
 "from langchain_community.document_loaders.generic import GenericLoader\n",
-"from langchain_community.document_loaders.parsers import LanguageParser"
+"from langchain_community.document_loaders.parsers import LanguageParser\n",
+"from langchain_text_splitters import Language"
 ]
 },
 {
@@ -323,7 +323,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import (\n",
+"from langchain_text_splitters import (\n",
 " Language,\n",
 " RecursiveCharacterTextSplitter,\n",
 ")"
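
The `Language` enum moves along with the splitters; language-aware splitting with the new import path looks like this (the chunk size and sample snippet are illustrative):

```python
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter

# Language-aware splitting prefers code-structure boundaries (defs, classes).
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=60, chunk_overlap=0
)
docs = python_splitter.create_documents(["def hello():\n    print('hello')\n"])
```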
@@ -426,6 +426,7 @@
 },
 {
 "cell_type": "markdown",
+"id": "7fb27b941602401d91542211134fc71a",
 "metadata": {},
 "source": [
 "## Adding Languages using Tree-sitter Template\n",
@@ -168,9 +168,9 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.vectorstores import FAISS\n",
-"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -1463,7 +1463,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter()"
 ]
@@ -82,7 +82,7 @@
 "# Map reduce example\n",
 "from langchain.chains.mapreduce import MapReduceChain\n",
 "from langchain.prompts import PromptTemplate\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "_prompt = \"\"\"Write a concise summary of the following:\n",
 "\n",
@@ -68,7 +68,7 @@ for OpenAI LLMs.

 You can also use it to count tokens when splitting documents with
 ```python
-from langchain.text_splitter import CharacterTextSplitter
+from langchain_text_splitters import CharacterTextSplitter
 CharacterTextSplitter.from_tiktoken_encoder(...)
 ```
 For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/document_transformers/split_by_token#tiktoken)
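
For token-based sizing with the new import path, a short usage sketch (the chunk sizes and sample text are illustrative):

```python
from langchain_text_splitters import CharacterTextSplitter

# from_tiktoken_encoder sizes chunks by token count rather than by characters.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100, chunk_overlap=0
)
chunks = text_splitter.split_text("some long document text ...")
```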
@@ -34,7 +34,7 @@ The vector store is a simple wrapper around Elasticsearch. It provides a simple
 from langchain_elasticsearch import ElasticsearchStore

 from langchain_community.document_loaders import TextLoader
-from langchain.text_splitter import CharacterTextSplitter
+from langchain_text_splitters import CharacterTextSplitter

 loader = TextLoader("./state_of_the_union.txt")
 documents = loader.load()
@@ -87,9 +87,9 @@
 "outputs": [],
 "source": [
 "import requests\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "\n",
 "def get_wikipedia_page(title: str):\n",
@@ -16,7 +16,7 @@ pip install spacy
 See a [usage example](/docs/modules/data_connection/document_transformers/split_by_token#spacy).

 ```python
-from langchain.text_splitter import SpacyTextSplitter
+from langchain_text_splitters import SpacyTextSplitter
 ```

 ## Text Embedding Models
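
Correspondingly, a minimal SpacyTextSplitter sketch with the new import path (requires the `en_core_web_sm` pipeline, installed via `python -m spacy download en_core_web_sm`; the sample text is illustrative):

```python
from langchain_text_splitters import SpacyTextSplitter

# Sentence-aware splitting backed by a spaCy pipeline.
text_splitter = SpacyTextSplitter(chunk_size=1000)
chunks = text_splitter.split_text("This is the first sentence. This is the second.")
```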
@@ -192,7 +192,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "chunk_size = 4096\n",
 "docs_new = []\n",
@@ -301,10 +301,10 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import CohereEmbeddings\n",
 "from langchain_community.vectorstores import FAISS\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
@@ -288,10 +288,10 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "documents = TextLoader(\n",
 " \"../../modules/state_of_the_union.txt\",\n",
@@ -52,10 +52,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.jaguar import Jaguar\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "\"\"\" \n",
 "Load a text file into a set of documents \n",
@@ -282,10 +282,10 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "documents = TextLoader(\n",
 " \"../../modules/state_of_the_union.txt\",\n",
@@ -28,10 +28,10 @@
 "import logging\n",
 "\n",
 "from langchain.retrievers import RePhraseQueryRetriever\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import WebBaseLoader\n",
 "from langchain_community.vectorstores import Chroma\n",
-"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -89,12 +89,12 @@
 "from langchain.chains import ConversationalRetrievalChain\n",
 "from langchain.chains.query_constructor.base import AttributeInfo\n",
 "from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import FakeEmbeddings\n",
 "from langchain_community.vectorstores import Vectara\n",
 "from langchain_core.documents import Document\n",
-"from langchain_openai import OpenAI"
+"from langchain_openai import OpenAI\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -50,10 +50,10 @@
 "# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n",
 "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
 "\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import SingleStoreDB\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -21,10 +21,10 @@
 "source": [
 "from langchain.agents import Tool\n",
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import PyPDFLoader\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "from pydantic import BaseModel, Field"
 ]
 },
@@ -51,9 +51,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import DeepLake\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -129,12 +129,12 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import (\n",
 " AlibabaCloudOpenSearch,\n",
 " AlibabaCloudOpenSearchSettings,\n",
 ")\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -23,9 +23,9 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import AnalyticDB\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -148,8 +148,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txtn.txtn.txt\")\n",
 "documents = loader.load()\n",
@@ -70,7 +70,6 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import TokenTextSplitter\n",
 "from langchain_community.document_loaders import (\n",
 " DirectoryLoader,\n",
 " UnstructuredMarkdownLoader,\n",
@@ -80,6 +79,7 @@
 " ApacheDorisSettings,\n",
 ")\n",
 "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import TokenTextSplitter\n",
 "\n",
 "update_vectordb = False"
 ]
@@ -91,13 +91,13 @@
 "from datasets import (\n",
 " load_dataset,\n",
 ")\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import PyPDFLoader\n",
 "from langchain_core.documents import Document\n",
 "from langchain_core.output_parsers import StrOutputParser\n",
 "from langchain_core.prompts import ChatPromptTemplate\n",
 "from langchain_core.runnables import RunnablePassthrough\n",
-"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -71,9 +71,9 @@
 "source": [
 "import time\n",
 "\n",
-"from langchain.text_splitter import SpacyTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
-"from langchain_community.vectorstores import AtlasDB"
+"from langchain_community.vectorstores import AtlasDB\n",
+"from langchain_text_splitters import SpacyTextSplitter"
 ]
 },
 {
@@ -28,9 +28,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
-"from langchain_community.vectorstores import AwaDB"
+"from langchain_community.vectorstores import AwaDB\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -130,13 +130,13 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.azure_cosmos_db import (\n",
 " AzureCosmosDBVectorSearch,\n",
 " CosmosDBSimilarityType,\n",
 ")\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "SOURCE_FILE_NAME = \"../../modules/state_of_the_union.txt\"\n",
 "\n",
@@ -234,8 +234,8 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\", encoding=\"utf-8\")\n",
 "\n",
@@ -108,8 +108,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -77,8 +77,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -74,13 +74,13 @@
 "from datasets import (\n",
 " load_dataset,\n",
 ")\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import PyPDFLoader\n",
 "from langchain_core.documents import Document\n",
 "from langchain_core.output_parsers import StrOutputParser\n",
 "from langchain_core.prompts import ChatPromptTemplate\n",
 "from langchain_core.runnables import RunnablePassthrough\n",
-"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -65,12 +65,12 @@
 ],
 "source": [
 "# import\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings.sentence_transformer import (\n",
 " SentenceTransformerEmbeddings,\n",
 ")\n",
 "from langchain_community.vectorstores import Chroma\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "# load the document and split it into chunks\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
@@ -79,9 +79,9 @@
 "outputs": [],
 "source": [
 "# Import the required modules\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
-"from langchain_community.vectorstores import Clarifai"
+"from langchain_community.vectorstores import Clarifai\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -101,9 +101,9 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Clickhouse, ClickhouseSettings\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -101,9 +101,9 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.embeddings.dashscope import DashScopeEmbeddings\n",
-"from langchain_community.vectorstores import DashVector"
+"from langchain_community.vectorstores import DashVector\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -59,9 +59,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -68,10 +68,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Dingo\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -130,10 +130,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Dingo\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -73,10 +73,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import DocArrayHnswSearch\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -70,10 +70,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import DocArrayInMemorySearch\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -216,8 +216,8 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -67,8 +67,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -88,10 +88,10 @@
 "# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
 "# os.environ['FAISS_NO_AVX2'] = '1'\n",
 "\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -56,10 +56,10 @@
 "# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
 "# os.environ['FAISS_NO_AVX2'] = '1'\n",
 "\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -184,8 +184,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"./state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -97,10 +97,10 @@
 "source": [
 "import os\n",
 "\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.hippo import Hippo\n",
-"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -33,9 +33,9 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Hologres\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -56,13 +56,13 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQAWithSourcesChain\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.jaguar import Jaguar\n",
 "from langchain_core.output_parsers import StrOutputParser\n",
 "from langchain_core.prompts import ChatPromptTemplate\n",
 "from langchain_core.runnables import RunnablePassthrough\n",
 "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "\"\"\" \n",
 "Load a text file into a set of documents \n",
@@ -114,14 +114,14 @@
 "outputs": [],
 "source": [
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import (\n",
 " DistanceStrategy,\n",
 " Kinetica,\n",
 " KineticaSettings,\n",
 ")\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -104,7 +104,7 @@
 "outputs": [],
 "source": [
 "from langchain.document_loaders import TextLoader\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -107,11 +107,11 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import OpenAIEmbeddings\n",
 "from langchain_community.vectorstores import Lantern\n",
-"from langchain_core.documents import Document"
+"from langchain_core.documents import Document\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -38,9 +38,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
-"from langchain_community.vectorstores import Marqo"
+"from langchain_community.vectorstores import Marqo\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -126,9 +126,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Meilisearch\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "embeddings = OpenAIEmbeddings()"
 ]
@@ -66,10 +66,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Milvus\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -204,6 +204,7 @@
 },
 {
 "cell_type": "markdown",
+"id": "7fb27b941602401d91542211134fc71a",
 "metadata": {
 "collapsed": false,
 "pycharm": {
@@ -221,6 +222,7 @@
 {
 "cell_type": "code",
 "execution_count": 2,
+"id": "acae54e37e7d407bbb7b55eff062a284",
 "metadata": {
 "collapsed": false,
 "pycharm": {
@@ -246,6 +248,7 @@
 },
 {
 "cell_type": "markdown",
+"id": "9a63283cbaf04dbcab1f6479b197f3a8",
 "metadata": {
 "collapsed": false,
 "pycharm": {
@@ -267,6 +270,7 @@
 {
 "cell_type": "code",
 "execution_count": 3,
+"id": "8dd0d8092fe74a7c96281538738b07e2",
 "metadata": {
 "collapsed": false,
 "pycharm": {
@@ -295,6 +299,7 @@
 {
 "cell_type": "code",
 "execution_count": 4,
+"id": "72eea5119410473aa328ad9291626812",
 "metadata": {
 "collapsed": false,
 "pycharm": {
@@ -143,10 +143,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import MomentoVectorIndex\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -171,7 +171,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)\n",
 "docs = text_splitter.split_documents(data)"
@@ -98,10 +98,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import MyScale\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -73,10 +73,10 @@
 "outputs": [],
 "source": [
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Neo4jVector\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -68,10 +68,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import OpenSearchVectorSearch\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -82,10 +82,10 @@
 "outputs": [],
 "source": [
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import PGEmbedding\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -27,10 +27,10 @@
 "from typing import List\n",
 "\n",
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings.fake import FakeEmbeddings\n",
-"from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs"
+"from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -101,10 +101,10 @@
 "outputs": [],
 "source": [
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.pgvector import PGVector\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -52,9 +52,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -78,10 +78,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Qdrant\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -108,10 +108,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Rockset\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -110,10 +110,10 @@
 "outputs": [],
 "source": [
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.hanavector import HanaDB\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
 "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
@@ -59,10 +59,10 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
 "from langchain_community.vectorstores import ScaNN\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -61,8 +61,8 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -46,10 +46,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import SingleStoreDB\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -60,10 +60,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import SKLearnVectorStore\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -69,12 +69,12 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings.sentence_transformer import (\n",
 " SentenceTransformerEmbeddings,\n",
 ")\n",
 "from langchain_community.vectorstores import SQLiteVSS\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "# load the document and split it into chunks\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
@@ -146,12 +146,12 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings.sentence_transformer import (\n",
 " SentenceTransformerEmbeddings,\n",
 ")\n",
 "from langchain_community.vectorstores import SQLiteVSS\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "# load the document and split it into chunks\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
@@ -58,7 +58,6 @@
 ],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import TokenTextSplitter\n",
 "from langchain_community.document_loaders import (\n",
 " DirectoryLoader,\n",
 " UnstructuredMarkdownLoader,\n",
@@ -66,6 +65,7 @@
 "from langchain_community.vectorstores import StarRocks\n",
 "from langchain_community.vectorstores.starrocks import StarRocksSettings\n",
 "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import TokenTextSplitter\n",
 "\n",
 "update_vectordb = False"
 ]
@@ -183,8 +183,8 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
 "documents = loader.load()\n",
@@ -73,10 +73,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
-"from langchain_community.vectorstores import SurrealDBStore"
+"from langchain_community.vectorstores import SurrealDBStore\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -20,9 +20,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.embeddings.fake import FakeEmbeddings\n",
-"from langchain_community.vectorstores import Tair"
+"from langchain_community.vectorstores import Tair\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -33,11 +33,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings.fake import FakeEmbeddings\n",
 "from langchain_community.vectorstores import TencentVectorDB\n",
-"from langchain_community.vectorstores.tencentvectordb import ConnectionParams"
+"from langchain_community.vectorstores.tencentvectordb import ConnectionParams\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -85,10 +85,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Tigris\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -43,10 +43,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
 "from langchain_community.vectorstores import TileDB\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "raw_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
@@ -123,11 +123,11 @@
 "from datetime import datetime, timedelta\n",
 "\n",
 "from langchain.docstore.document import Document\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.document_loaders.json_loader import JSONLoader\n",
 "from langchain_community.vectorstores.timescalevector import TimescaleVector\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
@@ -84,10 +84,10 @@
 },
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Typesense\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
|
@ -55,10 +55,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores import USearch\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
Some files were not shown because too many files have changed in this diff.