mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-11-03 17:54:10 +00:00 
			
		
		
		
	ran ```bash g grep -l "langchain.vectorstores" | xargs -L 1 sed -i '' "s/langchain\.vectorstores/langchain_community.vectorstores/g" g grep -l "langchain.document_loaders" | xargs -L 1 sed -i '' "s/langchain\.document_loaders/langchain_community.document_loaders/g" g grep -l "langchain.chat_loaders" | xargs -L 1 sed -i '' "s/langchain\.chat_loaders/langchain_community.chat_loaders/g" g grep -l "langchain.document_transformers" | xargs -L 1 sed -i '' "s/langchain\.document_transformers/langchain_community.document_transformers/g" g grep -l "langchain\.graphs" | xargs -L 1 sed -i '' "s/langchain\.graphs/langchain_community.graphs/g" g grep -l "langchain\.memory\.chat_message_histories" | xargs -L 1 sed -i '' "s/langchain\.memory\.chat_message_histories/langchain_community.chat_message_histories/g" gco master libs/langchain/tests/unit_tests/*/test_imports.py gco master libs/langchain/tests/unit_tests/**/test_public_api.py ```
		
			
				
	
	
		
			54 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			54 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import os
 | 
						|
 | 
						|
from langchain.text_splitter import RecursiveCharacterTextSplitter
 | 
						|
from langchain_community.document_loaders import JSONLoader
 | 
						|
from langchain_community.embeddings import HuggingFaceEmbeddings
 | 
						|
from langchain_community.vectorstores.elasticsearch import ElasticsearchStore
 | 
						|
 | 
						|
# Elasticsearch connection settings, read from the environment.
# ELASTIC_USERNAME and ES_URL have defaults; the other two are None when unset.
ELASTIC_CLOUD_ID = os.getenv("ELASTIC_CLOUD_ID")
ELASTIC_USERNAME = os.getenv("ELASTIC_USERNAME", "elastic")
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")
ES_URL = os.getenv("ES_URL", "http://localhost:9200")

# Use the cloud-id/username/password triple only when all three values are
# non-empty; otherwise fall back to connecting by plain URL.
if ELASTIC_CLOUD_ID and ELASTIC_USERNAME and ELASTIC_PASSWORD:
    es_connection_details = {
        "es_cloud_id": ELASTIC_CLOUD_ID,
        "es_user": ELASTIC_USERNAME,
        "es_password": ELASTIC_PASSWORD,
    }
else:
    es_connection_details = {"es_url": ES_URL}
 | 
						|
 | 
						|
 | 
						|
# Metadata extraction function
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy selected fields of a JSON record into a metadata dict.

    This function is passed to ``JSONLoader`` as its ``metadata_func``.

    Args:
        record: One JSON record; fields are read with ``dict.get``, so a
            missing field yields ``None``.
        metadata: Metadata dict to update. It is modified in place.

    Returns:
        The same ``metadata`` object, with ``name``, ``summary``, ``url``,
        ``category`` and ``updated_at`` set from ``record``.
    """
    for field in ("name", "summary", "url", "category", "updated_at"):
        metadata[field] = record.get(field)

    return metadata
 | 
						|
 | 
						|
 | 
						|
# Load data: one document per element of the top-level JSON array, taking the
# text from each element's "content" key and the metadata from metadata_func.
loader = JSONLoader(
    file_path="./data/documents.json",
    jq_schema=".[]",
    content_key="content",
    metadata_func=metadata_func,
)

# Split the loaded documents into overlapping chunks before indexing.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=250)
all_splits = text_splitter.split_documents(loader.load())

# Add to vectorDB: embed every chunk with a CPU-hosted HuggingFace model and
# store the result in the "workplace-search-example" index, connecting with
# whichever settings es_connection_details holds.
vectorstore = ElasticsearchStore.from_documents(
    documents=all_splits,
    embedding=HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
    ),
    **es_connection_details,
    index_name="workplace-search-example",
)
 |