mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-30 23:29:54 +00:00 
			
		
		
		
	ran
	```bash
	g grep -l "langchain.vectorstores" | xargs -L 1 sed -i '' "s/langchain\.vectorstores/langchain_community.vectorstores/g"
	g grep -l "langchain.document_loaders" | xargs -L 1 sed -i '' "s/langchain\.document_loaders/langchain_community.document_loaders/g"
	g grep -l "langchain.chat_loaders" | xargs -L 1 sed -i '' "s/langchain\.chat_loaders/langchain_community.chat_loaders/g"
	g grep -l "langchain.document_transformers" | xargs -L 1 sed -i '' "s/langchain\.document_transformers/langchain_community.document_transformers/g"
	g grep -l "langchain\.graphs" | xargs -L 1 sed -i '' "s/langchain\.graphs/langchain_community.graphs/g"
	g grep -l "langchain\.memory\.chat_message_histories" | xargs -L 1 sed -i '' "s/langchain\.memory\.chat_message_histories/langchain_community.chat_message_histories/g"
	gco master libs/langchain/tests/unit_tests/*/test_imports.py
	gco master libs/langchain/tests/unit_tests/**/test_public_api.py
	```
		
			
				
	
	
		
			63 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			63 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import json
 | |
| from pathlib import Path
 | |
| 
 | |
| from langchain.prompts import ChatPromptTemplate
 | |
| from langchain.schema import Document
 | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter
 | |
| from langchain_community.chat_models import ChatOpenAI
 | |
| from langchain_community.embeddings import OpenAIEmbeddings
 | |
| from langchain_community.vectorstores import Chroma
 | |
| from langchain_core.output_parsers import StrOutputParser
 | |
| from langchain_core.pydantic_v1 import BaseModel
 | |
| from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 | |
| 
 | |
# Load output from gpt crawler.
# `output.json` is looked up in the parent of this module's directory; every
# record in it is indexed by the "html", "title" and "url" keys below.
path_to_gptcrawler = Path(__file__).parent.parent / "output.json"
# Decode explicitly as UTF-8: without an `encoding` argument `Path.read_text()`
# falls back to the platform locale encoding, which can mis-decode or reject
# non-ASCII page content (e.g. cp1252 on Windows).
data = json.loads(path_to_gptcrawler.read_text(encoding="utf-8"))
docs = [
    Document(
        page_content=record["html"],
        # Keep the page title and URL next to the text of each document.
        metadata={"title": record["title"], "url": record["url"]},
    )
    for record in data
]
 | |
| 
 | |
# Split
# Break the loaded documents into chunks (chunk_size=1000, chunk_overlap=100)
# before they are embedded and indexed.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)
all_splits = text_splitter.split_documents(docs)
 | |
| 
 | |
# Add to vectorDB
# Embed every chunk with OpenAI embeddings and store it in the Chroma
# collection "rag-gpt-builder"; the retriever is built from that store.
vectorstore = Chroma.from_documents(
    embedding=OpenAIEmbeddings(),
    collection_name="rag-gpt-builder",
    documents=all_splits,
)
retriever = vectorstore.as_retriever()
 | |
| 
 | |
# RAG prompt
# `{context}` and `{question}` are the two placeholders of the template.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)
 | |
| 
 | |
# LLM
model = ChatOpenAI()

# RAG chain
# The input is fanned out in parallel: the retriever produces "context" and a
# passthrough forwards the input unchanged as "question". The resulting mapping
# is piped through the prompt, the chat model and the string output parser.
chain = (
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    | prompt
    | model
    | StrOutputParser()
)
 | |
| 
 | |
| 
 | |
# Add typing for input
# `__root__: str` declares a pydantic v1 custom-root model, i.e. the declared
# input is a plain string rather than an object with named fields.
class Question(BaseModel):
    __root__: str


# Re-bind `chain` to a copy whose declared input type is `Question`.
chain = chain.with_types(input_type=Question)
 |