mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-15 01:13:48 +00:00
add ingest for mongo (#12897)
This commit is contained in:
parent
ce21308f29
commit
99ffeb239f
@ -40,6 +40,13 @@ from rag_mongo import chain as rag_mongo_chain
|
|||||||
add_routes(app, rag_mongo_chain, path="/rag-mongo")
|
add_routes(app, rag_mongo_chain, path="/rag-mongo")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you want to set up an ingestion pipeline, you can add the following code to your `server.py` file:
|
||||||
|
```python
|
||||||
|
from rag_mongo import ingest as rag_mongo_ingest
|
||||||
|
|
||||||
|
add_routes(app, rag_mongo_ingest, path="/rag-mongo-ingest")
|
||||||
|
```
|
||||||
|
|
||||||
(Optional) Let's now configure LangSmith.
|
(Optional) Let's now configure LangSmith.
|
||||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||||
LangSmith is currently in private beta, you can sign up [here](https://smith.langchain.com/).
|
LangSmith is currently in private beta, you can sign up [here](https://smith.langchain.com/).
|
||||||
|
@ -1,11 +1,17 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from langchain.chat_models import ChatOpenAI
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.document_loaders import PyPDFLoader
|
||||||
from langchain.embeddings import OpenAIEmbeddings
|
from langchain.embeddings import OpenAIEmbeddings
|
||||||
from langchain.prompts import ChatPromptTemplate
|
from langchain.prompts import ChatPromptTemplate
|
||||||
from langchain.pydantic_v1 import BaseModel
|
from langchain.pydantic_v1 import BaseModel
|
||||||
from langchain.schema.output_parser import StrOutputParser
|
from langchain.schema.output_parser import StrOutputParser
|
||||||
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
|
from langchain.schema.runnable import (
|
||||||
|
RunnableLambda,
|
||||||
|
RunnableParallel,
|
||||||
|
RunnablePassthrough,
|
||||||
|
)
|
||||||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
|
|
||||||
@ -54,3 +60,24 @@ class Question(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
chain = chain.with_types(input_type=Question)
|
chain = chain.with_types(input_type=Question)
|
||||||
|
|
||||||
|
|
||||||
|
def _ingest(url: str) -> dict:
    """Load a PDF, split it into chunks, and store them in MongoDB Atlas.

    The PDF at ``url`` is read with ``PyPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=500``,
    ``chunk_overlap=0``), embedded with ``OpenAIEmbeddings``, and written
    through ``MongoDBAtlasVectorSearch.from_documents`` into
    ``MONGODB_COLLECTION`` under ``ATLAS_VECTOR_SEARCH_INDEX_NAME``.

    Args:
        url: Location of the PDF handed to ``PyPDFLoader``.

    Returns:
        An empty dict; the function is called for its side effect only.
    """
    pages = PyPDFLoader(url).load()

    # Break the loaded pages into non-overlapping chunks.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    chunks = splitter.split_documents(pages)

    # Embed the chunks and insert them; the returned vector store handle
    # is not needed here, so it is discarded.
    MongoDBAtlasVectorSearch.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(disallowed_special=()),
        collection=MONGODB_COLLECTION,
        index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
# Expose the ingestion function as a runnable so it can be imported
# (`from rag_mongo import ingest`) and passed to `add_routes`.
ingest = RunnableLambda(_ingest)
|
||||||
|
Loading…
Reference in New Issue
Block a user