mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-14 08:56:27 +00:00
add ingest for mongo (#12897)
This commit is contained in:
parent
ce21308f29
commit
99ffeb239f
@ -40,6 +40,13 @@ from rag_mongo import chain as rag_mongo_chain
|
||||
add_routes(app, rag_mongo_chain, path="/rag-mongo")
|
||||
```
|
||||
|
||||
If you want to set up an ingestion pipeline, you can add the following code to your `server.py` file:
|
||||
```python
|
||||
from rag_mongo import ingest as rag_mongo_ingest
|
||||
|
||||
add_routes(app, rag_mongo_ingest, path="/rag-mongo-ingest")
|
||||
```
|
||||
|
||||
(Optional) Let's now configure LangSmith.
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).
|
||||
|
@ -1,11 +1,17 @@
|
||||
import os
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.document_loaders import PyPDFLoader
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
|
||||
from langchain.schema.runnable import (
|
||||
RunnableLambda,
|
||||
RunnableParallel,
|
||||
RunnablePassthrough,
|
||||
)
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
||||
from pymongo import MongoClient
|
||||
|
||||
@ -54,3 +60,24 @@ class Question(BaseModel):
|
||||
|
||||
|
||||
# Declare Question as the chain's input type.
chain = chain.with_types(input_type=Question)
|
||||
|
||||
|
||||
def _ingest(url: str) -> dict:
    """Load the document at ``url``, split it, and store the chunks in MongoDB Atlas.

    Args:
        url: Location handed to ``PyPDFLoader``.

    Returns:
        An empty dict; the work happens as a side effect of inserting the
        embedded chunks into ``MONGODB_COLLECTION``.
    """
    pages = PyPDFLoader(url).load()

    # Split the loaded pages (chunk_size=500, no overlap between chunks).
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    chunks = splitter.split_documents(pages)

    # Embed the chunks and insert them into MongoDB Atlas Vector Search.
    # The vector store object returned by from_documents is not needed here.
    MongoDBAtlasVectorSearch.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(disallowed_special=()),
        collection=MONGODB_COLLECTION,
        index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )
    return {}
|
||||
|
||||
|
||||
# Wrap the ingestion function as a runnable so it can be mounted with add_routes.
ingest = RunnableLambda(_ingest)
|
||||
|
Loading…
Reference in New Issue
Block a user