mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-22 09:41:52 +00:00 
			
		
		
		
	Adds: * methods `aload()` and `alazy_load()` to interface `BaseLoader` * implementation for class `MergedDataLoader` * support for class `BaseLoader` in async function `aindex()` with unit tests. Note: this is compatible with the existing `aload()` methods that some loaders already had. **Twitter handle:** @cbornet_ --------- Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
		
			
				
	
	
		
			35 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			35 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import AsyncIterator, Iterator, List
 | |
| 
 | |
| from langchain_core.documents import Document
 | |
| 
 | |
| from langchain_community.document_loaders.base import BaseLoader
 | |
| 
 | |
| 
 | |
class MergedDataLoader(BaseLoader):
    """Merge documents from a list of loaders.

    Documents are produced loader by loader, in the order in which the
    loaders were passed to the constructor.
    """

    def __init__(self, loaders: List):
        """Initialize with a list of loaders.

        Args:
            loaders: The loaders whose documents are merged. The synchronous
                path calls ``lazy_load()`` (falling back to ``load()``) on
                each of them; the asynchronous path calls ``alazy_load()``.
        """
        self.loaders = loaders

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield the documents of each individual loader in turn.

        Yields:
            Every document of the first loader, then of the second, and so on.
        """
        for loader in self.loaders:
            # Not every loader implements lazy_load(); fall back to the eager
            # load() when the call itself raises NotImplementedError. An error
            # raised later, while iterating, is deliberately not intercepted.
            try:
                data = loader.lazy_load()
            except NotImplementedError:
                data = loader.load()
            # Delegate instead of re-yielding by hand, so closing this
            # generator early also closes the underlying loader's generator.
            yield from data

    def load(self) -> List[Document]:
        """Load the documents of every loader into a single list."""
        return list(self.lazy_load())

    async def alazy_load(self) -> AsyncIterator[Document]:
        """Asynchronously yield the documents of each individual loader in turn.

        The loaders are consumed one after another, not concurrently.

        Yields:
            Every document of the first loader, then of the second, and so on.
        """
        for loader in self.loaders:
            async for document in loader.alazy_load():
                yield document
 |