mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-05 13:06:03 +00:00
langchain[minor],community[minor]: Add async methods in BaseLoader (#16634)
Adds:
* methods `aload()` and `alazy_load()` to interface `BaseLoader`
* implementation for class `MergedDataLoader`
* support for class `BaseLoader` in async function `aindex()` with unit tests

Note: this is compatible with existing `aload()` methods that some loaders already had.

**Twitter handle:** @cbornet_

---------

Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
This commit is contained in:
committed by
GitHub
parent
c37ca45825
commit
af8c5c185b
@@ -2,9 +2,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Iterator, List, Optional
|
||||
from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.runnables import run_in_executor
|
||||
|
||||
from langchain_community.document_loaders.blob_loaders import Blob
|
||||
|
||||
@@ -52,14 +53,22 @@ class BaseLoader(ABC):
|
||||
|
||||
# Attention: This method will be upgraded into an abstractmethod once it's
|
||||
# implemented in all the existing subclasses.
|
||||
def lazy_load(
|
||||
self,
|
||||
) -> Iterator[Document]:
|
||||
def lazy_load(self) -> Iterator[Document]:
    """Lazily iterate over Documents.

    This base version has no implementation; it always raises.

    Raises:
        NotImplementedError: Always, with a message naming the concrete
            class of ``self``.
    """
    # Build the message first so the raise statement stays on one line.
    message = f"{self.__class__.__name__} does not implement lazy_load()"
    raise NotImplementedError(message)
|
||||
|
||||
async def alazy_load(self) -> AsyncIterator[Document]:
    """Asynchronously iterate over the Documents produced by ``lazy_load()``.

    The call to ``lazy_load()`` and every following ``next()`` step are
    dispatched through ``run_in_executor`` and awaited.

    Yields:
        Each Document from the ``lazy_load()`` iterator, in order.
    """
    # NOTE(review): presumably run_in_executor(None, ...) selects a default
    # executor -- confirm against langchain_core.
    documents = await run_in_executor(None, self.lazy_load)
    # Unique default for next(): exhaustion is reported as a return value
    # rather than as a StopIteration exception.
    exhausted = object()
    while True:
        item = await run_in_executor(None, next, documents, exhausted)
        if item is exhausted:
            return
        yield item
|
||||
|
||||
|
||||
class BaseBlobParser(ABC):
|
||||
"""Abstract interface for blob parsers.
|
||||
|
Reference in New Issue
Block a user