langchain[minor],community[minor]: Add async methods in BaseLoader (#16634)

Adds:
* methods `aload()` and `alazy_load()` to interface `BaseLoader`
* implementation for class `MergedDataLoader `
* support for class `BaseLoader` in async function `aindex()` with unit
tests

Note: this is compatible with existing `aload()` methods that some
loaders already had.

**Twitter handle:** @cbornet_

---------

Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
This commit is contained in:
Christophe Bornet
2024-01-31 20:08:11 +01:00
committed by GitHub
parent c37ca45825
commit af8c5c185b
5 changed files with 71 additions and 52 deletions

View File

@@ -2,9 +2,10 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Iterator, List, Optional
from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional
from langchain_core.documents import Document
from langchain_core.runnables import run_in_executor
from langchain_community.document_loaders.blob_loaders import Blob
@@ -52,14 +53,22 @@ class BaseLoader(ABC):
# Attention: This method will be upgraded into an abstractmethod once it's
# implemented in all the existing subclasses.
def lazy_load(
self,
) -> Iterator[Document]:
def lazy_load(self) -> Iterator[Document]:
"""A lazy loader for Documents."""
raise NotImplementedError(
f"{self.__class__.__name__} does not implement lazy_load()"
)
async def alazy_load(self) -> AsyncIterator[Document]:
"""A lazy loader for Documents."""
iterator = await run_in_executor(None, self.lazy_load)
done = object()
while True:
doc = await run_in_executor(None, next, iterator, done)
if doc is done:
break
yield doc
class BaseBlobParser(ABC):
"""Abstract interface for blob parsers.