diff --git a/libs/community/langchain_community/document_loaders/parsers/images.py b/libs/community/langchain_community/document_loaders/parsers/images.py index eb3da4d2a11..3d977aae973 100644 --- a/libs/community/langchain_community/document_loaders/parsers/images.py +++ b/libs/community/langchain_community/document_loaders/parsers/images.py @@ -53,7 +53,11 @@ class BaseImageBlobParser(BaseBlobParser): with blob.as_bytes_io() as buf: if blob.mimetype == "application/x-npy": - img = Img.fromarray(numpy.load(buf)) + array = numpy.load(buf) + if array.ndim == 3 and array.shape[2] == 1: # Grayscale image + img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L") + else: + img = Img.fromarray(array) else: img = Img.open(buf) content = self._analyze_image(img) diff --git a/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py b/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py index e6d71fae692..e5c6e372b8c 100644 --- a/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py +++ b/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py @@ -1,7 +1,9 @@ import re +from io import BytesIO from pathlib import Path from typing import Any, Type +import numpy as np import pytest from langchain_core.documents.base import Blob from langchain_core.language_models import FakeMessagesListChatModel @@ -18,12 +20,13 @@ building_image = Blob.from_path(path_base / "examples/building.jpg") text_image = Blob.from_path(path_base / "examples/text.png") page_image = Blob.from_path(path_base / "examples/page.png") +_re_in_image = r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*" + @pytest.mark.parametrize( "blob,body", [ - (building_image, ""), - (text_image, r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*BACKGROUNDS.*"), + (Blob.from_path(path_base / "examples/text-gray.png"), _re_in_image), ], ) @pytest.mark.parametrize( @@ -58,3 +61,39 @@ def test_image_parser_with_differents_files( documents = 
list(blob_loader(**kw).lazy_parse(blob))
     assert len(documents) == 1
     assert re.compile(body).match(documents[0].page_content)
+
+
+@pytest.mark.parametrize(
+    "blob_loader,kw",
+    [
+        (RapidOCRBlobParser, {}),
+        (TesseractBlobParser, {}),
+        (
+            LLMImageBlobParser,
+            {
+                "model": FakeMessagesListChatModel(
+                    responses=[
+                        ChatMessage(
+                            id="ai1",
+                            role="system",
+                            content="A building. MAKE TEXT STAND OUT FROM BACKGROUNDS",
+                        ),
+                    ]
+                )
+            },
+        ),
+    ],
+)
+def test_image_parser_with_numpy(blob_loader: Type, kw: dict[str, Any]) -> None:
+    """Parse a single-channel ``.npy`` blob; expect exactly one document."""
+    # Build real 8-bit pixels: np.empty yields uninitialized float64 memory,
+    # which makes the image content non-deterministic from run to run.
+    gray_image = np.zeros((412, 1652, 1), dtype=np.uint8)
+    with BytesIO() as buffer:
+        np.save(buffer, gray_image)
+        # getvalue() returns the whole buffer regardless of stream position.
+        npy_bytes = buffer.getvalue()
+
+    blob = Blob.from_data(npy_bytes, mime_type="application/x-npy")
+    documents = list(blob_loader(**kw).lazy_parse(blob))
+    assert len(documents) == 1