community[patch]: Handle gray scale images in ImageBlobParser (Fixes 30261 and 29586) (#30493)

Fix [29586](https://github.com/langchain-ai/langchain/issues/29586) and
[30261](https://github.com/langchain-ai/langchain/pull/30261)
This commit is contained in:
Philippe PRADOS 2025-03-28 15:15:40 +01:00 committed by GitHub
parent 1f0686db80
commit 92189c8b31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 46 additions and 3 deletions

View File

@ -53,7 +53,11 @@ class BaseImageBlobParser(BaseBlobParser):
with blob.as_bytes_io() as buf:
if blob.mimetype == "application/x-npy":
img = Img.fromarray(numpy.load(buf))
array = numpy.load(buf)
if array.ndim == 3 and array.shape[2] == 1: # Grayscale image
img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L")
else:
img = Img.fromarray(array)
else:
img = Img.open(buf)
content = self._analyze_image(img)

View File

@ -1,7 +1,9 @@
import re
from io import BytesIO
from pathlib import Path
from typing import Any, Type
import numpy as np
import pytest
from langchain_core.documents.base import Blob
from langchain_core.language_models import FakeMessagesListChatModel
@ -18,12 +20,13 @@ building_image = Blob.from_path(path_base / "examples/building.jpg")
# Sample image blobs loaded from files under path_base / "examples".
text_image = Blob.from_path(path_base / "examples/text.png")
page_image = Blob.from_path(path_base / "examples/page.png")
# Expected-content pattern for the gray-scale text sample in the parametrized
# cases below; (?ms) turns on MULTILINE and DOTALL so ".*" spans line breaks.
_re_in_image = r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*"
@pytest.mark.parametrize(
"blob,body",
[
(building_image, ""),
(text_image, r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*BACKGROUNDS.*"),
(Blob.from_path(path_base / "examples/text-gray.png"), _re_in_image),
],
)
@pytest.mark.parametrize(
@ -58,3 +61,39 @@ def test_image_parser_with_differents_files(
documents = list(blob_loader(**kw).lazy_parse(blob))
assert len(documents) == 1
assert re.compile(body).match(documents[0].page_content)
@pytest.mark.parametrize(
    "blob_loader,kw",
    [
        (RapidOCRBlobParser, {}),
        (TesseractBlobParser, {}),
        (
            LLMImageBlobParser,
            {
                "model": FakeMessagesListChatModel(
                    responses=[
                        ChatMessage(
                            id="ai1",
                            role="system",
                            content="A building. MAKE TEXT STAND OUT FROM BACKGROUNDS",
                        ),
                    ]
                )
            },
        ),
    ],
)
def test_image_parser_with_numpy(
    blob_loader: Type,
    kw: dict[str, Any],
) -> None:
    """Check that a single-channel ``.npy`` image is parsed into one document.

    A ``(height, width, 1)`` array is serialized with ``numpy.save``, wrapped
    in a ``Blob`` whose mime type is ``application/x-npy``, and handed to the
    parser under test.

    Args:
        blob_loader: Parser class to instantiate.
        kw: Keyword arguments forwarded to the parser constructor.
    """
    # Use a zero-filled uint8 array rather than ``np.empty``: ``empty`` leaves
    # the memory uninitialised (nondeterministic input) and defaults to
    # float64, which does not match an 8-bit gray-scale image.
    gray_image = np.zeros(shape=(412, 1652, 1), dtype=np.uint8)
    with BytesIO() as buffer:
        np.save(buffer, gray_image)
        # getvalue() returns the whole buffer regardless of the stream
        # position, so no rewind is needed.
        npy_bytes = buffer.getvalue()

    blob = Blob.from_data(npy_bytes, mime_type="application/x-npy")

    documents = list(blob_loader(**kw).lazy_parse(blob))
    assert len(documents) == 1