community[patch]: Handle grayscale images in ImageBlobParser (fixes #30261 and #29586) (#30493)

Fixes issue [#29586](https://github.com/langchain-ai/langchain/issues/29586) and
pull request [#30261](https://github.com/langchain-ai/langchain/pull/30261).
This commit is contained in:
Philippe PRADOS 2025-03-28 15:15:40 +01:00 committed by GitHub
parent 1f0686db80
commit 92189c8b31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 46 additions and 3 deletions

View File

@ -53,7 +53,11 @@ class BaseImageBlobParser(BaseBlobParser):
with blob.as_bytes_io() as buf: with blob.as_bytes_io() as buf:
if blob.mimetype == "application/x-npy": if blob.mimetype == "application/x-npy":
img = Img.fromarray(numpy.load(buf)) array = numpy.load(buf)
if array.ndim == 3 and array.shape[2] == 1: # Grayscale image
img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L")
else:
img = Img.fromarray(array)
else: else:
img = Img.open(buf) img = Img.open(buf)
content = self._analyze_image(img) content = self._analyze_image(img)

View File

@ -1,7 +1,9 @@
import re import re
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Any, Type from typing import Any, Type
import numpy as np
import pytest import pytest
from langchain_core.documents.base import Blob from langchain_core.documents.base import Blob
from langchain_core.language_models import FakeMessagesListChatModel from langchain_core.language_models import FakeMessagesListChatModel
@ -18,12 +20,13 @@ building_image = Blob.from_path(path_base / "examples/building.jpg")
text_image = Blob.from_path(path_base / "examples/text.png") text_image = Blob.from_path(path_base / "examples/text.png")
page_image = Blob.from_path(path_base / "examples/page.png") page_image = Blob.from_path(path_base / "examples/page.png")
_re_in_image = r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*"
@pytest.mark.parametrize( @pytest.mark.parametrize(
"blob,body", "blob,body",
[ [
(building_image, ""), (Blob.from_path(path_base / "examples/text-gray.png"), _re_in_image),
(text_image, r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*BACKGROUNDS.*"),
], ],
) )
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -58,3 +61,39 @@ def test_image_parser_with_differents_files(
documents = list(blob_loader(**kw).lazy_parse(blob)) documents = list(blob_loader(**kw).lazy_parse(blob))
assert len(documents) == 1 assert len(documents) == 1
assert re.compile(body).match(documents[0].page_content) assert re.compile(body).match(documents[0].page_content)
@pytest.mark.parametrize(
    "blob_loader,kw",
    [
        (RapidOCRBlobParser, {}),
        (TesseractBlobParser, {}),
        (
            LLMImageBlobParser,
            {
                "model": FakeMessagesListChatModel(
                    responses=[
                        ChatMessage(
                            id="ai1",
                            role="system",
                            content="A building. MAKE TEXT STAND OUT FROM BACKGROUNDS",
                        ),
                    ]
                )
            },
        ),
    ],
)
def test_image_parser_with_numpy(
    blob_loader: Type,
    kw: dict[str, Any],
) -> None:
    """Check that a single-channel ``.npy`` image blob yields one document.

    The blob carries a serialized NumPy array of shape (height, width, 1)
    with the ``application/x-npy`` mime type. Each parametrized parser is
    expected to produce exactly one document from it.

    Args:
        blob_loader: Image blob parser class under test.
        kw: Keyword arguments used to instantiate ``blob_loader``.
    """
    # Use a zero-filled uint8 array rather than ``np.empty``: ``np.empty``
    # returns uninitialised float64 memory, which makes the input
    # non-deterministic and is not an 8-bit grayscale pixel layout.
    gray_image = np.zeros(shape=(412, 1652, 1), dtype=np.uint8)
    with BytesIO() as buffer:
        np.save(buffer, gray_image)
        # getvalue() returns the whole buffer regardless of stream position.
        npy_bytes = buffer.getvalue()
    blob = Blob.from_data(npy_bytes, mime_type="application/x-npy")
    documents = list(blob_loader(**kw).lazy_parse(blob))
    assert len(documents) == 1