mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-13 22:59:05 +00:00
Fix images parser
This commit is contained in:
parent
38b50e3277
commit
09c4c1f867
@ -45,13 +45,19 @@ class BaseImageBlobParser(BaseBlobParser):
|
||||
"""
|
||||
try:
|
||||
from PIL import Image as Img
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`Pillow` package not found, please install it with "
|
||||
"`pip install Pillow`"
|
||||
)
|
||||
|
||||
with blob.as_bytes_io() as buf:
|
||||
if blob.mimetype == "application/x-npy":
|
||||
try:
|
||||
img = Img.fromarray(numpy.load(buf))
|
||||
except EOFError:
|
||||
return # Ignore too small images
|
||||
array = numpy.load(buf)
|
||||
if array.ndim == 3 and array.shape[2] == 1: # Grayscale image
|
||||
img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L")
|
||||
else:
|
||||
img = Img.fromarray(array)
|
||||
else:
|
||||
img = Img.open(buf)
|
||||
content = self._analyze_image(img)
|
||||
@ -60,11 +66,6 @@ class BaseImageBlobParser(BaseBlobParser):
|
||||
page_content=content,
|
||||
metadata={**blob.metadata, **{"source": blob.source}},
|
||||
)
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`Pillow` package not found, please install it with "
|
||||
"`pip install Pillow`"
|
||||
)
|
||||
|
||||
|
||||
class RapidOCRBlobParser(BaseImageBlobParser):
|
||||
|
Loading…
Reference in New Issue
Block a user