mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-08 14:05:16 +00:00
Blob: Add validator and use future annotations (#3650)
Minor changes to the Blob schema. --------- Co-authored-by: Zander Chase <130414180+vowelparrot@users.noreply.github.com>
This commit is contained in:
parent
c5a4b4fea1
commit
708787dddb
@ -4,14 +4,16 @@ The goal is to facilitate decoupling of content loading from content parsing cod
|
|||||||
|
|
||||||
In addition, content loading code should provide a lazy loading interface by default.
|
In addition, content loading code should provide a lazy loading interface by default.
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import mimetypes
|
import mimetypes
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from io import BufferedReader, BytesIO
|
from io import BufferedReader, BytesIO
|
||||||
from pathlib import PurePath
|
from pathlib import PurePath
|
||||||
from typing import Generator, Iterable, Optional, Union
|
from typing import Any, Generator, Iterable, Mapping, Optional, Union
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel, root_validator
|
||||||
|
|
||||||
PathLike = Union[str, PurePath]
|
PathLike = Union[str, PurePath]
|
||||||
|
|
||||||
@ -44,6 +46,13 @@ class Blob(BaseModel):
|
|||||||
"""The source location of the blob as string if known otherwise none."""
|
"""The source location of the blob as string if known otherwise none."""
|
||||||
return str(self.path) if self.path else None
|
return str(self.path) if self.path else None
|
||||||
|
|
||||||
|
@root_validator(pre=True)
def check_blob_is_valid(cls, values: Mapping[str, Any]) -> Mapping[str, Any]:
    """Verify that either data or path is provided.

    Declared with ``pre=True``, so ``values`` holds the raw keyword
    arguments passed to the constructor.

    Args:
        values: Raw field values supplied when constructing the blob.

    Returns:
        The ``values`` mapping, unmodified.

    Raises:
        ValueError: If both ``data`` and ``path`` are absent or ``None``.
    """
    # Compare against None instead of testing key presence: an explicit
    # ``data=None`` without a path still leaves the blob with nothing to
    # read. Empty-but-present content such as b"" or "" remains valid.
    if values.get("data") is None and values.get("path") is None:
        raise ValueError("Either data or path must be provided")
    return values
|
||||||
|
|
||||||
def as_string(self) -> str:
|
def as_string(self) -> str:
|
||||||
"""Read data as a string."""
|
"""Read data as a string."""
|
||||||
if self.data is None and self.path:
|
if self.data is None and self.path:
|
||||||
@ -87,7 +96,7 @@ class Blob(BaseModel):
|
|||||||
encoding: str = "utf-8",
|
encoding: str = "utf-8",
|
||||||
mime_type: Optional[str] = None,
|
mime_type: Optional[str] = None,
|
||||||
guess_type: bool = True,
|
guess_type: bool = True,
|
||||||
) -> "Blob":
|
) -> Blob:
|
||||||
"""Load the blob from a path like object.
|
"""Load the blob from a path like object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -116,7 +125,7 @@ class Blob(BaseModel):
|
|||||||
encoding: str = "utf-8",
|
encoding: str = "utf-8",
|
||||||
mime_type: Optional[str] = None,
|
mime_type: Optional[str] = None,
|
||||||
path: Optional[str] = None,
|
path: Optional[str] = None,
|
||||||
) -> "Blob":
|
) -> Blob:
|
||||||
"""Initialize the blob from in-memory data.
|
"""Initialize the blob from in-memory data.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -90,6 +90,15 @@ def test_mime_type_inference(
|
|||||||
assert blob.mimetype == expected_mime_type
|
assert blob.mimetype == expected_mime_type
|
||||||
|
|
||||||
|
|
||||||
|
def test_blob_initialization_validator() -> None:
    """Check that a Blob cannot be built without data or a path."""
    expected_error = "Either data or path must be provided"

    # No arguments at all must be rejected by the validator.
    with pytest.raises(ValueError, match=expected_error):
        Blob()

    # Supplying either one of the two fields is sufficient.
    blob_from_data = Blob(data=b"Hello, World!")
    assert blob_from_data is not None

    blob_from_path = Blob(path="some_path")
    assert blob_from_path is not None
|
||||||
|
|
||||||
|
|
||||||
def test_blob_loader() -> None:
|
def test_blob_loader() -> None:
|
||||||
"""Simple test that verifies that we can implement a blob loader."""
|
"""Simple test that verifies that we can implement a blob loader."""
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user