diff --git a/langchain/document_loaders/aws_textract.py b/langchain/document_loaders/aws_textract.py
index 6d3269515be..9a1808336db 100644
--- a/langchain/document_loaders/aws_textract.py
+++ b/langchain/document_loaders/aws_textract.py
@@ -1,12 +1,10 @@
-import os
 from io import BytesIO
-from typing import Any, List, Optional
+from typing import List
 
 import boto3
 from PIL import Image
 
 from langchain.docstore.document import Document
-from langchain.utils import get_from_dict_or_env
 
 
 class AwsTextractExtraction:
@@ -14,8 +12,8 @@ class AwsTextractExtraction:
         self,
         aws_region_name: str,
         aws_secret_key: str,
-        aws_access_key,
-        aws_session_token,
+        aws_access_key: str,
+        aws_session_token: str,
         file_path: str,
     ):
         self.aws_region_name = aws_region_name
@@ -24,7 +22,7 @@ class AwsTextractExtraction:
         self.aws_session_token = aws_session_token
         self.file_path = file_path
         try:
-            import boto3
+            import boto3  # noqa: F401
 
         except ImportError:
             raise ValueError(
@@ -37,11 +35,16 @@ class AwsTextractExtraction:
         page_no = 0
 
         textract_client = boto3.client(
-            "textract", region_name, aws_access_key, aws_secret_key, aws_session_token
+            "textract",
+            # Pass by keyword; positionally these land on api_version, use_ssl, ...
+            region_name=self.aws_region_name,
+            aws_access_key_id=self.aws_access_key,
+            aws_secret_access_key=self.aws_secret_key,
+            aws_session_token=self.aws_session_token,
         )
-        Pil_Image_obj = Image.open(self.file_path)
+        pil_image_obj = Image.open(self.file_path)
         buf = BytesIO()
-        Pil_Image_obj.save(buf, format="PNG")
+        pil_image_obj.save(buf, format="PNG")
         image_bytes = buf.getvalue()
 
         response = textract_client.detect_document_text(Document={"Bytes": image_bytes})