mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-25 01:16:55 +00:00
cr
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
import os
|
||||
from io import BytesIO
|
||||
from typing import Any, List, Optional
|
||||
from typing import List
|
||||
|
||||
import boto3
|
||||
from PIL import Image
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class AwsTextractExtraction:
|
||||
@@ -14,8 +12,8 @@ class AwsTextractExtraction:
|
||||
self,
|
||||
aws_region_name: str,
|
||||
aws_secret_key: str,
|
||||
aws_access_key,
|
||||
aws_session_token,
|
||||
aws_access_key: str,
|
||||
aws_session_token: str,
|
||||
file_path: str,
|
||||
):
|
||||
self.aws_region_name = aws_region_name
|
||||
@@ -24,7 +22,7 @@ class AwsTextractExtraction:
|
||||
self.aws_session_token = aws_session_token
|
||||
self.file_path = file_path
|
||||
try:
|
||||
import boto3
|
||||
import boto3 # noqa: F401
|
||||
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
@@ -37,11 +35,15 @@ class AwsTextractExtraction:
|
||||
page_no = 0
|
||||
|
||||
textract_client = boto3.client(
|
||||
"textract", region_name, aws_access_key, aws_secret_key, aws_session_token
|
||||
"textract",
|
||||
self.aws_region_name,
|
||||
self.aws_access_key,
|
||||
self.aws_access_key,
|
||||
self.aws_session_token,
|
||||
)
|
||||
Pil_Image_obj = Image.open(self.file_path)
|
||||
pil_image_obj = Image.open(self.file_path)
|
||||
buf = BytesIO()
|
||||
Pil_Image_obj.save(buf, format="PNG")
|
||||
pil_image_obj.save(buf, format="PNG")
|
||||
image_bytes = buf.getvalue()
|
||||
|
||||
response = textract_client.detect_document_text(Document={"Bytes": image_bytes})
|
||||
|
||||
Reference in New Issue
Block a user