mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-30 17:22:43 +00:00
Add try/except around original-file handling on ingest
This commit is contained in:
parent
a8a10e412e
commit
484b6b7223
@ -44,7 +44,7 @@ async def process_images_and_generate_doc(pdf_path: str, upload_dir: str):
|
|||||||
ocr = GetOCRText()
|
ocr = GetOCRText()
|
||||||
img_tab = ImageToTable()
|
img_tab = ImageToTable()
|
||||||
pdf_doc = fitz.open(pdf_path)
|
pdf_doc = fitz.open(pdf_path)
|
||||||
|
|
||||||
for page_index in range(len(pdf_doc)):
|
for page_index in range(len(pdf_doc)):
|
||||||
page = pdf_doc[page_index]
|
page = pdf_doc[page_index]
|
||||||
image_list = page.get_images()
|
image_list = page.get_images()
|
||||||
|
@ -294,34 +294,42 @@ async def common_ingest_logic(
|
|||||||
)
|
)
|
||||||
# Handling Original File
|
# Handling Original File
|
||||||
if original_file:
|
if original_file:
|
||||||
print("ORIGINAL PDF FILE PATH IS :: ", original_file)
|
try:
|
||||||
file_name = Path(original_file).name
|
print("ORIGINAL PDF FILE PATH IS :: ", original_file)
|
||||||
upload_path = Path(f"{UPLOAD_DIR}/{file_name}")
|
file_name = Path(original_file).name
|
||||||
|
upload_path = Path(f"{UPLOAD_DIR}/{file_name}")
|
||||||
|
|
||||||
file_ingested = crud.documents.get_by_filename(
|
file_ingested = crud.documents.get_by_filename(
|
||||||
db, file_name=file_name)
|
db, file_name=file_name)
|
||||||
if file_ingested:
|
if file_ingested:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_409_CONFLICT,
|
||||||
|
detail="File already exists. Choose a different file.",
|
||||||
|
)
|
||||||
|
|
||||||
|
if file_name is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail="No file name provided",
|
||||||
|
)
|
||||||
|
|
||||||
|
docs_in = schemas.DocumentCreate(
|
||||||
|
filename=file_name, uploaded_by=current_user.id, department_id=current_user.department_id)
|
||||||
|
crud.documents.create(db=db, obj_in=docs_in)
|
||||||
|
|
||||||
|
with open(upload_path, "wb") as f:
|
||||||
|
with open(original_file, "rb") as original_file_reader:
|
||||||
|
f.write(original_file_reader.read())
|
||||||
|
|
||||||
|
with open(upload_path, "rb") as f:
|
||||||
|
ingested_documents = service.ingest_bin_data(file_name, f)
|
||||||
|
except Exception as e:
|
||||||
|
print(traceback.print_exc())
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_409_CONFLICT,
|
status_code=500,
|
||||||
detail="File already exists. Choose a different file.",
|
detail="Internal Server Error: Unable to ingest file.",
|
||||||
)
|
)
|
||||||
|
|
||||||
if file_name is None:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
detail="No file name provided",
|
|
||||||
)
|
|
||||||
|
|
||||||
docs_in = schemas.DocumentCreate(
|
|
||||||
filename=file_name, uploaded_by=current_user.id, department_id=current_user.department_id)
|
|
||||||
crud.documents.create(db=db, obj_in=docs_in)
|
|
||||||
|
|
||||||
with open(upload_path, "wb") as f:
|
|
||||||
with open(original_file, "rb") as original_file_reader:
|
|
||||||
f.write(original_file_reader.read())
|
|
||||||
|
|
||||||
with open(upload_path, "rb") as f:
|
|
||||||
ingested_documents = service.ingest_bin_data(file_name, f)
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"{file_name} is uploaded by the {current_user.fullname}.")
|
f"{file_name} is uploaded by the {current_user.fullname}.")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user