Try/except added on ingest

2025-08-15 14:13:47 +00:00 · 2024-02-27 10:38:44 +05:45 · 2024-02-27 10:38:44 +05:45 · 484b6b7223
commit 484b6b7223
parent a8a10e412e
2 changed files with 34 additions and 26 deletions
--- a/private_gpt/components/ocr_components/table_ocr_api.py
+++ b/private_gpt/components/ocr_components/table_ocr_api.py
@@ -44,7 +44,7 @@ async def process_images_and_generate_doc(pdf_path: str, upload_dir: str):
    ocr = GetOCRText()
    img_tab = ImageToTable()
    pdf_doc = fitz.open(pdf_path)
-
+    
    for page_index in range(len(pdf_doc)):
        page = pdf_doc[page_index]
        image_list = page.get_images()
--- a/private_gpt/server/ingest/ingest_router.py
+++ b/private_gpt/server/ingest/ingest_router.py
@@ -294,34 +294,42 @@ async def common_ingest_logic(
            )
        # Handling Original File
        if original_file:
-            print("ORIGINAL PDF FILE PATH IS :: ", original_file)
+            try:
-            file_name = Path(original_file).name
+                print("ORIGINAL PDF FILE PATH IS :: ", original_file)
-            upload_path = Path(f"{UPLOAD_DIR}/{file_name}")
+                file_name = Path(original_file).name
                upload_path = Path(f"{UPLOAD_DIR}/{file_name}")
-            file_ingested = crud.documents.get_by_filename(
+                file_ingested = crud.documents.get_by_filename(
-                db, file_name=file_name)
+                    db, file_name=file_name)
-            if file_ingested:
+                if file_ingested:
                    raise HTTPException(
                        status_code=status.HTTP_409_CONFLICT,
                        detail="File already exists. Choose a different file.",
                    )
                if file_name is None:
                    raise HTTPException(
                        status_code=status.HTTP_400_BAD_REQUEST,
                        detail="No file name provided",
                    )
                docs_in = schemas.DocumentCreate(
                    filename=file_name, uploaded_by=current_user.id, department_id=current_user.department_id)
                crud.documents.create(db=db, obj_in=docs_in)
                with open(upload_path, "wb") as f:
                    with open(original_file, "rb") as original_file_reader:
                        f.write(original_file_reader.read())
                with open(upload_path, "rb") as f:
                    ingested_documents = service.ingest_bin_data(file_name, f)
            except Exception as e:
                print(traceback.print_exc())
                raise HTTPException(
-                    status_code=status.HTTP_409_CONFLICT,
+                    status_code=500,
-                    detail="File already exists. Choose a different file.",
+                    detail="Internal Server Error: Unable to ingest file.",
                )
-
+                    
            if file_name is None:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail="No file name provided",
                )
            docs_in = schemas.DocumentCreate(
                filename=file_name, uploaded_by=current_user.id, department_id=current_user.department_id)
            crud.documents.create(db=db, obj_in=docs_in)
            with open(upload_path, "wb") as f:
                with open(original_file, "rb") as original_file_reader:
                    f.write(original_file_reader.read())
            with open(upload_path, "rb") as f:
                ingested_documents = service.ingest_bin_data(file_name, f)
        logger.info(
            f"{file_name} is uploaded by the {current_user.fullname}.")