Updated single injector for pdf ocr

This commit is contained in:
Saurab-Shrestha 2024-02-29 12:22:25 +05:45
parent 77bf96cc42
commit a87531c141
2 changed files with 19 additions and 17 deletions

View File

@@ -39,10 +39,13 @@ async def save_uploaded_file(file: UploadFile, upload_dir: str):
return file_path return file_path
async def process_images_and_generate_doc(pdf_path: str, upload_dir: str): async def process_images_and_generate_doc(request: Request, pdf_path: str, upload_dir: str):
doc = Document() doc = Document()
ocr = GetOCRText()
img_tab = ImageToTable() ocr = request.state.injector.get(GetOCRText)
img_tab = request.state.injector.get(ImageToTable)
# ocr = GetOCRText()
# img_tab = ImageToTable()
pdf_doc = fitz.open(pdf_path) pdf_doc = fitz.open(pdf_path)
for page_index in range(len(pdf_doc)): for page_index in range(len(pdf_doc)):
@@ -87,7 +90,7 @@ async def process_pdf_ocr(
print("The file name is: ", file.filename) print("The file name is: ", file.filename)
pdf_path = await save_uploaded_file(file, UPLOAD_DIR) pdf_path = await save_uploaded_file(file, UPLOAD_DIR)
print("The file path: ", pdf_path) print("The file path: ", pdf_path)
ocr_doc_path = await process_images_and_generate_doc(pdf_path, UPLOAD_DIR) ocr_doc_path = await process_images_and_generate_doc(request, pdf_path, UPLOAD_DIR)
ingested_documents = await common_ingest_logic( ingested_documents = await common_ingest_logic(
request=request, db=db, ocr_file=ocr_doc_path, current_user=current_user, original_file=None, log_audit=log_audit request=request, db=db, ocr_file=ocr_doc_path, current_user=current_user, original_file=None, log_audit=log_audit
) )
@@ -110,7 +113,7 @@ async def process_both(
UPLOAD_DIR = OCR_UPLOAD UPLOAD_DIR = OCR_UPLOAD
try: try:
pdf_path = await save_uploaded_file(file, UPLOAD_DIR) pdf_path = await save_uploaded_file(file, UPLOAD_DIR)
ocr_doc_path = await process_images_and_generate_doc(pdf_path, UPLOAD_DIR) ocr_doc_path = await process_images_and_generate_doc(request, pdf_path, UPLOAD_DIR)
ingested_documents = await common_ingest_logic( ingested_documents = await common_ingest_logic(
request=request, db=db, ocr_file=ocr_doc_path, current_user=current_user, original_file=pdf_path, log_audit=log_audit request=request, db=db, ocr_file=ocr_doc_path, current_user=current_user, original_file=pdf_path, log_audit=log_audit
) )

View File

@@ -1,21 +1,20 @@
"""FastAPI app creation, logger configuration and main API routes.""" """FastAPI app creation, logger configuration and main API routes."""
import logging import logging
from injector import Injector
from fastapi import Depends, FastAPI, Request from fastapi import Depends, FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from injector import Injector
from private_gpt.server.chat.chat_router import chat_router
from private_gpt.server.chunks.chunks_router import chunks_router
from private_gpt.server.completions.completions_router import completions_router
from private_gpt.server.embeddings.embeddings_router import embeddings_router
from private_gpt.server.health.health_router import health_router
from private_gpt.server.ingest.ingest_router import ingest_router
from private_gpt.users.api.v1.api import api_router
from private_gpt.components.ocr_components.table_ocr_api import pdf_router
from private_gpt.settings.settings import Settings from private_gpt.settings.settings import Settings
from private_gpt.home import home_router from private_gpt.users.api.v1.api import api_router
from private_gpt.server.chat.chat_router import chat_router
from private_gpt.server.health.health_router import health_router
from private_gpt.server.chunks.chunks_router import chunks_router
from private_gpt.server.ingest.ingest_router import ingest_router
from private_gpt.components.ocr_components.table_ocr_api import pdf_router
from private_gpt.server.completions.completions_router import completions_router
from private_gpt.server.embeddings.embeddings_router import embeddings_router
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -34,8 +33,8 @@ def create_app(root_injector: Injector) -> FastAPI:
app.include_router(health_router) app.include_router(health_router)
app.include_router(api_router) app.include_router(api_router)
# app.include_router(home_router)
app.include_router(pdf_router) app.include_router(pdf_router)
settings = root_injector.get(Settings) settings = root_injector.get(Settings)
if settings.server.cors.enabled: if settings.server.cors.enabled:
logger.debug("Setting up CORS middleware") logger.debug("Setting up CORS middleware")