extract text from pdf and image

2025-09-26 15:26:19 +00:00 · 2025-05-24 15:59:08 +08:00
parent 6c9109fc89
commit 5639169c2b
11 changed files with 161 additions and 17 deletions
--- a/frontend/src/hooks/metadata-ai-operation.js
+++ b/frontend/src/hooks/metadata-ai-operation.js
@@ -106,6 +106,24 @@ export const MetadataAIOperationsProvider = ({
    });
  }, [repoID]);

+
+  // Extract text from a file through metadataAPI.extractText, showing an in-progress toaster until it settles.
+  // Success: success_callback({ parentDir, fileName, extractedText }); failure: danger toast, then fail_callback().
+  const extractText = useCallback(({ parentDir, fileName }, { success_callback, fail_callback } = {}) => {
+    const filePath = Utils.joinPath(parentDir, fileName);
+    const inProgressToaster = toaster.notifyInProgress(gettext('Extracting text by AI...'), { duration: null });
+    metadataAPI.extractText(repoID, filePath).then(res => {
+      const extractedText = res?.data?.text || '';
+      inProgressToaster.close();
+      success_callback && success_callback({ parentDir, fileName, extractedText });
+    }).catch(error => {
+      inProgressToaster.close();
+      const errorMessage = gettext('Failed to extract text');
+      toaster.danger(errorMessage);
+      fail_callback && fail_callback();
+    });
+  }, [repoID]);
+
  return (
    <MetadataAIOperationsContext.Provider value={{
      enableMetadata,
@@ -119,6 +137,7 @@ export const MetadataAIOperationsProvider = ({
      extractFilesDetails,
      extractFileDetails,
      faceRecognition,
+      extractText
    }}>
      {children}
    </MetadataAIOperationsContext.Provider>
--- a/frontend/src/metadata/api.js
+++ b/frontend/src/metadata/api.js
@@ -406,6 +406,15 @@ class MetadataManagerAPI {
    return this.req.post(url, params);
  };

+  // Request AI text extraction for one file.
+  // repoID is sent as `repo_id`, filePath is sent as `path`.
+  // Returns whatever `this.req.post` returns for the extract-text endpoint.
+  extractText = (repoID, filePath) => {
+    const endpoint = this.server + '/api/v2.1/ai/extract-text/';
+    const payload = { path: filePath, repo_id: repoID };
+    return this.req.post(endpoint, payload);
+  };
+
 }

 const metadataAPI = new MetadataManagerAPI();
--- a/frontend/src/metadata/components/metadata-details/ai-icon.js
+++ b/frontend/src/metadata/components/metadata-details/ai-icon.js
@@ -17,6 +17,7 @@ const OPERATION = {
  OCR: 'ocr',
  FILE_TAGS: 'file-tags',
  FILE_DETAIL: 'file-detail',
+  EXTRACT_TEXT: 'extract-text',
 };

 const AIIcon = () => {
@@ -24,9 +25,9 @@ const AIIcon = () => {
  const [isMenuShow, setMenuShow] = useState(false);
  const [isFileTagsDialogShow, setFileTagsDialogShow] = useState(false);

-  const { enableMetadata, enableTags, enableOCR } = useMetadataStatus();
+  const { enableMetadata, enableTags } = useMetadataStatus();
  const { canModifyRecord, columns, record, onChange, onLocalRecordChange, updateFileTags } = useMetadataDetails();
-  const { onOCR, generateDescription, extractFileDetails } = useMetadataAIOperations();
+  const { onOCR, generateDescription, extractFileDetails, extractText } = useMetadataAIOperations();

  const options = useMemo(() => {
    if (!canModifyRecord || !record || checkIsDir(record)) return [];
@@ -34,6 +35,7 @@ const AIIcon = () => {
    const fileName = getFileNameFromRecord(record);
    const isImage = Utils.imageCheck(fileName);
    const isVideo = Utils.videoCheck(fileName);
+    const isPdf = Utils.pdfCheck(fileName);
    const isDescribableDoc = Utils.isDescriptionSupportedFile(fileName);
    let list = [];

@@ -45,10 +47,6 @@ const AIIcon = () => {
      });
    }

-    if (enableOCR && isImage) {
-      list.push({ value: OPERATION.OCR, label: gettext('OCR'), record });
-    }
-
    if (isImage || isVideo) {
      list.push({ value: OPERATION.FILE_DETAIL, label: gettext('Extract file detail'), record });
    }
@@ -56,8 +54,12 @@ const AIIcon = () => {
    if (enableTags && isDescribableDoc && !isVideo) {
      list.push({ value: OPERATION.FILE_TAGS, label: gettext('Generate file tags'), record });
    }
+
+    if (isImage || isPdf) {
+      list.push({ value: OPERATION.EXTRACT_TEXT, label: gettext('Extract text'), record });
+    }
    return list;
-  }, [enableOCR, enableTags, canModifyRecord, columns, record]);
+  }, [enableTags, canModifyRecord, columns, record]);

  const onToggle = useCallback((event) => {
    event && event.preventDefault();
@@ -126,6 +128,14 @@ const AIIcon = () => {
        });
        break;
      }
+      case OPERATION.EXTRACT_TEXT: {
+        extractText({ parentDir, fileName }, {
+          success_callback: ({ extractedText }) => {
+            console.log(extractedText)
+          },
+        });
+        break;
+      }
      default: {
        setMenuShow(false);
        break;
--- a/frontend/src/metadata/constants/event-bus-type.js
+++ b/frontend/src/metadata/constants/event-bus-type.js
@@ -46,6 +46,7 @@ export const EVENT_BUS_TYPE = {
  UPDATE_RECORD_DETAILS: 'update_record_details',
  UPDATE_FACE_RECOGNITION: 'update_face_recognition',
  GENERATE_DESCRIPTION: 'generate_description',
+  EXTRACT_TEXT: 'extract_text',
  OCR: 'ocr',

  // metadata
--- a/frontend/src/metadata/hooks/metadata-view.js
+++ b/frontend/src/metadata/hooks/metadata-view.js
@@ -38,7 +38,7 @@ export const MetadataViewProvider = ({

  const { collaborators } = useCollaborators();
  const { isBeingBuilt, setIsBeingBuilt } = useMetadata();
-  const { onOCR, generateDescription, extractFilesDetails, faceRecognition } = useMetadataAIOperations();
+  const { onOCR, generateDescription, extractFilesDetails, faceRecognition, extractText } = useMetadataAIOperations();

  const tableChanged = useCallback(() => {
    setMetadata(storeRef.current.data);
@@ -403,6 +403,17 @@ export const MetadataViewProvider = ({
    });
  }, [modifyRecords, onOCR]);

+  // Run AI text extraction for a record; does nothing when its parent dir or file name is missing.
+  // NOTE(review): the extracted text is only logged for now; nothing is written back to the record.
+  const updateExtractText = useCallback((record) => {
+    const parentDir = getParentDirFromRecord(record);
+    const fileName = getFileNameFromRecord(record);
+    if (!fileName || !parentDir) return;
+    extractText({ parentDir, fileName }, {
+      success_callback: ({ extractedText }) => console.log(extractedText),
+    });
+  }, [extractText]);
+
  // init
  useEffect(() => {
    setLoading(true);
@@ -442,6 +453,7 @@ export const MetadataViewProvider = ({
    const unsubscribeUpdateFaceRecognition = eventBus.subscribe(EVENT_BUS_TYPE.UPDATE_FACE_RECOGNITION, updateFaceRecognition);
    const unsubscribeUpdateDescription = eventBus.subscribe(EVENT_BUS_TYPE.GENERATE_DESCRIPTION, updateRecordDescription);
    const unsubscribeOCR = eventBus.subscribe(EVENT_BUS_TYPE.OCR, ocr);
+    const unsubscribeUpdateExtract = eventBus.subscribe(EVENT_BUS_TYPE.EXTRACT_TEXT, updateExtractText);

    return () => {
      if (window.sfMetadataContext) {
@@ -468,6 +480,7 @@ export const MetadataViewProvider = ({
      unsubscribeUpdateFaceRecognition();
      unsubscribeUpdateDescription();
      unsubscribeOCR();
+      unsubscribeUpdateExtract();
      delayReloadDataTimer.current && clearTimeout(delayReloadDataTimer.current);
    };
  // eslint-disable-next-line react-hooks/exhaustive-deps
@@ -507,6 +520,7 @@ export const MetadataViewProvider = ({
        updateRecordDetails,
        updateFaceRecognition,
        updateRecordDescription,
+        updateExtractText,
        ocr,
      }}
    >
--- a/frontend/src/metadata/views/table/context-menu.js
+++ b/frontend/src/metadata/views/table/context-menu.js
@@ -33,6 +33,7 @@ const OPERATION = {
  FILE_DETAILS: 'file-details',
  DETECT_FACES: 'detect-faces',
  MOVE: 'move',
+  EXTRACT_TEXT: 'extract_text',
 };

 const { enableSeafileAI } = window.app.config;
@@ -40,7 +41,7 @@ const { enableSeafileAI } = window.app.config;
 const ContextMenu = ({
  isGroupView, selectedRange, selectedPosition, recordMetrics, recordGetterByIndex, onClearSelected, onCopySelected,
  getTableContentRect, getTableCanvasContainerRect, deleteRecords, selectNone, updateFileTags, moveRecord, addFolder, updateRecordDetails,
-  updateFaceRecognition, updateRecordDescription, ocr,
+  updateFaceRecognition, updateRecordDescription, ocr, updateExtractText
 }) => {
  const currentRecord = useRef(null);

@@ -211,6 +212,7 @@ const ContextMenu = ({
      const isDescribableFile = checkIsDescribableFile(record);
      const isImage = Utils.imageCheck(fileName);
      const isVideo = Utils.videoCheck(fileName);
+      const isPdf = Utils.pdfCheck(fileName);
      const aiOptions = [];

      if (isImage || isVideo) {
@@ -236,6 +238,10 @@ const ContextMenu = ({
        aiOptions.push({ value: OPERATION.OCR, label: gettext('OCR'), record });
      }

+      if (isImage || isPdf) {
+        aiOptions.push({ value: OPERATION.EXTRACT_TEXT, label: gettext('Extract text'), record });
+      }
+
      if (aiOptions.length > 0) {
        list.push('Divider');
        list.push(...aiOptions);
@@ -293,6 +299,11 @@ const ContextMenu = ({
        ocr(record);
        break;
      }
+      case OPERATION.EXTRACT_TEXT: {
+        const { record } = option;
+        updateExtractText(record)
+        break;
+      }
      case OPERATION.DELETE_RECORD: {
        const { record } = option;
        if (!record || !record._id || !deleteRecords) break;
@@ -346,7 +357,7 @@ const ContextMenu = ({
        break;
      }
    }
-  }, [repoID, onCopySelected, onClearSelected, updateRecordDescription, toggleFileTagsRecord, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateRecordDetails, updateFaceRecognition, toggleMoveDialog]);
+  }, [repoID, onCopySelected, onClearSelected, updateRecordDescription, toggleFileTagsRecord, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateRecordDetails, updateFaceRecognition, toggleMoveDialog, updateExtractText]);

  useEffect(() => {
    const unsubscribeToggleMoveDialog = window.sfMetadataContext.eventBus.subscribe(EVENT_BUS_TYPE.TOGGLE_MOVE_DIALOG, toggleMoveDialog);
--- a/frontend/src/metadata/views/table/index.js
+++ b/frontend/src/metadata/views/table/index.js
@@ -32,6 +32,7 @@ const Table = () => {
    updateRecordDetails,
    updateFaceRecognition,
    updateRecordDescription,
+    updateExtractText,
    ocr,
  } = useMetadataView();
  const containerRef = useRef(null);
@@ -185,6 +186,7 @@ const Table = () => {
        updateRecordDetails={updateRecordDetails}
        updateFaceRecognition={updateFaceRecognition}
        updateRecordDescription={updateRecordDescription}
+        updateExtractText={updateExtractText}
        ocr={ocr}
      />
    </div>
--- a/frontend/src/metadata/views/table/table-main/records/index.js
+++ b/frontend/src/metadata/views/table/table-main/records/index.js
@@ -649,6 +649,7 @@ class Records extends Component {
          updateFaceRecognition={this.props.updateFaceRecognition}
          updateRecordDescription={this.props.updateRecordDescription}
          ocr={this.props.ocr}
+          updateExtractText={this.props.updateExtractText}
        />
      ),
      hasSelectedRecord: this.hasSelectedRecord(),
--- a/seahub/ai/apis.py
+++ b/seahub/ai/apis.py
@@ -10,12 +10,13 @@ from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 from rest_framework import status
 from rest_framework.views import APIView
-from seahub.api2.utils import api_error
+from seahub.api2.utils import api_error, get_file_size
 from seahub.api2.throttling import UserRateThrottle
 from seahub.api2.authentication import TokenAuthentication, SdocJWTTokenAuthentication
 from seahub.utils import get_file_type_and_ext, IMAGE
 from seahub.views import check_folder_permission
-from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, generate_file_tags, ocr
+from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, \
+    generate_file_tags, ocr, extract_text

 logger = logging.getLogger(__name__)

@@ -341,6 +342,73 @@ class WritingAssistant(APIView):
            resp = writing_assistant(params)
            resp_json = resp.json()
        except Exception as e:
+            logger.error(e)
+            error_msg = 'Internal Server Error'
+            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)
+
+        return Response(resp_json, resp.status_code)
+
+
+class ExtractText(APIView):
+    authentication_classes = (TokenAuthentication, SessionAuthentication)
+    permission_classes = (IsAuthenticated,)
+    throttle_classes = (UserRateThrottle,)
+
+    def post(self, request):
+        if not verify_ai_config():
+            return api_error(status.HTTP_400_BAD_REQUEST, 'AI server not configured')
+
+        repo_id = request.data.get('repo_id')
+        path = request.data.get('path')
+
+        if not repo_id:
+            return api_error(status.HTTP_400_BAD_REQUEST, 'repo_id invalid')
+        if not path:
+            return api_error(status.HTTP_400_BAD_REQUEST, 'path invalid')
+
+        file_type, _ = get_file_type_and_ext(os.path.basename(path))
+        if file_type != IMAGE and not path.lower().endswith('.pdf'):
+            return api_error(status.HTTP_400_BAD_REQUEST, 'file type not image or pdf')
+
+        repo = seafile_api.get_repo(repo_id)
+        if not repo:
+            error_msg = 'Library %s not found.' % repo_id
+            return api_error(status.HTTP_404_NOT_FOUND, error_msg)
+
+        permission = check_folder_permission(request, repo_id, os.path.dirname(path))
+        if not permission:
+            error_msg = 'Permission denied.'
+            return api_error(status.HTTP_403_FORBIDDEN, error_msg)
+
+        try:
+            file_id = seafile_api.get_file_id_by_path(repo_id, path)
+        except SearpcError as e:
+            logger.error(e)
+            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error')
+
+        if not file_id:
+            return api_error(status.HTTP_404_NOT_FOUND, f"File {path} not found")
+
+        file_size = get_file_size(repo.store_id, repo.version, file_id)
+        if file_size >> 20 > 5:
+            error_msg = 'File size exceed the limit.'
+            return api_error(status.HTTP_400_BAD_REQUEST, error_msg)
+
+        token = seafile_api.get_fileserver_access_token(repo_id, file_id, 'download', request.user.username, use_onetime=True)
+        if not token:
+            error_msg = 'Internal Server Error'
+            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)
+
+        params = {
+            'file_name': os.path.basename(path),
+            'download_token': token,
+        }
+
+        try:
+            resp = extract_text(params)
+            resp_json = resp.json()
+        except Exception as e:
+            logger.error(e)
            error_msg = 'Internal Server Error'
            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)

--- a/seahub/ai/utils.py
+++ b/seahub/ai/utils.py
@@ -61,3 +61,10 @@ def writing_assistant(params):
    url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/writing-assistant/')
    resp = requests.post(url, json=params, headers=headers, timeout=30)
    return resp
+
+
+def extract_text(params):
+    """Send ``params`` as JSON to the AI server's extract-text endpoint; return the raw response."""
+    auth_headers = gen_headers()
+    endpoint = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/extract-text/')
+    return requests.post(endpoint, json=params, headers=auth_headers, timeout=30)
--- a/seahub/urls.py
+++ b/seahub/urls.py
@@ -2,7 +2,8 @@
 from django.urls import include, path, re_path
 from django.views.generic import TemplateView

-from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant
+from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant, \
+    ExtractText
 from seahub.api2.endpoints.file_comments import FileCommentsView, FileCommentView, FileCommentRepliesView, \
    FileCommentReplyView
 from seahub.api2.endpoints.share_link_auth import ShareLinkUserAuthView, ShareLinkEmailAuthView
@@ -1087,4 +1088,5 @@ urlpatterns += [
    re_path(r'^api/v2.1/ai/ocr/$', OCR.as_view(), name='api-v2.1-ocr'),
    re_path(r'^api/v2.1/ai/translate/$', Translate.as_view(), name='api-v2.1-translate'),
    re_path(r'^api/v2.1/ai/writing-assistant/$', WritingAssistant.as_view(), name='api-v2.1-writing-assistant'),
+    re_path(r'^api/v2.1/ai/extract-text/$', ExtractText.as_view(), name='api-v2.1-extract-text'),
 ]