diff --git a/frontend/src/hooks/metadata-ai-operation.js b/frontend/src/hooks/metadata-ai-operation.js index 9b02574c7d..04c3d01f7d 100644 --- a/frontend/src/hooks/metadata-ai-operation.js +++ b/frontend/src/hooks/metadata-ai-operation.js @@ -106,6 +106,24 @@ export const MetadataAIOperationsProvider = ({ }); }, [repoID]); + + const extractText = useCallback(({ parentDir, fileName }, { success_callback, fail_callback } = {}) => { + const filePath = Utils.joinPath(parentDir, fileName); + const inProgressToaster = toaster.notifyInProgress(gettext('Extracting text by AI...'), { duration: null }); + metadataAPI.extractText(repoID, filePath).then(res => { + console.log(res) + const extractedText = res?.data?.text || res.data.text || ''; + inProgressToaster.close(); + success_callback && success_callback({ parentDir, fileName, extractedText }); + }).catch(error => { + inProgressToaster.close(); + const errorMessage = gettext('Failed to extract text'); + toaster.danger(errorMessage); + fail_callback && fail_callback(); + }); + }, [repoID]); + + return ( {children} diff --git a/frontend/src/metadata/api.js b/frontend/src/metadata/api.js index 190f19e840..863c3a4107 100644 --- a/frontend/src/metadata/api.js +++ b/frontend/src/metadata/api.js @@ -406,6 +406,15 @@ class MetadataManagerAPI { return this.req.post(url, params); }; + extractText = (repoID, filePath) => { + const url = this.server + '/api/v2.1/ai/extract-text/'; + const params = { + path: filePath, + repo_id: repoID, + }; + return this.req.post(url, params); + }; + } const metadataAPI = new MetadataManagerAPI(); diff --git a/frontend/src/metadata/components/metadata-details/ai-icon.js b/frontend/src/metadata/components/metadata-details/ai-icon.js index 659ea81bc3..3462b3214d 100644 --- a/frontend/src/metadata/components/metadata-details/ai-icon.js +++ b/frontend/src/metadata/components/metadata-details/ai-icon.js @@ -17,6 +17,7 @@ const OPERATION = { OCR: 'ocr', FILE_TAGS: 'file-tags', FILE_DETAIL: 'file-detail', + EXTRACT_TEXT: 'extract-text', }; const AIIcon = () => { @@ -24,9 +25,9 @@ const AIIcon = () => { const [isMenuShow, setMenuShow] = useState(false); const [isFileTagsDialogShow, setFileTagsDialogShow] = useState(false); - const { enableMetadata, enableTags, enableOCR } = useMetadataStatus(); + const { enableMetadata, enableTags } = useMetadataStatus(); const { canModifyRecord, columns, record, onChange, onLocalRecordChange, updateFileTags } = useMetadataDetails(); - const { onOCR, generateDescription, extractFileDetails } = useMetadataAIOperations(); + const { onOCR, generateDescription, extractFileDetails, extractText } = useMetadataAIOperations(); const options = useMemo(() => { if (!canModifyRecord || !record || checkIsDir(record)) return []; @@ -34,6 +35,7 @@ const AIIcon = () => { const fileName = getFileNameFromRecord(record); const isImage = Utils.imageCheck(fileName); const isVideo = Utils.videoCheck(fileName); + const isPdf = Utils.pdfCheck(fileName); const isDescribableDoc = Utils.isDescriptionSupportedFile(fileName); let list = []; @@ -45,10 +47,6 @@ const AIIcon = () => { }); } - if (enableOCR && isImage) { - list.push({ value: OPERATION.OCR, label: gettext('OCR'), record }); - } - if (isImage || isVideo) { list.push({ value: OPERATION.FILE_DETAIL, label: gettext('Extract file detail'), record }); } @@ -56,8 +54,12 @@ const AIIcon = () => { if (enableTags && isDescribableDoc && !isVideo) { list.push({ value: OPERATION.FILE_TAGS, label: gettext('Generate file tags'), record }); } + + if (isImage || isPdf) { + list.push({ value: OPERATION.EXTRACT_TEXT, label: gettext('Extract text'), record }); + } return list; - }, [enableOCR, enableTags, canModifyRecord, columns, record]); + }, [enableTags, canModifyRecord, columns, record]); const onToggle = useCallback((event) => { event && event.preventDefault(); @@ -126,6 +128,14 @@ const AIIcon = () => { }); break; } + case OPERATION.EXTRACT_TEXT: { + extractText({ parentDir, fileName }, { + success_callback: ({ extractedText }) => { + console.log(extractedText) + }, + }); + break; + } default: { setMenuShow(false); break; diff --git a/frontend/src/metadata/constants/event-bus-type.js b/frontend/src/metadata/constants/event-bus-type.js index 59451c5b9d..91b9dc8c80 100644 --- a/frontend/src/metadata/constants/event-bus-type.js +++ b/frontend/src/metadata/constants/event-bus-type.js @@ -46,6 +46,7 @@ export const EVENT_BUS_TYPE = { UPDATE_RECORD_DETAILS: 'update_record_details', UPDATE_FACE_RECOGNITION: 'update_face_recognition', GENERATE_DESCRIPTION: 'generate_description', + EXTRACT_TEXT: 'extract_text', OCR: 'ocr', // metadata diff --git a/frontend/src/metadata/hooks/metadata-view.js b/frontend/src/metadata/hooks/metadata-view.js index 80297f0d97..6f4abd67d8 100644 --- a/frontend/src/metadata/hooks/metadata-view.js +++ b/frontend/src/metadata/hooks/metadata-view.js @@ -38,7 +38,7 @@ export const MetadataViewProvider = ({ const { collaborators } = useCollaborators(); const { isBeingBuilt, setIsBeingBuilt } = useMetadata(); - const { onOCR, generateDescription, extractFilesDetails, faceRecognition } = useMetadataAIOperations(); + const { onOCR, generateDescription, extractFilesDetails, faceRecognition, extractText } = useMetadataAIOperations(); const tableChanged = useCallback(() => { setMetadata(storeRef.current.data); @@ -403,6 +403,17 @@ export const MetadataViewProvider = ({ }); }, [modifyRecords, onOCR]); + const updateExtractText = useCallback((record) => { + const parentDir = getParentDirFromRecord(record); + const fileName = getFileNameFromRecord(record); + if (!fileName || !parentDir) return; + extractText({ parentDir, fileName }, { + success_callback: ({ extractedText }) => { + console.log(extractedText) + } + }); + }, [extractText]); + // init useEffect(() => { setLoading(true); @@ -442,6 +453,7 @@ export const MetadataViewProvider = ({ const unsubscribeUpdateFaceRecognition = eventBus.subscribe(EVENT_BUS_TYPE.UPDATE_FACE_RECOGNITION, updateFaceRecognition); const unsubscribeUpdateDescription = eventBus.subscribe(EVENT_BUS_TYPE.GENERATE_DESCRIPTION, updateRecordDescription); const unsubscribeOCR = eventBus.subscribe(EVENT_BUS_TYPE.OCR, ocr); + const unsubscribeUpdateExtract = eventBus.subscribe(EVENT_BUS_TYPE.EXTRACT_TEXT, updateExtractText); return () => { if (window.sfMetadataContext) { @@ -468,6 +480,7 @@ export const MetadataViewProvider = ({ unsubscribeUpdateFaceRecognition(); unsubscribeUpdateDescription(); unsubscribeOCR(); + unsubscribeUpdateExtract(); delayReloadDataTimer.current && clearTimeout(delayReloadDataTimer.current); }; // eslint-disable-next-line react-hooks/exhaustive-deps @@ -507,6 +520,7 @@ export const MetadataViewProvider = ({ updateRecordDetails, updateFaceRecognition, updateRecordDescription, + updateExtractText, ocr, }} > diff --git a/frontend/src/metadata/views/table/context-menu.js b/frontend/src/metadata/views/table/context-menu.js index c493cbf2e7..59a400ebf3 100644 --- a/frontend/src/metadata/views/table/context-menu.js +++ b/frontend/src/metadata/views/table/context-menu.js @@ -33,6 +33,7 @@ const OPERATION = { FILE_DETAILS: 'file-details', DETECT_FACES: 'detect-faces', MOVE: 'move', + EXTRACT_TEXT: 'extract_text', }; const { enableSeafileAI } = window.app.config; @@ -40,7 +41,7 @@ const { enableSeafileAI } = window.app.config; const ContextMenu = ({ isGroupView, selectedRange, selectedPosition, recordMetrics, recordGetterByIndex, onClearSelected, onCopySelected, getTableContentRect, getTableCanvasContainerRect, deleteRecords, selectNone, updateFileTags, moveRecord, addFolder, updateRecordDetails, - updateFaceRecognition, updateRecordDescription, ocr, + updateFaceRecognition, updateRecordDescription, ocr, updateExtractText }) => { const currentRecord = useRef(null); @@ -211,6 +212,7 @@ const ContextMenu = ({ const isDescribableFile = checkIsDescribableFile(record); const isImage = Utils.imageCheck(fileName); const isVideo = Utils.videoCheck(fileName); + const isPdf = Utils.pdfCheck(fileName); const aiOptions = []; if (isImage || isVideo) { @@ -236,6 +238,10 @@ const ContextMenu = ({ aiOptions.push({ value: OPERATION.OCR, label: gettext('OCR'), record }); } + if (isImage || isPdf) { + aiOptions.push({ value: OPERATION.EXTRACT_TEXT, label: gettext('Extract text'), record }); + } + if (aiOptions.length > 0) { list.push('Divider'); list.push(...aiOptions); @@ -293,6 +299,11 @@ const ContextMenu = ({ ocr(record); break; } + case OPERATION.EXTRACT_TEXT: { + const { record } = option; + updateExtractText(record) + break; + } case OPERATION.DELETE_RECORD: { const { record } = option; if (!record || !record._id || !deleteRecords) break; @@ -346,7 +357,7 @@ const ContextMenu = ({ break; } } - }, [repoID, onCopySelected, onClearSelected, updateRecordDescription, toggleFileTagsRecord, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateRecordDetails, updateFaceRecognition, toggleMoveDialog]); + }, [repoID, onCopySelected, onClearSelected, updateRecordDescription, toggleFileTagsRecord, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateRecordDetails, updateFaceRecognition, toggleMoveDialog, updateExtractText]); useEffect(() => { const unsubscribeToggleMoveDialog = window.sfMetadataContext.eventBus.subscribe(EVENT_BUS_TYPE.TOGGLE_MOVE_DIALOG, toggleMoveDialog); diff --git a/frontend/src/metadata/views/table/index.js b/frontend/src/metadata/views/table/index.js index a340277b75..dafa935be6 100644 --- a/frontend/src/metadata/views/table/index.js +++ b/frontend/src/metadata/views/table/index.js @@ -32,6 +32,7 @@ const Table = () => { updateRecordDetails, updateFaceRecognition, updateRecordDescription, + updateExtractText, ocr, } = useMetadataView(); const containerRef = useRef(null); @@ -185,6 +186,7 @@ const Table = () => { updateRecordDetails={updateRecordDetails} updateFaceRecognition={updateFaceRecognition} updateRecordDescription={updateRecordDescription} + updateExtractText={updateExtractText} ocr={ocr} /> diff --git a/frontend/src/metadata/views/table/table-main/records/index.js b/frontend/src/metadata/views/table/table-main/records/index.js index a5c9596128..5b7834c96e 100644 --- a/frontend/src/metadata/views/table/table-main/records/index.js +++ b/frontend/src/metadata/views/table/table-main/records/index.js @@ -649,6 +649,7 @@ class Records extends Component { updateFaceRecognition={this.props.updateFaceRecognition} updateRecordDescription={this.props.updateRecordDescription} ocr={this.props.ocr} + updateExtractText={this.props.updateExtractText} /> ), hasSelectedRecord: this.hasSelectedRecord(), diff --git a/seahub/ai/apis.py b/seahub/ai/apis.py index f7c1872aea..90778e898e 100644 --- a/seahub/ai/apis.py +++ b/seahub/ai/apis.py @@ -10,12 +10,13 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework import status from rest_framework.views import APIView -from seahub.api2.utils import api_error +from seahub.api2.utils import api_error, get_file_size from seahub.api2.throttling import UserRateThrottle from seahub.api2.authentication import TokenAuthentication, SdocJWTTokenAuthentication from seahub.utils import get_file_type_and_ext, IMAGE from seahub.views import check_folder_permission -from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, generate_file_tags, ocr +from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, \ + generate_file_tags, ocr, extract_text logger = logging.getLogger(__name__) @@ -341,6 +342,73 @@ class WritingAssistant(APIView): resp = writing_assistant(params) resp_json = resp.json() except Exception as e: + logger.error(e) + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + return Response(resp_json, resp.status_code) + + +class ExtractText(APIView): + authentication_classes = (TokenAuthentication, SessionAuthentication) + permission_classes = (IsAuthenticated,) + throttle_classes = (UserRateThrottle,) + + def post(self, request): + if not verify_ai_config(): + return api_error(status.HTTP_400_BAD_REQUEST, 'AI server not configured') + + repo_id = request.data.get('repo_id') + path = request.data.get('path') + + if not repo_id: + return api_error(status.HTTP_400_BAD_REQUEST, 'repo_id invalid') + if not path: + return api_error(status.HTTP_400_BAD_REQUEST, 'path invalid') + + file_type, _ = get_file_type_and_ext(os.path.basename(path)) + if file_type != IMAGE and not path.lower().endswith('.pdf'): + return api_error(status.HTTP_400_BAD_REQUEST, 'file type not image or pdf') + + repo = seafile_api.get_repo(repo_id) + if not repo: + error_msg = 'Library %s not found.' % repo_id + return api_error(status.HTTP_404_NOT_FOUND, error_msg) + + permission = check_folder_permission(request, repo_id, os.path.dirname(path)) + if not permission: + error_msg = 'Permission denied.' + return api_error(status.HTTP_403_FORBIDDEN, error_msg) + + try: + file_id = seafile_api.get_file_id_by_path(repo_id, path) + except SearpcError as e: + logger.error(e) + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error') + + if not file_id: + return api_error(status.HTTP_404_NOT_FOUND, f"File {path} not found") + + file_size = get_file_size(repo.store_id, repo.version, file_id) + if file_size >> 20 > 5: + error_msg = 'File size exceed the limit.' + return api_error(status.HTTP_400_BAD_REQUEST, error_msg) + + token = seafile_api.get_fileserver_access_token(repo_id, file_id, 'download', request.user.username, use_onetime=True) + if not token: + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + params = { + 'file_name': os.path.basename(path), + 'download_token': token, + } + + try: + resp = extract_text(params) + resp_json = resp.json() + except Exception as e: + logger.error(e) error_msg = 'Internal Server Error' return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) diff --git a/seahub/ai/utils.py b/seahub/ai/utils.py index 615be74393..db2a434649 100644 --- a/seahub/ai/utils.py +++ b/seahub/ai/utils.py @@ -61,3 +61,10 @@ def writing_assistant(params): url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/writing-assistant/') resp = requests.post(url, json=params, headers=headers, timeout=30) return resp + + +def extract_text(params): + headers = gen_headers() + url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/extract-text/') + resp = requests.post(url, json=params, headers=headers, timeout=30) + return resp diff --git a/seahub/urls.py b/seahub/urls.py index 0dd57049bf..caf88f08f2 100644 --- a/seahub/urls.py +++ b/seahub/urls.py @@ -2,7 +2,8 @@ from django.urls import include, path, re_path from django.views.generic import TemplateView -from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant +from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant, \ + ExtractText from seahub.api2.endpoints.file_comments import FileCommentsView, FileCommentView, FileCommentRepliesView, \ FileCommentReplyView from seahub.api2.endpoints.share_link_auth import ShareLinkUserAuthView, ShareLinkEmailAuthView @@ -481,15 +482,15 @@ urlpatterns = [ re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/share-info/$', RepoShareInfoView.as_view(), name='api-v2.1-repo-share-info-view'), re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/image-rotate/$', RepoImageRotateView.as_view(), name='api-v2.1-repo-image-rotate-view'), re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/office-suite/$', OfficeSuiteConfig.as_view(), name='api-v2.1-repo-office-suite'), - - + + ## user: repo file comments re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/file/(?P[-0-9a-f]{36})/comments/$', FileCommentsView.as_view(), name='api-v2.1-file-comments'), re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/file/(?P[-0-9a-f]{36})/comments/(?P\d+)/$', FileCommentView.as_view(), name='api-v2.1-file-comment'), re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/file/(?P[-0-9a-f]{36})/comments/(?P\d+)/replies/$', FileCommentRepliesView.as_view(), name='api-v2.1-file-comment-replies'), re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/file/(?P[-0-9a-f]{36})/comments/(?P\d+)/replies/(?P\d+)/$', FileCommentReplyView.as_view(), name='api-v2.1-file-comment-repolies'), - - + + ## user:: repo-api-tokens re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/repo-api-tokens/$', RepoAPITokensView.as_view(), name='api-v2.1-repo-api-tokens'), re_path(r'^api/v2.1/repos/(?P[-0-9a-f]{36})/repo-api-tokens/(?P.*)/$', RepoAPITokenView.as_view(), name='api-v2.1-repo-api-token'), @@ -1087,4 +1088,5 @@ urlpatterns += [ re_path(r'^api/v2.1/ai/ocr/$', OCR.as_view(), name='api-v2.1-ocr'), re_path(r'^api/v2.1/ai/translate/$', Translate.as_view(), name='api-v2.1-translate'), re_path(r'^api/v2.1/ai/writing-assistant/$', WritingAssistant.as_view(), name='api-v2.1-writing-assistant'), + re_path(r'^api/v2.1/ai/extract-text/$', ExtractText.as_view(), name='api-v2.1-extract-text'), ]