diff --git a/frontend/src/components/dialog/lib-settings.js b/frontend/src/components/dialog/lib-settings.js index 8d1784b08a..405772db08 100644 --- a/frontend/src/components/dialog/lib-settings.js +++ b/frontend/src/components/dialog/lib-settings.js @@ -8,6 +8,7 @@ import { MetadataStatusManagementDialog as LibExtendedPropertiesSettingPanel, MetadataFaceRecognitionDialog as LibFaceRecognitionSettingPanel, MetadataTagsStatusDialog as LibMetadataTagsStatusSettingPanel, + MetadataOCRStatusManagementDialog as LibMetadataOCRStatusSettingPanel, useMetadata } from '../../metadata'; import { useMetadataStatus } from '../../hooks'; @@ -19,7 +20,8 @@ const TAB = { AUTO_DEL_SETTING: 'auto_delete_setting', EXTENDED_PROPERTIES_SETTING: 'extended_properties_setting', FACE_RECOGNITION_SETTING: 'face_recognition_setting', - TAGS_SETTING: 'tags_setting' + TAGS_SETTING: 'tags_setting', + OCR_SETTING: 'ocr_setting', }; const propTypes = { @@ -44,7 +46,7 @@ const LibSettingsDialog = ({ repoID, currentRepoInfo, toggleDialog, tab }) => { const { encrypted, is_admin } = currentRepoInfo; const { enableMetadataManagement } = window.app.pageOptions; const { enableFaceRecognition, updateEnableFaceRecognition } = useMetadata(); - const { enableMetadata, updateEnableMetadata, enableTags, tagsLang, updateEnableTags } = useMetadataStatus(); + const { enableMetadata, updateEnableMetadata, enableTags, tagsLang, updateEnableTags, enableOCR, updateEnableOCR } = useMetadataStatus(); const enableHistorySetting = is_admin; // repo owner, admin of the department which the repo belongs to, and ... 
const enableAutoDelSetting = is_admin && enableRepoAutoDel; const enableExtendedPropertiesSetting = !encrypted && is_admin && enableMetadataManagement; @@ -95,6 +97,13 @@ const LibSettingsDialog = ({ repoID, currentRepoInfo, toggleDialog, tab }) => { )} + {enableMetadataOtherSettings && ( + + + {gettext('OCR')} + + + )} @@ -145,6 +154,17 @@ const LibSettingsDialog = ({ repoID, currentRepoInfo, toggleDialog, tab }) => { /> )} + {(enableMetadataOtherSettings && activeTab === TAB.OCR_SETTING) && ( + + + + )} diff --git a/frontend/src/hooks/metadata-status.js b/frontend/src/hooks/metadata-status.js index 687f660da6..163c818fa1 100644 --- a/frontend/src/hooks/metadata-status.js +++ b/frontend/src/hooks/metadata-status.js @@ -17,6 +17,7 @@ export const MetadataStatusProvider = ({ repoID, currentRepoInfo, hideMetadataVi const [enableMetadata, setEnableMetadata] = useState(false); const [enableTags, setEnableTags] = useState(false); const [tagsLang, setTagsLang] = useState('en'); + const [enableOCR, setEnableOCR] = useState(false); const [detailsSettings, setDetailsSettings] = useState({}); const [isBeingBuilt, setIsBeingBuilt] = useState(false); @@ -38,13 +39,20 @@ export const MetadataStatusProvider = ({ repoID, currentRepoInfo, hideMetadataVi return; } metadataAPI.getMetadataStatus(repoID).then(res => { - const { enabled: enableMetadata, tags_enabled: enableTags, tags_lang: tagsLang, details_settings: detailsSettings } = res.data; + const { + enabled: enableMetadata, + tags_enabled: enableTags, + tags_lang: tagsLang, + details_settings: detailsSettings, + ocr_enabled: enableOCR + } = res.data; if (!enableMetadata) { cancelMetadataURL(); } setEnableTags(enableTags); setTagsLang(tagsLang || 'en'); setDetailsSettings(JSON.parse(detailsSettings)); + setEnableOCR(enableOCR); setEnableMetadata(enableMetadata); setLoading(false); }).catch(error => { @@ -77,6 +85,11 @@ export const MetadataStatusProvider = ({ repoID, currentRepoInfo, hideMetadataVi setTagsLang(lang); }, 
[enableTags, tagsLang, cancelMetadataURL, hideMetadataView]); + const updateEnableOCR = useCallback((newValue) => { + if (newValue === enableOCR) return; + setEnableOCR(newValue); + }, [enableOCR]); + const modifyDetailsSettings = useCallback((update) => { metadataAPI.modifyMetadataDetailsSettings(repoID, update).then(res => { const newDetailsSettings = { ...detailsSettings, ...update }; @@ -100,6 +113,8 @@ export const MetadataStatusProvider = ({ repoID, currentRepoInfo, hideMetadataVi updateEnableTags, detailsSettings, modifyDetailsSettings, + enableOCR, + updateEnableOCR, }} > {!isLoading && children} diff --git a/frontend/src/metadata/api.js b/frontend/src/metadata/api.js index d86ce302a1..223131b82b 100644 --- a/frontend/src/metadata/api.js +++ b/frontend/src/metadata/api.js @@ -306,6 +306,26 @@ class MetadataManagerAPI { return this.req.get(url); }; + // ocr + openOCR = (repoID) => { + const url = this.server + '/api/v2.1/repos/' + repoID + '/metadata/ocr/'; + return this.req.put(url); + }; + + closeOCR = (repoID) => { + const url = this.server + '/api/v2.1/repos/' + repoID + '/metadata/ocr/'; + return this.req.delete(url); + }; + + ocr = (repoID, filePath) => { + const url = this.server + '/api/v2.1/ai/ocr/'; + const params = { + path: filePath, + repo_id: repoID, + }; + return this.req.post(url, params); + }; + } const metadataAPI = new MetadataManagerAPI(); diff --git a/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/index.js b/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/index.js index df7dae0416..488d48181e 100644 --- a/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/index.js +++ b/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/index.js @@ -7,7 +7,7 @@ import { gettext } from '../../../../utils/constants'; import metadataAPI from '../../../api'; import toaster from '../../../../components/toast'; import { Utils } from '../../../../utils/utils'; -import 
TurnOffConfirmDialog from './turn-off-confirm'; +import TurnOffConfirmDialog from '../turn-off-confirm-dialog'; import './index.css'; @@ -83,7 +83,9 @@ const MetadataFaceRecognitionDialog = ({ value: oldValue, repoID, toggleDialog: )} {showTurnOffConfirmDialog && ( - + +

{gettext('Do you really want to turn off face recognition? Existing results will all be deleted.')}

+
)} ); diff --git a/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/turn-off-confirm.js b/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/turn-off-confirm.js deleted file mode 100644 index 96ae09ac4d..0000000000 --- a/frontend/src/metadata/components/dialog/metadata-face-recognition-dialog/turn-off-confirm.js +++ /dev/null @@ -1,26 +0,0 @@ -import React from 'react'; -import PropTypes from 'prop-types'; -import { Modal, ModalHeader, ModalBody, ModalFooter, Button } from 'reactstrap'; -import { gettext } from '../../../../utils/constants'; - -const TurnOffConfirmDialog = ({ toggle, submit }) => { - return ( - - {gettext('Turn off face recognition')} - -

{gettext('Do you really want to turn off face recognition? Existing results will all be deleted.')}

-
- - - - -
- ); -}; - -TurnOffConfirmDialog.propTypes = { - toggle: PropTypes.func.isRequired, - submit: PropTypes.func.isRequired -}; - -export default TurnOffConfirmDialog; diff --git a/frontend/src/metadata/components/dialog/metadata-ocr-status-management-dialog/index.js b/frontend/src/metadata/components/dialog/metadata-ocr-status-management-dialog/index.js new file mode 100644 index 0000000000..6a2b7e9ca8 --- /dev/null +++ b/frontend/src/metadata/components/dialog/metadata-ocr-status-management-dialog/index.js @@ -0,0 +1,100 @@ +import React, { useCallback, useState } from 'react'; +import PropTypes from 'prop-types'; +import classnames from 'classnames'; +import { ModalBody, ModalFooter, Button } from 'reactstrap'; +import Switch from '../../../../components/common/switch'; +import toaster from '../../../../components/toast'; +import TurnOffConfirmDialog from '../turn-off-confirm-dialog'; +import metadataAPI from '../../../api'; +import { Utils } from '../../../../utils/utils'; +import { gettext } from '../../../../utils/constants'; + +const MetadataOCRStatusManagementDialog = ({ value: oldValue, repoID, toggleDialog: toggle, submit }) => { + const [value, setValue] = useState(oldValue); + const [submitting, setSubmitting] = useState(false); + const [showTurnOffConfirmDialog, setShowTurnOffConfirmDialog] = useState(false); + + const onToggle = useCallback(() => { + if (submitting) return; + toggle && toggle(); + }, [submitting, toggle]); + + const onSubmit = useCallback(() => { + if (!value) { + setShowTurnOffConfirmDialog(true); + return; + } + setSubmitting(true); + metadataAPI.openOCR(repoID).then(res => { + submit(true); + toggle(); + }).catch(error => { + const errorMsg = Utils.getErrorMsg(error); + toaster.danger(errorMsg); + setSubmitting(false); + }); + }, [repoID, value, submit, toggle]); + + const turnOffConfirmToggle = useCallback(() => { + setShowTurnOffConfirmDialog(!showTurnOffConfirmDialog); + }, [showTurnOffConfirmDialog]); + + const turnOffConfirmSubmit 
= useCallback(() => { + setShowTurnOffConfirmDialog(false); + setSubmitting(true); + metadataAPI.closeOCR(repoID).then(res => { + submit(false); + toggle(); + }).catch(error => { + const errorMsg = Utils.getErrorMsg(error); + toaster.danger(errorMsg); + setSubmitting(false); + }); + }, [repoID, submit, toggle]); + + const onValueChange = useCallback(() => { + const nextValue = !value; + setValue(nextValue); + }, [value]); + + return ( + <> + {!showTurnOffConfirmDialog && ( + <> + + +

+ {gettext('After enabling OCR, you can extract text from images or scanned PDFs.')} +

+
+ + + + + + )} + {showTurnOffConfirmDialog && ( + +

{gettext('Do you really want to turn off OCR? Existing OCR results will be deleted.')}

+
+ )} + + ); +}; + +MetadataOCRStatusManagementDialog.propTypes = { + value: PropTypes.bool, + repoID: PropTypes.string.isRequired, + toggleDialog: PropTypes.func.isRequired, + submit: PropTypes.func.isRequired, +}; + +export default MetadataOCRStatusManagementDialog; diff --git a/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/index.js b/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/index.js index 46a23d09ec..36c665ad90 100644 --- a/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/index.js +++ b/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/index.js @@ -4,7 +4,7 @@ import classnames from 'classnames'; import { ModalBody, ModalFooter, Button } from 'reactstrap'; import Switch from '../../../../components/common/switch'; import toaster from '../../../../components/toast'; -import TurnOffConfirmDialog from './turn-off-confirm'; +import TurnOffConfirmDialog from '../turn-off-confirm-dialog'; import metadataAPI from '../../../api'; import { Utils } from '../../../../utils/utils'; import { gettext } from '../../../../utils/constants'; @@ -84,7 +84,9 @@ const MetadataStatusManagementDialog = ({ value: oldValue, repoID, toggleDialog: )} {showTurnOffConfirmDialog && ( - + +

{gettext('Do you really want to turn off extended properties? Existing properties will all be deleted.')}

+
)} ); diff --git a/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/turn-off-confirm.js b/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/turn-off-confirm.js deleted file mode 100644 index ea6268ca8c..0000000000 --- a/frontend/src/metadata/components/dialog/metadata-status-manage-dialog/turn-off-confirm.js +++ /dev/null @@ -1,26 +0,0 @@ -import React from 'react'; -import PropTypes from 'prop-types'; -import { Modal, ModalHeader, ModalBody, ModalFooter, Button } from 'reactstrap'; -import { gettext } from '../../../../utils/constants'; - -const TurnOffConfirmDialog = ({ toggle, submit }) => { - return ( - - {gettext('Turn off extended properties')} - -

{gettext('Do you really want to turn off extended properties? Existing properties will all be deleted.')}

-
- - - - -
- ); -}; - -TurnOffConfirmDialog.propTypes = { - toggle: PropTypes.func.isRequired, - submit: PropTypes.func.isRequired -}; - -export default TurnOffConfirmDialog; diff --git a/frontend/src/metadata/components/dialog/metadata-tags-status-dialog/index.js b/frontend/src/metadata/components/dialog/metadata-tags-status-dialog/index.js index 61e27546ef..4d68e726d2 100644 --- a/frontend/src/metadata/components/dialog/metadata-tags-status-dialog/index.js +++ b/frontend/src/metadata/components/dialog/metadata-tags-status-dialog/index.js @@ -7,7 +7,7 @@ import { gettext } from '../../../../utils/constants'; import tagsAPI from '../../../../tag/api'; import toaster from '../../../../components/toast'; import { Utils } from '../../../../utils/utils'; -import TurnOffConfirmDialog from './turn-off-confirm'; +import TurnOffConfirmDialog from '../turn-off-confirm-dialog'; import { SeahubSelect } from '../../../../components/common/select'; import './index.css'; @@ -16,8 +16,7 @@ const langOptions = [ { value: 'zh-cn', label: '简体中文' - }, - { + }, { value: 'en', label: 'English' } @@ -113,7 +112,9 @@ const MetadataTagsStatusDialog = ({ value: oldValue, lang: oldLang, repoID, togg )} {showTurnOffConfirmDialog && ( - + +

{gettext('Do you really want to turn off tags? Existing tags will all be deleted.')}

+
)} ); diff --git a/frontend/src/metadata/components/dialog/metadata-tags-status-dialog/turn-off-confirm.js b/frontend/src/metadata/components/dialog/turn-off-confirm-dialog.js similarity index 67% rename from frontend/src/metadata/components/dialog/metadata-tags-status-dialog/turn-off-confirm.js rename to frontend/src/metadata/components/dialog/turn-off-confirm-dialog.js index 88dabc5045..b8e5c59ff2 100644 --- a/frontend/src/metadata/components/dialog/metadata-tags-status-dialog/turn-off-confirm.js +++ b/frontend/src/metadata/components/dialog/turn-off-confirm-dialog.js @@ -1,14 +1,14 @@ import React from 'react'; import PropTypes from 'prop-types'; import { Modal, ModalHeader, ModalBody, ModalFooter, Button } from 'reactstrap'; -import { gettext } from '../../../../utils/constants'; +import { gettext } from '../../../utils/constants'; -const TurnOffConfirmDialog = ({ toggle, submit }) => { +const TurnOffConfirmDialog = ({ title, children, toggle, submit }) => { return ( - {gettext('Turn off tags')} + {title} -

{gettext('Do you really want to turn off tags? Existing tags will all be deleted.')}

+ {children}
@@ -19,6 +19,8 @@ const TurnOffConfirmDialog = ({ toggle, submit }) => { }; TurnOffConfirmDialog.propTypes = { + title: PropTypes.string, + children: PropTypes.any, toggle: PropTypes.func.isRequired, submit: PropTypes.func.isRequired }; diff --git a/frontend/src/metadata/components/metadata-details/constants.js b/frontend/src/metadata/components/metadata-details/constants.js index 1b811db4ee..c8e6eb7f35 100644 --- a/frontend/src/metadata/components/metadata-details/constants.js +++ b/frontend/src/metadata/components/metadata-details/constants.js @@ -21,6 +21,7 @@ export const NOT_DISPLAY_COLUMN_KEYS = [ PRIVATE_COLUMN_KEY.LOCATION, PRIVATE_COLUMN_KEY.FACE_LINKS, PRIVATE_COLUMN_KEY.FACE_VECTORS, + PRIVATE_COLUMN_KEY.OCR, ]; export { diff --git a/frontend/src/metadata/constants/column/common.js b/frontend/src/metadata/constants/column/common.js index 885248bad5..eceb417cbc 100644 --- a/frontend/src/metadata/constants/column/common.js +++ b/frontend/src/metadata/constants/column/common.js @@ -13,6 +13,7 @@ export const NOT_DISPLAY_COLUMN_KEYS = [ PRIVATE_COLUMN_KEY.IS_DIR, PRIVATE_COLUMN_KEY.FACE_LINKS, PRIVATE_COLUMN_KEY.FACE_VECTORS, + PRIVATE_COLUMN_KEY.OCR, ]; export const VIEW_NOT_DISPLAY_COLUMN_KEYS = [ diff --git a/frontend/src/metadata/constants/column/private.js b/frontend/src/metadata/constants/column/private.js index 333c45d7b9..3d8fd34fd4 100644 --- a/frontend/src/metadata/constants/column/private.js +++ b/frontend/src/metadata/constants/column/private.js @@ -37,6 +37,9 @@ export const PRIVATE_COLUMN_KEY = { // tag TAGS: '_tags', + + // ocr + OCR: '_ocr', }; export const PRIVATE_COLUMN_KEYS = [ @@ -71,6 +74,7 @@ export const PRIVATE_COLUMN_KEYS = [ PRIVATE_COLUMN_KEY.FACE_VECTORS, PRIVATE_COLUMN_KEY.FILE_RATE, PRIVATE_COLUMN_KEY.TAGS, + PRIVATE_COLUMN_KEY.OCR, ]; export const EDITABLE_PRIVATE_COLUMN_KEYS = [ diff --git a/frontend/src/metadata/context.js b/frontend/src/metadata/context.js index d20408a8dc..6657b00b10 100644 --- 
a/frontend/src/metadata/context.js +++ b/frontend/src/metadata/context.js @@ -235,6 +235,11 @@ class Context { return this.metadataAPI.generateFileTags(repoID, filePath); }; + ocr = (filePath) => { + const repoID = this.settings['repoID']; + return this.metadataAPI.ocr(repoID, filePath); + }; + extractFileDetails = (objIds) => { const repoID = this.settings['repoID']; return this.metadataAPI.extractFileDetails(repoID, objIds); diff --git a/frontend/src/metadata/index.js b/frontend/src/metadata/index.js index 3b87ac90e4..bc87f00ee4 100644 --- a/frontend/src/metadata/index.js +++ b/frontend/src/metadata/index.js @@ -4,6 +4,7 @@ export { default as SeafileMetadata } from './views'; export { default as MetadataStatusManagementDialog } from './components/dialog/metadata-status-manage-dialog'; export { default as MetadataFaceRecognitionDialog } from './components/dialog/metadata-face-recognition-dialog'; export { default as MetadataTagsStatusDialog } from './components/dialog/metadata-tags-status-dialog'; +export { default as MetadataOCRStatusManagementDialog } from './components/dialog/metadata-ocr-status-management-dialog'; export { default as MetadataDetails } from './components/metadata-details'; export { default as MetadataTreeView } from './metadata-tree-view'; export { default as metadataAPI } from './api'; diff --git a/frontend/src/metadata/utils/column/index.js b/frontend/src/metadata/utils/column/index.js index 0bf12029ae..e9fa766f13 100644 --- a/frontend/src/metadata/utils/column/index.js +++ b/frontend/src/metadata/utils/column/index.js @@ -193,6 +193,8 @@ export const getColumnDisplayName = (key, name) => { return gettext('Document keywords'); case PRIVATE_COLUMN_KEY.FILE_DESCRIPTION: return gettext('Description'); + case PRIVATE_COLUMN_KEY.OCR: + return gettext('OCR result'); case PRIVATE_COLUMN_KEY.FILE_EXPIRED: return gettext('Is expired'); case PRIVATE_COLUMN_KEY.FILE_STATUS: @@ -259,6 +261,8 @@ export const getNormalizedColumnType = (key, type) => { 
return CellType.TEXT; case PRIVATE_COLUMN_KEY.FILE_DESCRIPTION: return CellType.LONG_TEXT; + case PRIVATE_COLUMN_KEY.OCR: + return CellType.TEXT; case PRIVATE_COLUMN_KEY.FILE_EXPIRED: return CellType.CHECKBOX; case PRIVATE_COLUMN_KEY.FILE_STATUS: diff --git a/frontend/src/metadata/views/table/context-menu/index.js b/frontend/src/metadata/views/table/context-menu/index.js index 34bbfd70f9..4e2f89df42 100644 --- a/frontend/src/metadata/views/table/context-menu/index.js +++ b/frontend/src/metadata/views/table/context-menu/index.js @@ -4,6 +4,7 @@ import toaster from '../../../../components/toast'; import { gettext, siteRoot } from '../../../../utils/constants'; import { Utils } from '../../../../utils/utils'; import { useMetadataView } from '../../../hooks/metadata-view'; +import { useMetadataStatus } from '../../../../hooks'; import { getColumnByKey, isNameColumn } from '../../../utils/column'; import { checkIsDir } from '../../../utils/row'; import { EVENT_BUS_TYPE, EVENT_BUS_TYPE as METADATA_EVENT_BUS_TYPE, PRIVATE_COLUMN_KEY } from '../../../constants'; @@ -12,14 +13,13 @@ import { getFileNameFromRecord, getParentDirFromRecord, getFileObjIdFromRecord, } from '../../../utils/cell'; import FileTagsDialog from '../../../components/dialog/file-tags-dialog'; -import './index.css'; - const OPERATION = { CLEAR_SELECTED: 'clear-selected', COPY_SELECTED: 'copy-selected', OPEN_PARENT_FOLDER: 'open-parent-folder', OPEN_IN_NEW_TAB: 'open-new-tab', GENERATE_DESCRIPTION: 'generate-description', + OCR: 'ocr', IMAGE_CAPTION: 'image-caption', FILE_TAGS: 'file-tags', DELETE_RECORD: 'delete-record', @@ -40,6 +40,7 @@ const ContextMenu = (props) => { const [fileTagsRecord, setFileTagsRecord] = useState(null); const { metadata } = useMetadataView(); + const { enableOCR } = useMetadataStatus(); const checkCanModifyRow = (row) => { return window.sfMetadataContext.canModifyRow(row); @@ -139,6 +140,10 @@ const ContextMenu = (props) => { } } + if (enableOCR && canModifyRow && 
Utils.imageCheck(fileName)) { + list.push({ value: OPERATION.OCR, label: gettext('OCR'), record }); + } + if (canModifyRow && (Utils.imageCheck(fileName) || Utils.videoCheck(fileName))) { list.push({ value: OPERATION.FILE_DETAIL, label: gettext('Extract file detail'), record: record }); } @@ -157,7 +162,7 @@ const ContextMenu = (props) => { } return list; - }, [visible, isGroupView, selectedPosition, recordMetrics, selectedRange, metadata, recordGetterByIndex, checkIsDescribableDoc, getAbleDeleteRecords]); + }, [visible, isGroupView, selectedPosition, recordMetrics, selectedRange, metadata, recordGetterByIndex, checkIsDescribableDoc, enableOCR, getAbleDeleteRecords]); const handleHide = useCallback((event) => { if (!menuRef.current && visible) { @@ -256,6 +261,34 @@ const ContextMenu = (props) => { setFileTagsRecord(record); }, []); + const ocr = useCallback((record) => { + const ocrResultColumnKey = PRIVATE_COLUMN_KEY.OCR; + let path = ''; + let idOldRecordData = {}; + let idOriginalOldRecordData = {}; + const fileName = getFileNameFromRecord(record); + if (Utils.imageCheck(fileName) && checkCanModifyRow(record)) { + const parentDir = getParentDirFromRecord(record); + path = Utils.joinPath(parentDir, fileName); + idOldRecordData[record[PRIVATE_COLUMN_KEY.ID]] = { [ocrResultColumnKey]: record[ocrResultColumnKey] }; + idOriginalOldRecordData[record[PRIVATE_COLUMN_KEY.ID]] = { [ocrResultColumnKey]: record[ocrResultColumnKey] }; + } + if (path === '') return; + window.sfMetadataContext.ocr(path).then(res => { + const ocrResult = res.data.ocr_result; + const updateRecordId = record[PRIVATE_COLUMN_KEY.ID]; + const recordIds = [updateRecordId]; + let idRecordUpdates = {}; + let idOriginalRecordUpdates = {}; + idRecordUpdates[updateRecordId] = { [ocrResultColumnKey]: ocrResult ? JSON.stringify(ocrResult) : null }; + idOriginalRecordUpdates[updateRecordId] = { [ocrResultColumnKey]: ocrResult ? 
JSON.stringify(ocrResult) : null }; + updateRecords({ recordIds, idRecordUpdates, idOriginalRecordUpdates, idOldRecordData, idOriginalOldRecordData }); + }).catch(error => { + const errorMessage = gettext('OCR failed'); + toaster.danger(errorMessage); + }); + }, [updateRecords]); + const updateFileDetails = useCallback((records) => { const recordObjIds = records.map(record => getFileObjIdFromRecord(record)); if (recordObjIds.length > 50) { @@ -331,6 +364,12 @@ const ContextMenu = (props) => { toggleFileTagsRecord(record); break; } + case OPERATION.OCR: { + const { record } = option; + if (!record) break; + ocr(record); + break; + } case OPERATION.DELETE_RECORD: { const { record } = option; if (!record || !record._id || !deleteRecords) break; @@ -374,7 +413,7 @@ const ContextMenu = (props) => { } } setVisible(false); - }, [onOpenFileInNewTab, onOpenParentFolder, onCopySelected, onClearSelected, generateDescription, imageCaption, deleteRecords, toggleDeleteFolderDialog, selectNone, updateFileDetails, toggleFileTagsRecord]); + }, [onOpenFileInNewTab, onOpenParentFolder, onCopySelected, onClearSelected, generateDescription, imageCaption, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateFileDetails, toggleFileTagsRecord]); const getMenuPosition = useCallback((x = 0, y = 0) => { let menuStyles = { diff --git a/seahub/ai/apis.py b/seahub/ai/apis.py index 39da68bffd..654f7380bc 100644 --- a/seahub/ai/apis.py +++ b/seahub/ai/apis.py @@ -15,7 +15,7 @@ from seahub.api2.throttling import UserRateThrottle from seahub.api2.authentication import TokenAuthentication from seahub.utils import get_file_type_and_ext, IMAGE from seahub.views import check_folder_permission -from seahub.ai.utils import image_caption, verify_ai_config, generate_summary, generate_file_tags +from seahub.ai.utils import image_caption, verify_ai_config, generate_summary, generate_file_tags, ocr logger = logging.getLogger(__name__) @@ -216,3 +216,66 @@ class GenerateFileTags(APIView): return 
api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) return Response(resp_json, resp.status_code) + + +class OCR(APIView): + authentication_classes = (TokenAuthentication, SessionAuthentication) + permission_classes = (IsAuthenticated,) + throttle_classes = (UserRateThrottle,) + + def post(self, request): + if not verify_ai_config(): + return api_error(status.HTTP_400_BAD_REQUEST, 'OCR server not configured') + + repo_id = request.data.get('repo_id') + path = request.data.get('path') + + if not repo_id: + return api_error(status.HTTP_400_BAD_REQUEST, 'repo_id invalid') + if not path: + return api_error(status.HTTP_400_BAD_REQUEST, 'path invalid') + + repo = seafile_api.get_repo(repo_id) + if not repo: + error_msg = 'Library %s not found.' % repo_id + return api_error(status.HTTP_404_NOT_FOUND, error_msg) + + try: + record = RepoMetadata.objects.filter(repo_id=repo_id).first() + except Exception as e: + logger.error(e) + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + permission = check_folder_permission(request, repo_id, os.path.dirname(path)) + if not permission: + error_msg = 'Permission denied.' 
+ return api_error(status.HTTP_403_FORBIDDEN, error_msg) + + try: + file_id = seafile_api.get_file_id_by_path(repo_id, path) + except SearpcError as e: + logger.error(e) + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error') + + if not file_id: + return api_error(status.HTTP_404_NOT_FOUND, f"File {path} not found") + + token = seafile_api.get_fileserver_access_token(repo_id, file_id, 'download', request.user.username, use_onetime=True) + if not token: + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + params = { + 'path': path, + 'download_token': token + } + + try: + resp = ocr(params) + resp_json = resp.json() + except Exception as e: + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + return Response(resp_json, resp.status_code) diff --git a/seahub/ai/utils.py b/seahub/ai/utils.py index e0a1be94ce..6f1f79ad91 100644 --- a/seahub/ai/utils.py +++ b/seahub/ai/utils.py @@ -40,3 +40,9 @@ def generate_file_tags(params): url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/generate-file-tags/') resp = requests.post(url, json=params, headers=headers, timeout=30) return resp + +def ocr(params): + headers = gen_headers() + url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/ocr/') + resp = requests.post(url, json=params, headers=headers, timeout=30) + return resp diff --git a/seahub/repo_metadata/apis.py b/seahub/repo_metadata/apis.py index d8564d6835..8854c70a74 100644 --- a/seahub/repo_metadata/apis.py +++ b/seahub/repo_metadata/apis.py @@ -16,7 +16,7 @@ from seahub.views import check_folder_permission from seahub.repo_metadata.utils import add_init_metadata_task, gen_unique_id, init_metadata, \ get_unmodifiable_columns, can_read_metadata, init_faces, \ extract_file_details, get_someone_similar_faces, remove_faces_table, FACES_SAVE_PATH, \ - init_tags, remove_tags_table, add_init_face_recognition_task + init_tags, remove_tags_table, 
add_init_face_recognition_task, init_ocr, remove_ocr_column from seahub.repo_metadata.metadata_server_api import MetadataServerAPI, list_metadata_view_records from seahub.utils.timeutils import datetime_to_isoformat_timestr from seahub.utils.repo import is_repo_admin @@ -51,6 +51,8 @@ class MetadataManage(APIView): is_tags_enabled = False tags_lang = '' details_settings = '{}' + is_ocr_enabled = False + try: record = RepoMetadata.objects.filter(repo_id=repo_id).first() if record and record.enabled: @@ -58,9 +60,11 @@ class MetadataManage(APIView): details_settings = record.details_settings if not details_settings: details_settings = '{}' - if record and record.tags_enabled: - is_tags_enabled = True - tags_lang = record.tags_lang + if record.tags_enabled: + is_tags_enabled = True + tags_lang = record.tags_lang + if record.ocr_enabled: + is_ocr_enabled = True except Exception as e: logger.error(e) error_msg = 'Internal Server Error' @@ -70,7 +74,8 @@ class MetadataManage(APIView): 'enabled': is_enabled, 'tags_enabled': is_tags_enabled, 'tags_lang': tags_lang, - 'details_settings': details_settings + 'details_settings': details_settings, + 'ocr_enabled': is_ocr_enabled }) def put(self, request, repo_id): @@ -154,6 +159,7 @@ class MetadataManage(APIView): record.face_recognition_enabled = False record.tags_enabled = False record.details_settings = '{}' + record.ocr_enabled = False record.save() RepoMetadataViews.objects.filter(repo_id=repo_id).delete() except Exception as e: @@ -205,6 +211,78 @@ class MetadataDetailsSettingsView(APIView): return Response({'success': True}) + +class MetadataOCRManageView(APIView): + authentication_classes = (TokenAuthentication, SessionAuthentication) + permission_classes = (IsAuthenticated, ) + throttle_classes = (UserRateThrottle, ) + + def put(self, request, repo_id): + + # resource check + repo = seafile_api.get_repo(repo_id) + if not repo: + error_msg = f'Library {repo_id} not found.' 
+ return api_error(status.HTTP_404_NOT_FOUND, error_msg) + + if not is_repo_admin(request.user.username, repo_id): + error_msg = 'Permission denied.' + return api_error(status.HTTP_403_FORBIDDEN, error_msg) + + metadata = RepoMetadata.objects.filter(repo_id=repo_id).first() + if not metadata or not metadata.enabled: + error_msg = f'The metadata module is not enabled for repo {repo_id}.' + return api_error(status.HTTP_404_NOT_FOUND, error_msg) + + try: + metadata.ocr_enabled = True + metadata.save() + except Exception as e: + logger.exception(e) + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error') + + metadata_server_api = MetadataServerAPI(repo_id, request.user.username) + init_ocr(metadata_server_api) + + return Response({'success': True}) + + def delete(self, request, repo_id): + # resource check + repo = seafile_api.get_repo(repo_id) + if not repo: + error_msg = f'Library {repo_id} not found.' + return api_error(status.HTTP_404_NOT_FOUND, error_msg) + + # permission check + if not is_repo_admin(request.user.username, repo_id): + error_msg = 'Permission denied.' + return api_error(status.HTTP_403_FORBIDDEN, error_msg) + + # check dose the repo have opened metadata manage + record = RepoMetadata.objects.filter(repo_id=repo_id).first() + if not record or not record.enabled or not record.ocr_enabled: + error_msg = f'The repo {repo_id} has disabled the OCR.' 
+ return api_error(status.HTTP_409_CONFLICT, error_msg) + + metadata_server_api = MetadataServerAPI(repo_id, request.user.username) + try: + remove_ocr_column(metadata_server_api) + except Exception as err: + logger.error(err) + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + try: + record.ocr_enabled = False + record.save() + except Exception as e: + logger.error(e) + error_msg = 'Internal Server Error' + return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg) + + return Response({'success': True}) + + class MetadataRecords(APIView): authentication_classes = (TokenAuthentication, SessionAuthentication) permission_classes = (IsAuthenticated, ) diff --git a/seahub/repo_metadata/metadata_server_api.py b/seahub/repo_metadata/metadata_server_api.py index 1cd0393ae7..b31dc4c9fe 100644 --- a/seahub/repo_metadata/metadata_server_api.py +++ b/seahub/repo_metadata/metadata_server_api.py @@ -55,6 +55,8 @@ def list_metadata_view_records(repo_id, user, view, start=0, limit=1000): column_name = column.get('name') if column_name == METADATA_TABLE.columns.face_vectors.name: continue + elif column_name == METADATA_TABLE.columns.ocr.name: + continue column_name_str = '`%s`, ' % column_name query_fields_str += column_name_str query_fields_str = query_fields_str.strip(', ') diff --git a/seahub/repo_metadata/models.py b/seahub/repo_metadata/models.py index 5f0dbf9f93..35704f09bb 100644 --- a/seahub/repo_metadata/models.py +++ b/seahub/repo_metadata/models.py @@ -69,6 +69,7 @@ class RepoMetadata(models.Model): tags_lang = models.CharField(max_length=36) last_face_cluster_time = models.DateTimeField(db_index=True, blank=True, null=True) details_settings = models.TextField() + ocr_enabled = models.BooleanField(db_index=True) objects = RepoMetadataManager() diff --git a/seahub/repo_metadata/urls.py b/seahub/repo_metadata/urls.py index 62d8f88e2e..7bfb59bf22 100644 --- a/seahub/repo_metadata/urls.py +++ 
b/seahub/repo_metadata/urls.py @@ -2,7 +2,7 @@ from django.urls import re_path from .apis import MetadataRecords, MetadataManage, MetadataColumns, MetadataRecordInfo, \ MetadataViews, MetadataViewsMoveView, MetadataViewsDetailView, MetadataViewsDuplicateView, FacesRecords, \ FaceRecognitionManage, FacesRecord, MetadataExtractFileDetails, PeoplePhotos, MetadataTagsStatusManage, MetadataTags, \ - MetadataFileTags, MetadataTagFiles, MetadataDetailsSettingsView + MetadataFileTags, MetadataTagFiles, MetadataDetailsSettingsView, MetadataOCRManageView urlpatterns = [ re_path(r'^$', MetadataManage.as_view(), name='api-v2.1-metadata'), @@ -27,6 +27,9 @@ urlpatterns = [ # details settings re_path(r'^details-settings/', MetadataDetailsSettingsView.as_view(), name='api-v2.1-metadata-details-settings'), + # ocr + re_path(r'^ocr/', MetadataOCRManageView.as_view(), name='api-v2.1-metadata-ocr'), + # tags api re_path(r'^tags-status/$', MetadataTagsStatusManage.as_view(), name='api-v2.1-metadata-tags-status'), re_path(r'^tags/$', MetadataTags.as_view(), name='api-v2.1-metadata-tags'), diff --git a/seahub/repo_metadata/utils.py b/seahub/repo_metadata/utils.py index eb36f28d1e..e14f189ddd 100644 --- a/seahub/repo_metadata/utils.py +++ b/seahub/repo_metadata/utils.py @@ -176,6 +176,7 @@ def remove_faces_table(metadata_server_api): metadata_server_api.delete_column(table['id'], column['key'], True) +# tag def get_tag_link_column(table_id): from seafevents.repo_metadata.constants import METADATA_TABLE, TAGS_TABLE columns = [ @@ -223,7 +224,6 @@ def init_tags(metadata_server_api): metadata_server_api.add_columns(table_id, tag_columns) - def remove_tags_table(metadata_server_api): from seafevents.repo_metadata.constants import METADATA_TABLE, TAGS_TABLE metadata = metadata_server_api.get_metadata() @@ -239,6 +239,32 @@ def remove_tags_table(metadata_server_api): metadata_server_api.delete_column(table['id'], column['key'], True) +# ocr +def init_ocr(metadata_server_api): + from 
seafevents.repo_metadata.constants import METADATA_TABLE + + remove_ocr_column(metadata_server_api) + + # init ocr column + columns = [ + METADATA_TABLE.columns.ocr.to_dict(), + ] + metadata_server_api.add_columns(METADATA_TABLE.id, columns) + + +def remove_ocr_column(metadata_server_api): + from seafevents.repo_metadata.constants import METADATA_TABLE + metadata = metadata_server_api.get_metadata() + + tables = metadata.get('tables', []) + for table in tables: + if table['name'] == METADATA_TABLE.name: + columns = table.get('columns', []) + for column in columns: + if column['key'] == METADATA_TABLE.columns.ocr.key: + metadata_server_api.delete_column(table['id'], METADATA_TABLE.columns.ocr.key, True) + + def get_file_download_token(repo_id, file_id, username): return seafile_api.get_fileserver_access_token(repo_id, file_id, 'download', username, use_onetime=True) diff --git a/seahub/urls.py b/seahub/urls.py index 7ce5465c8e..4ed758dab3 100644 --- a/seahub/urls.py +++ b/seahub/urls.py @@ -2,7 +2,7 @@ from django.urls import include, path, re_path from django.views.generic import TemplateView -from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags +from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR from seahub.api2.endpoints.share_link_auth import ShareLinkUserAuthView, ShareLinkEmailAuthView from seahub.api2.endpoints.internal_api import InternalUserListView, InternalCheckShareLinkAccess, \ InternalCheckFileOperationAccess @@ -1048,4 +1048,5 @@ urlpatterns += [ re_path(r'^api/v2.1/ai/image-caption/$', ImageCaption.as_view(), name='api-v2.1-image-caption'), re_path(r'^api/v2.1/ai/generate-file-tags/$', GenerateFileTags.as_view(), name='api-v2.1-generate-file-tags'), re_path(r'^api/v2.1/ai/generate-summary/$', GenerateSummary.as_view(), name='api-v2.1-generate-summary'), + re_path(r'^api/v2.1/ai/ocr/$', OCR.as_view(), name='api-v2.1-ocr'), ]