1
0
mirror of https://github.com/haiwen/seahub.git synced 2025-09-26 15:26:19 +00:00

extract text from pdf and image

This commit is contained in:
‘JoinTyang’
2025-05-24 15:59:08 +08:00
committed by 杨国璇
parent 6c9109fc89
commit 5639169c2b
11 changed files with 161 additions and 17 deletions

View File

@@ -106,6 +106,24 @@ export const MetadataAIOperationsProvider = ({
});
}, [repoID]);
/**
 * Ask the AI service to extract the text of a single file.
 *
 * An in-progress toaster is shown while the request is pending and closed once
 * it settles. On failure a danger toaster with a generic message is shown.
 *
 * @param {Object} file
 * @param {string} file.parentDir - directory part of the file location
 * @param {string} file.fileName - file name, joined onto parentDir to build the path
 * @param {Object} [callbacks]
 * @param {Function} [callbacks.success_callback] - called with `{ parentDir, fileName, extractedText }`
 * @param {Function} [callbacks.fail_callback] - called without arguments when the request fails
 */
const extractText = useCallback(({ parentDir, fileName }, { success_callback, fail_callback } = {}) => {
  const filePath = Utils.joinPath(parentDir, fileName);
  // duration: null is passed so the toaster is closed explicitly below once the request settles.
  const inProgressToaster = toaster.notifyInProgress(gettext('Extracting text by AI...'), { duration: null });
  metadataAPI.extractText(repoID, filePath).then(res => {
    // A response without `data` or `text` yields an empty string instead of throwing.
    const extractedText = res?.data?.text || '';
    inProgressToaster.close();
    success_callback && success_callback({ parentDir, fileName, extractedText });
  }).catch(error => {
    // Reached when the request rejects, and also when the success handler above throws.
    inProgressToaster.close();
    const errorMessage = gettext('Failed to extract text');
    toaster.danger(errorMessage);
    fail_callback && fail_callback();
  });
}, [repoID]);
return (
<MetadataAIOperationsContext.Provider value={{
enableMetadata,
@@ -119,6 +137,7 @@ export const MetadataAIOperationsProvider = ({
extractFilesDetails,
extractFileDetails,
faceRecognition,
extractText
}}>
{children}
</MetadataAIOperationsContext.Provider>

View File

@@ -406,6 +406,15 @@ class MetadataManagerAPI {
return this.req.post(url, params);
};
// Send the library id and file path to the AI text-extraction endpoint.
// Returns whatever `this.req.post` returns for that request.
extractText = (repoID, filePath) => {
  const payload = { path: filePath, repo_id: repoID };
  return this.req.post(this.server + '/api/v2.1/ai/extract-text/', payload);
};
}
const metadataAPI = new MetadataManagerAPI();

View File

@@ -17,6 +17,7 @@ const OPERATION = {
OCR: 'ocr',
FILE_TAGS: 'file-tags',
FILE_DETAIL: 'file-detail',
EXTRACT_TEXT: 'extract-text',
};
const AIIcon = () => {
@@ -24,9 +25,9 @@ const AIIcon = () => {
const [isMenuShow, setMenuShow] = useState(false);
const [isFileTagsDialogShow, setFileTagsDialogShow] = useState(false);
const { enableMetadata, enableTags, enableOCR } = useMetadataStatus();
const { enableMetadata, enableTags } = useMetadataStatus();
const { canModifyRecord, columns, record, onChange, onLocalRecordChange, updateFileTags } = useMetadataDetails();
const { onOCR, generateDescription, extractFileDetails } = useMetadataAIOperations();
const { onOCR, generateDescription, extractFileDetails, extractText } = useMetadataAIOperations();
const options = useMemo(() => {
if (!canModifyRecord || !record || checkIsDir(record)) return [];
@@ -34,6 +35,7 @@ const AIIcon = () => {
const fileName = getFileNameFromRecord(record);
const isImage = Utils.imageCheck(fileName);
const isVideo = Utils.videoCheck(fileName);
const isPdf = Utils.pdfCheck(fileName);
const isDescribableDoc = Utils.isDescriptionSupportedFile(fileName);
let list = [];
@@ -45,10 +47,6 @@ const AIIcon = () => {
});
}
if (enableOCR && isImage) {
list.push({ value: OPERATION.OCR, label: gettext('OCR'), record });
}
if (isImage || isVideo) {
list.push({ value: OPERATION.FILE_DETAIL, label: gettext('Extract file detail'), record });
}
@@ -56,8 +54,12 @@ const AIIcon = () => {
if (enableTags && isDescribableDoc && !isVideo) {
list.push({ value: OPERATION.FILE_TAGS, label: gettext('Generate file tags'), record });
}
if (isImage || isPdf) {
list.push({ value: OPERATION.EXTRACT_TEXT, label: gettext('Extract text'), record });
}
return list;
}, [enableOCR, enableTags, canModifyRecord, columns, record]);
}, [enableTags, canModifyRecord, columns, record]);
const onToggle = useCallback((event) => {
event && event.preventDefault();
@@ -126,6 +128,14 @@ const AIIcon = () => {
});
break;
}
case OPERATION.EXTRACT_TEXT: {
extractText({ parentDir, fileName }, {
success_callback: ({ extractedText }) => {
console.log(extractedText)
},
});
break;
}
default: {
setMenuShow(false);
break;

View File

@@ -46,6 +46,7 @@ export const EVENT_BUS_TYPE = {
UPDATE_RECORD_DETAILS: 'update_record_details',
UPDATE_FACE_RECOGNITION: 'update_face_recognition',
GENERATE_DESCRIPTION: 'generate_description',
EXTRACT_TEXT: 'extract_text',
OCR: 'ocr',
// metadata

View File

@@ -38,7 +38,7 @@ export const MetadataViewProvider = ({
const { collaborators } = useCollaborators();
const { isBeingBuilt, setIsBeingBuilt } = useMetadata();
const { onOCR, generateDescription, extractFilesDetails, faceRecognition } = useMetadataAIOperations();
const { onOCR, generateDescription, extractFilesDetails, faceRecognition, extractText } = useMetadataAIOperations();
const tableChanged = useCallback(() => {
setMetadata(storeRef.current.data);
@@ -403,6 +403,17 @@ export const MetadataViewProvider = ({
});
}, [modifyRecords, onOCR]);
// Trigger AI text extraction for the file described by `record`.
// Does nothing when the record has no file name or no parent directory.
// NOTE(review): on success the extracted text is only written to the console;
// nothing is stored on the record here.
const updateExtractText = useCallback((record) => {
  const parentDir = getParentDirFromRecord(record);
  const fileName = getFileNameFromRecord(record);
  if (!(fileName && parentDir)) return;

  const success_callback = ({ extractedText }) => {
    console.log(extractedText);
  };
  extractText({ parentDir, fileName }, { success_callback });
}, [extractText]);
// init
useEffect(() => {
setLoading(true);
@@ -442,6 +453,7 @@ export const MetadataViewProvider = ({
const unsubscribeUpdateFaceRecognition = eventBus.subscribe(EVENT_BUS_TYPE.UPDATE_FACE_RECOGNITION, updateFaceRecognition);
const unsubscribeUpdateDescription = eventBus.subscribe(EVENT_BUS_TYPE.GENERATE_DESCRIPTION, updateRecordDescription);
const unsubscribeOCR = eventBus.subscribe(EVENT_BUS_TYPE.OCR, ocr);
const unsubscribeUpdateExtract = eventBus.subscribe(EVENT_BUS_TYPE.EXTRACT_TEXT, updateExtractText);
return () => {
if (window.sfMetadataContext) {
@@ -468,6 +480,7 @@ export const MetadataViewProvider = ({
unsubscribeUpdateFaceRecognition();
unsubscribeUpdateDescription();
unsubscribeOCR();
unsubscribeUpdateExtract();
delayReloadDataTimer.current && clearTimeout(delayReloadDataTimer.current);
};
// eslint-disable-next-line react-hooks/exhaustive-deps
@@ -507,6 +520,7 @@ export const MetadataViewProvider = ({
updateRecordDetails,
updateFaceRecognition,
updateRecordDescription,
updateExtractText,
ocr,
}}
>

View File

@@ -33,6 +33,7 @@ const OPERATION = {
FILE_DETAILS: 'file-details',
DETECT_FACES: 'detect-faces',
MOVE: 'move',
EXTRACT_TEXT: 'extract_text',
};
const { enableSeafileAI } = window.app.config;
@@ -40,7 +41,7 @@ const { enableSeafileAI } = window.app.config;
const ContextMenu = ({
isGroupView, selectedRange, selectedPosition, recordMetrics, recordGetterByIndex, onClearSelected, onCopySelected,
getTableContentRect, getTableCanvasContainerRect, deleteRecords, selectNone, updateFileTags, moveRecord, addFolder, updateRecordDetails,
updateFaceRecognition, updateRecordDescription, ocr,
updateFaceRecognition, updateRecordDescription, ocr, updateExtractText
}) => {
const currentRecord = useRef(null);
@@ -211,6 +212,7 @@ const ContextMenu = ({
const isDescribableFile = checkIsDescribableFile(record);
const isImage = Utils.imageCheck(fileName);
const isVideo = Utils.videoCheck(fileName);
const isPdf = Utils.pdfCheck(fileName);
const aiOptions = [];
if (isImage || isVideo) {
@@ -236,6 +238,10 @@ const ContextMenu = ({
aiOptions.push({ value: OPERATION.OCR, label: gettext('OCR'), record });
}
if (isImage || isPdf) {
aiOptions.push({ value: OPERATION.EXTRACT_TEXT, label: gettext('Extract text'), record });
}
if (aiOptions.length > 0) {
list.push('Divider');
list.push(...aiOptions);
@@ -293,6 +299,11 @@ const ContextMenu = ({
ocr(record);
break;
}
case OPERATION.EXTRACT_TEXT: {
const { record } = option;
updateExtractText(record)
break;
}
case OPERATION.DELETE_RECORD: {
const { record } = option;
if (!record || !record._id || !deleteRecords) break;
@@ -346,7 +357,7 @@ const ContextMenu = ({
break;
}
}
}, [repoID, onCopySelected, onClearSelected, updateRecordDescription, toggleFileTagsRecord, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateRecordDetails, updateFaceRecognition, toggleMoveDialog]);
}, [repoID, onCopySelected, onClearSelected, updateRecordDescription, toggleFileTagsRecord, ocr, deleteRecords, toggleDeleteFolderDialog, selectNone, updateRecordDetails, updateFaceRecognition, toggleMoveDialog, updateExtractText]);
useEffect(() => {
const unsubscribeToggleMoveDialog = window.sfMetadataContext.eventBus.subscribe(EVENT_BUS_TYPE.TOGGLE_MOVE_DIALOG, toggleMoveDialog);

View File

@@ -32,6 +32,7 @@ const Table = () => {
updateRecordDetails,
updateFaceRecognition,
updateRecordDescription,
updateExtractText,
ocr,
} = useMetadataView();
const containerRef = useRef(null);
@@ -185,6 +186,7 @@ const Table = () => {
updateRecordDetails={updateRecordDetails}
updateFaceRecognition={updateFaceRecognition}
updateRecordDescription={updateRecordDescription}
updateExtractText={updateExtractText}
ocr={ocr}
/>
</div>

View File

@@ -649,6 +649,7 @@ class Records extends Component {
updateFaceRecognition={this.props.updateFaceRecognition}
updateRecordDescription={this.props.updateRecordDescription}
ocr={this.props.ocr}
updateExtractText={this.props.updateExtractText}
/>
),
hasSelectedRecord: this.hasSelectedRecord(),

View File

@@ -10,12 +10,13 @@ from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework import status
from rest_framework.views import APIView
from seahub.api2.utils import api_error
from seahub.api2.utils import api_error, get_file_size
from seahub.api2.throttling import UserRateThrottle
from seahub.api2.authentication import TokenAuthentication, SdocJWTTokenAuthentication
from seahub.utils import get_file_type_and_ext, IMAGE
from seahub.views import check_folder_permission
from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, generate_file_tags, ocr
from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, \
generate_file_tags, ocr, extract_text
logger = logging.getLogger(__name__)
@@ -341,6 +342,73 @@ class WritingAssistant(APIView):
resp = writing_assistant(params)
resp_json = resp.json()
except Exception as e:
logger.error(e)
error_msg = 'Internal Server Error'
return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)
return Response(resp_json, resp.status_code)
class ExtractText(APIView):
authentication_classes = (TokenAuthentication, SessionAuthentication)
permission_classes = (IsAuthenticated,)
throttle_classes = (UserRateThrottle,)
def post(self, request):
if not verify_ai_config():
return api_error(status.HTTP_400_BAD_REQUEST, 'AI server not configured')
repo_id = request.data.get('repo_id')
path = request.data.get('path')
if not repo_id:
return api_error(status.HTTP_400_BAD_REQUEST, 'repo_id invalid')
if not path:
return api_error(status.HTTP_400_BAD_REQUEST, 'path invalid')
file_type, _ = get_file_type_and_ext(os.path.basename(path))
if file_type != IMAGE and not path.lower().endswith('.pdf'):
return api_error(status.HTTP_400_BAD_REQUEST, 'file type not image or pdf')
repo = seafile_api.get_repo(repo_id)
if not repo:
error_msg = 'Library %s not found.' % repo_id
return api_error(status.HTTP_404_NOT_FOUND, error_msg)
permission = check_folder_permission(request, repo_id, os.path.dirname(path))
if not permission:
error_msg = 'Permission denied.'
return api_error(status.HTTP_403_FORBIDDEN, error_msg)
try:
file_id = seafile_api.get_file_id_by_path(repo_id, path)
except SearpcError as e:
logger.error(e)
return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error')
if not file_id:
return api_error(status.HTTP_404_NOT_FOUND, f"File {path} not found")
file_size = get_file_size(repo.store_id, repo.version, file_id)
if file_size >> 20 > 5:
error_msg = 'File size exceed the limit.'
return api_error(status.HTTP_400_BAD_REQUEST, error_msg)
token = seafile_api.get_fileserver_access_token(repo_id, file_id, 'download', request.user.username, use_onetime=True)
if not token:
error_msg = 'Internal Server Error'
return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)
params = {
'file_name': os.path.basename(path),
'download_token': token,
}
try:
resp = extract_text(params)
resp_json = resp.json()
except Exception as e:
logger.error(e)
error_msg = 'Internal Server Error'
return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)

View File

@@ -61,3 +61,10 @@ def writing_assistant(params):
url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/writing-assistant/')
resp = requests.post(url, json=params, headers=headers, timeout=30)
return resp
def extract_text(params):
    """POST ``params`` as JSON to the AI server's extract-text endpoint.

    The request carries the headers produced by ``gen_headers()`` and uses a
    30 second timeout. The response object from ``requests.post`` is returned
    unchanged; status handling is left to the caller.
    """
    request_headers = gen_headers()
    endpoint = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/extract-text/')
    return requests.post(endpoint, json=params, headers=request_headers, timeout=30)

View File

@@ -2,7 +2,8 @@
from django.urls import include, path, re_path
from django.views.generic import TemplateView
from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant
from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant, \
ExtractText
from seahub.api2.endpoints.file_comments import FileCommentsView, FileCommentView, FileCommentRepliesView, \
FileCommentReplyView
from seahub.api2.endpoints.share_link_auth import ShareLinkUserAuthView, ShareLinkEmailAuthView
@@ -1087,4 +1088,5 @@ urlpatterns += [
re_path(r'^api/v2.1/ai/ocr/$', OCR.as_view(), name='api-v2.1-ocr'),
re_path(r'^api/v2.1/ai/translate/$', Translate.as_view(), name='api-v2.1-translate'),
re_path(r'^api/v2.1/ai/writing-assistant/$', WritingAssistant.as_view(), name='api-v2.1-writing-assistant'),
re_path(r'^api/v2.1/ai/extract-text/$', ExtractText.as_view(), name='api-v2.1-extract-text'),
]