handle office convert perm check in cluster mode

2025-09-17 07:41:26 +00:00 · 2015-07-17 12:20:02 +08:00
parent fcf2c992ec
commit 5dee3d44dc
4 changed files with 117 additions and 70 deletions
--- a/seahub/urls.py
+++ b/seahub/urls.py
@@ -311,8 +311,9 @@ if HAS_OFFICE_CONVERTER:
    if CLUSTER_MODE and OFFICE_CONVERTOR_NODE:
        urlpatterns += patterns('',
            url(r'^office-convert/internal/add-task/$', office_convert_add_task),
-            url(r'^office-convert/internal/status/$', office_convert_query_status, {'internal': True}),
+            url(r'^office-convert/internal/status/$', office_convert_query_status, {'cluster_internal': True}),
-            url(r'^office-convert/internal/static/(?P<path>.*)$', office_convert_get_page, {'internal': True}),
+            url(r'^office-convert/internal/static/(?P<repo_id>[-0-9a-f]{36})/(?P<commit_id>[0-9a-f]{40})/(?P<path>.+)/(?P<filename>[^/].+)$',
                office_convert_get_page, {'cluster_internal': True}),
        )
 if TRAFFIC_STATS_ENABLED:
--- a/seahub/utils/init.py
+++ b/seahub/utils/init.py
@@ -952,6 +952,16 @@ if HAS_OFFICE_CONVERTER:
        return office_converter_rpc
    def office_convert_cluster_token(file_id):
        '''Return a signed token for cluster-internal office convert requests.

        The signed payload is ``<file_id>-<YYYYMMDD>``, where the date part is
        taken from ``datetime.now()`` at call time.
        '''
        from django.core import signing
        today = datetime.now().strftime('%Y%m%d')
        payload = '-'.join((file_id, today))
        signer = signing.Signer()
        return signer.sign(payload)
    def _office_convert_token_header(file_id):
        '''Build the HTTP header dict that carries the cluster token for ``file_id``.'''
        token = office_convert_cluster_token(file_id)
        return {'X-Seafile-Office-Preview-Token': token}
    def cluster_delegate(delegate_func):
        '''usage:
@@ -966,8 +976,8 @@ if HAS_OFFICE_CONVERTER:
        '''
        def decorated(func):
            def real_func(*args, **kwargs):
-                internal = kwargs.pop('internal', False)
+                cluster_internal = kwargs.pop('cluster_internal', False)
-                if CLUSTER_MODE and not OFFICE_CONVERTOR_NODE and not internal:
+                if CLUSTER_MODE and not OFFICE_CONVERTOR_NODE and not cluster_internal:
                    return delegate_func(*args)
                else:
                    return func(*args)
@@ -977,33 +987,31 @@ if HAS_OFFICE_CONVERTER:
    def delegate_add_office_convert_task(file_id, doctype, raw_path):
        '''Forward an "add convert task" request to the office convertor node.

        POSTs the task parameters to ``/office-convert/internal/add-task/``
        under OFFICE_CONVERTOR_ROOT, sends the cluster token header for
        ``file_id``, and returns the decoded JSON body of the reply.
        '''
        params = {
            'sec_token': do_md5(seahub.settings.SECRET_KEY),
            'file_id': file_id,
            'doctype': doctype,
            'raw_path': raw_path,
        }
        url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/add-task/')
        resp = do_urlopen(url,
                          data=urllib.urlencode(params),
                          headers=_office_convert_token_header(file_id))
        return json.loads(resp.read())
    def delegate_query_office_convert_status(file_id, page):
        '''Ask the office convertor node for the convert status of ``file_id``.

        Issues a GET to ``/office-convert/internal/status/`` under
        OFFICE_CONVERTOR_ROOT with ``file_id`` and ``page`` in the query
        string, sends the cluster token header for ``file_id``, and returns
        the decoded JSON body of the reply.
        '''
        base = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/status/')
        url = '%s?file_id=%s&page=%s' % (base, file_id, page)
        resp = do_urlopen(url, headers=_office_convert_token_header(file_id))
        return json.loads(resp.read())
-    def delegate_get_office_converted_page(request, path, file_id):
+    def delegate_get_office_converted_page(request, repo_id, commit_id, path, static_filename, file_id):
-        url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/static/' + path)
+        url = urljoin(OFFICE_CONVERTOR_ROOT,
-        headers = {
+                      '/office-convert/internal/static/%s/%s%s/%s' % (
-            'X-Seafile-Office-Preview-Token': do_md5(file_id + seahub.settings.SECRET_KEY),
+                          repo_id, commit_id, urlquote(path), urlquote(static_filename)))
-        }
+        url += '?file_id=' + file_id
        headers = _office_convert_token_header(file_id)
        timestamp = request.META.get('HTTP_IF_MODIFIED_SINCE')
        if timestamp:
            headers['If-Modified-Since'] = timestamp
@@ -1023,8 +1031,7 @@ if HAS_OFFICE_CONVERTER:
            content_type = mimetypes.types_map.get(ext, 'application/octet-stream')
        resp = HttpResponse(data, content_type=content_type)
-
+        if 'last-modified' in ret.headers:
        if ret.headers.has_key('last-modified'):
            resp['Last-Modified'] = ret.headers.get('last-modified')
        return resp
@@ -1052,8 +1059,10 @@ if HAS_OFFICE_CONVERTER:
        return ret
    @cluster_delegate(delegate_get_office_converted_page)
    def get_office_converted_page(request, repo_id, commit_id, path, static_filename, file_id):
        '''Serve one converted static file out of OFFICE_HTML_DIR.

        The file is looked up as ``<file_id>/<static_filename>`` below
        OFFICE_HTML_DIR. ``repo_id``, ``commit_id`` and ``path`` are not used
        here; ``cluster_delegate`` calls this function and its delegate with
        the same positional arguments.
        '''
        relative_path = os.path.join(file_id, static_filename)
        return django_static_serve(request, relative_path,
                                   document_root=OFFICE_HTML_DIR)
    def prepare_converted_html(raw_path, obj_id, doctype, ret_dict):
        try:
--- a/seahub/utils/http.py
+++ b/seahub/utils/http.py
@@ -5,7 +5,20 @@ import urlparse
 import json
 from functools import wraps
-from django.http import HttpResponse
+from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden
 class _HTTPException(Exception):
    """Base exception that carries a message for an HTTP error response."""

    def __init__(self, message=''):
        self.message = message

    def __str__(self):
        # Rendered as "<ClassName>: <message>" so subclasses identify themselves.
        name = type(self).__name__
        return '%s: %s' % (name, self.message)
 class BadRequestException(_HTTPException):
    """Raised by views/helpers to signal an invalid request."""
 class RequestForbbiddenException(_HTTPException):
    """Raised by views/helpers to signal that the request is not permitted."""
 def is_safe_url(url, host=None):
    """
@@ -44,10 +57,24 @@ JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
 def json_response(func):
    """Decorator that turns a view's return value into a JSON response.

    - If the view returns an ``HttpResponse`` it is passed through unchanged.
    - Any other return value is serialized with ``json.dumps`` and sent with
      status 200 and ``JSON_CONTENT_TYPE``.
    - ``BadRequestException`` raised by the view becomes an
      ``HttpResponseBadRequest`` carrying the exception message.
    - ``RequestForbbiddenException`` raised by the view becomes an
      ``HttpResponseForbidden`` carrying the exception message.
    """
    @wraps(func)
    def wrapped(*a, **kw):
        try:
            result = func(*a, **kw)
        except BadRequestException as e:
            return HttpResponseBadRequest(e.message)
        except RequestForbbiddenException as e:
            # The attribute set by _HTTPException is `message`; reading
            # `messages` raised AttributeError and produced a 500 instead of 403.
            return HttpResponseForbidden(e.message)

        if isinstance(result, HttpResponse):
            return result
        return HttpResponse(json.dumps(result), status=200,
                            content_type=JSON_CONTENT_TYPE)
    return wrapped
 def int_param(request, key):
    """Return the GET parameter ``key`` of ``request`` converted to ``int``.

    Raises ``BadRequestException`` when the parameter is missing, empty, or
    not a valid integer literal.
    """
    raw = request.GET.get(key)
    if raw:
        try:
            return int(raw)
        except ValueError:
            pass
    raise BadRequestException()
--- a/seahub/views/file.py
+++ b/seahub/views/file.py
@@ -15,7 +15,9 @@ import posixpath
 import re
 import mimetypes
 import urlparse
 import datetime
 from django.core import signing
 from django.core.cache import cache
 from django.contrib.sites.models import RequestSite
 from django.contrib import messages
@@ -60,12 +62,12 @@ from seahub.utils.file_types import (IMAGE, PDF, DOCUMENT, SPREADSHEET, AUDIO,
                                     MARKDOWN, TEXT, OPENDOCUMENT, VIDEO)
 from seahub.utils.star import is_file_starred
 from seahub.utils import HAS_OFFICE_CONVERTER, FILEEXT_TYPE_MAP
-from seahub.utils.http import json_response
+from seahub.utils.http import json_response, int_param, BadRequestException, RequestForbbiddenException
 from seahub.views import check_folder_permission
 if HAS_OFFICE_CONVERTER:
    from seahub.utils import (
-        query_office_convert_status, add_office_convert_task,
+        query_office_convert_status, add_office_convert_task, office_convert_cluster_token,
        prepare_converted_html, OFFICE_PREVIEW_MAX_SIZE, get_office_converted_page
    )
@@ -1283,29 +1285,22 @@ def text_diff(request, repo_id):
 ########## office related
@require_POST
@csrf_exempt
@json_response
def office_convert_add_task(request):
    """Cluster-internal endpoint: queue an office convert task on this node.

    Reads ``file_id``, ``doctype`` and ``raw_path`` from the POST body.

    Returns:
        - ``HttpResponseBadRequest`` when ``file_id`` is missing or is not 40
          characters long;
        - ``HttpResponseForbidden`` when the cluster token check fails;
        - otherwise the result of ``add_office_convert_task`` (serialized by
          ``json_response``).

    Raises:
        Http404: when the office converter is not available.
    """
    if not HAS_OFFICE_CONVERTER:
        raise Http404

    # QueryDict.get() never raises KeyError; default to '' so a missing
    # file_id is reported as a bad request instead of crashing in len().
    file_id = request.POST.get('file_id', '')
    doctype = request.POST.get('doctype')
    raw_path = request.POST.get('raw_path')

    # Validate the shape first: the token check needs a string file_id.
    if len(file_id) != 40:
        return HttpResponseBadRequest('invalid params')

    if not _check_cluster_internal_token(request, file_id):
        return HttpResponseForbidden()

    # cluster_delegate pops the `cluster_internal` keyword to decide whether
    # to run locally, so pass it under that name.
    return add_office_convert_task(file_id, doctype, raw_path, cluster_internal=True)
 def _check_office_convert_perm(request, repo_id, path):
    token = request.GET.get('token', '')
@@ -1324,50 +1319,63 @@ def _check_office_convert_perm(request, repo_id, path):
        return request.user.is_authenticated() and \
            check_folder_permission(request, repo_id, '/') is not None
-@json_response
def _check_cluster_internal_token(request, file_id):
    """Return True if the request carries a valid cluster token for ``file_id``.

    The token is expected in the ``X-Seafile-Office-Preview-Token`` request
    header and must be a Django-signed value whose payload equals
    ``<file_id>-<YYYYMMDD>`` for the current date. Always returns a bool, so
    callers can safely use ``if not _check_cluster_internal_token(...)``.
    """
    # Django exposes request headers in META upper-cased, with dashes replaced
    # by underscores and an HTTP_ prefix.
    token = request.META.get('HTTP_X_SEAFILE_OFFICE_PREVIEW_TOKEN', '')
    if not token:
        # Must be False: a response object here would be truthy and let the
        # caller's `if not ...` check pass without any token.
        return False

    expected = '-'.join([file_id, datetime.datetime.now().strftime('%Y%m%d')])
    try:
        return signing.Signer().unsign(token) == expected
    except signing.BadSignature:
        return False
-    if not internal and not request.is_ajax():
def _office_convert_get_file_id_internal(request):
    """Return the ``file_id`` GET parameter of a cluster-internal request.

    Raises ``BadRequestException`` when ``file_id`` is not 40 characters long
    and ``RequestForbbiddenException`` when the cluster token check fails.
    """
    file_id = request.GET.get('file_id', '')
    if len(file_id) != 40:
        raise BadRequestException()
    if _check_cluster_internal_token(request, file_id):
        return file_id
    raise RequestForbbiddenException()
-    repo_id = request.GET.get('repo_id', '')
def _office_convert_get_file_id(request, repo_id=None, commit_id=None, path=None):
    """Resolve the file id for an office convert request after a permission check.

    ``repo_id``, ``commit_id`` and ``path`` fall back to the GET parameters of
    the same name when not passed in.

    Returns:
        The value of ``seafserv_threaded_rpc.get_file_id_by_commit_and_path``
        for the resolved repo, commit and path.

    Raises:
        BadRequestException: when any of repo_id, commit_id or path is missing.
        RequestForbbiddenException: when ``_check_office_convert_perm`` denies
            access.
    """
    repo_id = repo_id or request.GET.get('repo_id', '')
    commit_id = commit_id or request.GET.get('commit_id', '')
    path = path or request.GET.get('path', '')

    if not (repo_id and path and commit_id):
        raise BadRequestException()

    # A failed permission check is "forbidden", not "bad request".
    if not _check_office_convert_perm(request, repo_id, path):
        raise RequestForbbiddenException()

    return seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
@json_response
def office_convert_query_status(request, cluster_internal=False):
    """Report the convert status of an office file as a JSON-serializable dict.

    Non-internal requests must be AJAX, otherwise ``Http404`` is raised. The
    page number is 0 for spreadsheets and the integer ``page`` GET parameter
    otherwise. If querying the status fails, the returned dict is
    ``{'success': False, 'error': <message>}``.
    """
    if not (cluster_internal or request.is_ajax()):
        raise Http404

    if request.GET.get('doctype', None) == 'spreadsheet':
        page = 0
    else:
        page = int_param(request, 'page')

    # Internal requests carry the file id directly; normal requests resolve it
    # from repo/commit/path.
    if cluster_internal:
        resolve_file_id = _office_convert_get_file_id_internal
    else:
        resolve_file_id = _office_convert_get_file_id
    file_id = resolve_file_id(request)

    result = {'success': False}
    try:
        result = query_office_convert_status(file_id, page, cluster_internal=cluster_internal)
    except Exception as e:
        logging.exception('failed to call query_office_convert_status')
        result['error'] = str(e)
    return result
 # Valid static file path inclueds:
 # * 1.page 2.page for pdf/doc/ppt
 # * index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
 _OFFICE_PAGE_PATTERN = re.compile(r'^[\d]+\.page|file\.css|file\.outline|index.html|index_html_.*.png$')
-def office_convert_get_page(request, repo_id, commit_id, path, filename, internal=False):
+def office_convert_get_page(request, repo_id, commit_id, path, filename, cluster_internal=False):
    """Valid static file path inclueds:
    - "1.page" "2.page" for pdf/doc/ppt
    - index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
    """
    if not HAS_OFFICE_CONVERTER:
        raise Http404
@@ -1375,11 +1383,13 @@ def office_convert_get_page(request, repo_id, commit_id, path, filename, interna
        return HttpResponseForbidden()
    path = u'/' + path
-    if not _check_office_convert_perm(request, repo_id, path):
+    if cluster_internal:
-        return HttpResponseForbidden()
+        file_id = _office_convert_get_file_id_internal(request)
    else:
        file_id = _office_convert_get_file_id(request, repo_id, commit_id, path)
-    file_id = seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
+    resp = get_office_converted_page(
-    resp = get_office_converted_page(request, filename, file_id, internal=internal)
+        request, repo_id, commit_id, path, filename, file_id, cluster_internal=cluster_internal)
    if filename.endswith('.page'):
        content_type = 'text/html'
    else: