From 5dee3d44dc8ef61d9eeb23daddf1e022dd7d5a2d Mon Sep 17 00:00:00 2001
From: Shuai Lin
Date: Fri, 17 Jul 2015 12:20:02 +0800
Subject: [PATCH] handle office convert perm check in cluster mode

---
Review notes (commentary only; text between the "---" marker and the first
diff header is not part of the commit):

Purpose: requests delegated to the office-convertor node in cluster mode are
authenticated with a token sent in the X-Seafile-Office-Preview-Token header,
produced by django.core.signing.Signer over "<file_id>-<YYYYMMDD>".

Fixed in this revision of the patch (added lines only, line counts unchanged):
 * json_response: use e.message; _HTTPException defines no "messages"
   attribute, so the 403 branch raised AttributeError.
 * _check_cluster_internal_token: read the header through Django's META key
   HTTP_X_SEAFILE_OFFICE_PREVIEW_TOKEN, and return False when it is absent.
   It used to return HttpResponseForbidden(), which is truthy, so callers
   testing "if not _check_cluster_internal_token(...)" accepted the request.
 * _office_convert_get_file_id: a failed permission check raises
   RequestForbbiddenException (403), matching the HttpResponseForbidden it
   replaces, instead of BadRequestException (400).
 * office_convert_add_task: pass cluster_internal=True, the keyword that
   cluster_delegate pops.
 * office_convert_get_page docstring: "inclueds" -> "includes".

Reconstruction caveats (NOTE(review): confirm before applying):
 * Line breaks and indentation were rebuilt from the hunk line counts;
   whitespace-only removed lines and blank context lines are best guesses.
   Run "git apply --check" against the parent commit.
 * Named groups in the URL regexes were restored from the view signature
   (repo_id, commit_id, path, filename); the group name "path" on the removed
   line is assumed.
 * The author e-mail on the From: line is missing.

Known follow-ups, not changed here:
 * office_convert_get_page is not wrapped by json_response, so
   BadRequestException / RequestForbbiddenException raised by the
   _office_convert_get_file_id* helpers propagate out of that view.
 * The token embeds the current date on both sides, so a token signed just
   before midnight fails verification just after it.
 * office_convert_add_task checks the token before validating file_id; a
   missing file_id reaches '-'.join([None, ...]).

 seahub/urls.py           |  5 +-
 seahub/utils/__init__.py | 53 +++++++++++++---------
 seahub/utils/http.py     | 31 ++++++++++++-
 seahub/views/file.py     | 98 ++++++++++++++++++++++------------------
 4 files changed, 117 insertions(+), 70 deletions(-)

diff --git a/seahub/urls.py b/seahub/urls.py
index 5ae3472916..1a42362507 100644
--- a/seahub/urls.py
+++ b/seahub/urls.py
@@ -311,8 +311,9 @@ if HAS_OFFICE_CONVERTER:
     if CLUSTER_MODE and OFFICE_CONVERTOR_NODE:
         urlpatterns += patterns('',
             url(r'^office-convert/internal/add-task/$', office_convert_add_task),
-            url(r'^office-convert/internal/status/$', office_convert_query_status, {'internal': True}),
-            url(r'^office-convert/internal/static/(?P<path>.*)$', office_convert_get_page, {'internal': True}),
+            url(r'^office-convert/internal/status/$', office_convert_query_status, {'cluster_internal': True}),
+            url(r'^office-convert/internal/static/(?P<repo_id>[-0-9a-f]{36})/(?P<commit_id>[0-9a-f]{40})/(?P<path>.+)/(?P<filename>[^/].+)$',
+                office_convert_get_page, {'cluster_internal': True}),
         )

 if TRAFFIC_STATS_ENABLED:
diff --git a/seahub/utils/__init__.py b/seahub/utils/__init__.py
index e522728ea1..ba6c11e28d 100644
--- a/seahub/utils/__init__.py
+++ b/seahub/utils/__init__.py
@@ -952,6 +952,16 @@ if HAS_OFFICE_CONVERTER:

         return office_converter_rpc

+    def office_convert_cluster_token(file_id):
+        from django.core import signing
+        s = '-'.join([file_id, datetime.now().strftime('%Y%m%d')])
+        return signing.Signer().sign(s)
+
+    def _office_convert_token_header(file_id):
+        return {
+            'X-Seafile-Office-Preview-Token': office_convert_cluster_token(file_id),
+        }
+
     def cluster_delegate(delegate_func):
         '''usage:

@@ -966,44 +976,42 @@ if HAS_OFFICE_CONVERTER:
         '''
         def decorated(func):
             def real_func(*args, **kwargs):
-                internal = kwargs.pop('internal', False)
-                if CLUSTER_MODE and not OFFICE_CONVERTOR_NODE and not internal:
+                cluster_internal = kwargs.pop('cluster_internal', False)
+                if CLUSTER_MODE and not OFFICE_CONVERTOR_NODE and not cluster_internal:
                     return delegate_func(*args)
                 else:
                     return func(*args)
             return real_func
-        
+
         return decorated

     def delegate_add_office_convert_task(file_id, doctype, raw_path):
         url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/add-task/')
-        sec_token = do_md5(seahub.settings.SECRET_KEY)
         data = urllib.urlencode({
-            'sec_token': sec_token,
             'file_id': file_id,
             'doctype': doctype,
             'raw_path': raw_path,
         })
-        
-        ret = do_urlopen(url, data=data).read()
-        
+
+        headers = _office_convert_token_header(file_id)
+        ret = do_urlopen(url, data=data, headers=headers).read()
+
         return json.loads(ret)

     def delegate_query_office_convert_status(file_id, page):
         url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/status/')
         url += '?file_id=%s&page=%s' % (file_id, page)
-        headers = {
-            'X-Seafile-Office-Preview-Token': do_md5(file_id + seahub.settings.SECRET_KEY),
-        }
+        headers = _office_convert_token_header(file_id)
         ret = do_urlopen(url, headers=headers).read()
-        
+
         return json.loads(ret)

-    def delegate_get_office_converted_page(request, path, file_id):
-        url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/static/' + path)
-        headers = {
-            'X-Seafile-Office-Preview-Token': do_md5(file_id + seahub.settings.SECRET_KEY),
-        }
+    def delegate_get_office_converted_page(request, repo_id, commit_id, path, static_filename, file_id):
+        url = urljoin(OFFICE_CONVERTOR_ROOT,
+                      '/office-convert/internal/static/%s/%s%s/%s' % (
+                          repo_id, commit_id, urlquote(path), urlquote(static_filename)))
+        url += '?file_id=' + file_id
+        headers = _office_convert_token_header(file_id)
         timestamp = request.META.get('HTTP_IF_MODIFIED_SINCE')
         if timestamp:
             headers['If-Modified-Since'] = timestamp
@@ -1023,12 +1031,11 @@ if HAS_OFFICE_CONVERTER:
             content_type = mimetypes.types_map.get(ext, 'application/octet-stream')

         resp = HttpResponse(data, content_type=content_type)
-        
-        if ret.headers.has_key('last-modified'):
+        if 'last-modified' in ret.headers:
             resp['Last-Modified'] = ret.headers.get('last-modified')

         return resp
-    
+
     @cluster_delegate(delegate_add_office_convert_task)
     def add_office_convert_task(file_id, doctype, raw_path):
         rpc = _get_office_converter_rpc()
@@ -1052,8 +1059,10 @@ if HAS_OFFICE_CONVERTER:
         return ret

     @cluster_delegate(delegate_get_office_converted_page)
-    def get_office_converted_page(request, path, file_id):
-        return django_static_serve(request, os.path.join(file_id, path), document_root=OFFICE_HTML_DIR)
+    def get_office_converted_page(request, repo_id, commit_id, path, static_filename, file_id):
+        return django_static_serve(request,
+                                   os.path.join(file_id, static_filename),
+                                   document_root=OFFICE_HTML_DIR)

     def prepare_converted_html(raw_path, obj_id, doctype, ret_dict):
         try:
diff --git a/seahub/utils/http.py b/seahub/utils/http.py
index 06ac500b22..2dbb5d3f2a 100644
--- a/seahub/utils/http.py
+++ b/seahub/utils/http.py
@@ -5,7 +5,20 @@ import urlparse
 import json

 from functools import wraps
-from django.http import HttpResponse
+from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden
+
+class _HTTPException(Exception):
+    def __init__(self, message=''):
+        self.message = message
+
+    def __str__(self):
+        return '%s: %s' % (self.__class__.__name__, self.message)
+
+class BadRequestException(_HTTPException):
+    pass
+
+class RequestForbbiddenException(_HTTPException):
+    pass

 def is_safe_url(url, host=None):
     """
@@ -44,10 +57,24 @@ JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
 def json_response(func):
     @wraps(func)
     def wrapped(*a, **kw):
-        result = func(*a, **kw)
+        try:
+            result = func(*a, **kw)
+        except BadRequestException, e:
+            return HttpResponseBadRequest(e.message)
+        except RequestForbbiddenException, e:
+            return HttpResponseForbidden(e.message)
         if isinstance(result, HttpResponse):
             return result
         else:
             return HttpResponse(json.dumps(result), status=200,
                                 content_type=JSON_CONTENT_TYPE)
     return wrapped
+
+def int_param(request, key):
+    v = request.GET.get(key, None)
+    if not v:
+        raise BadRequestException()
+    try:
+        return int(v)
+    except ValueError:
+        raise BadRequestException()
diff --git a/seahub/views/file.py b/seahub/views/file.py
index 154634637a..b42961b1f4 100644
--- a/seahub/views/file.py
+++ b/seahub/views/file.py
@@ -15,7 +15,9 @@ import posixpath
 import re
 import mimetypes
 import urlparse
+import datetime

+from django.core import signing
 from django.core.cache import cache
 from django.contrib.sites.models import RequestSite
 from django.contrib import messages
@@ -60,12 +62,12 @@ from seahub.utils.file_types import (IMAGE, PDF, DOCUMENT, SPREADSHEET, AUDIO,
                                      MARKDOWN, TEXT, OPENDOCUMENT, VIDEO)
 from seahub.utils.star import is_file_starred
 from seahub.utils import HAS_OFFICE_CONVERTER, FILEEXT_TYPE_MAP
-from seahub.utils.http import json_response
+from seahub.utils.http import json_response, int_param, BadRequestException, RequestForbbiddenException
 from seahub.views import check_folder_permission

 if HAS_OFFICE_CONVERTER:
     from seahub.utils import (
-        query_office_convert_status, add_office_convert_task,
+        query_office_convert_status, add_office_convert_task, office_convert_cluster_token,
         prepare_converted_html, OFFICE_PREVIEW_MAX_SIZE, get_office_converted_page
     )

@@ -1283,29 +1285,22 @@ def text_diff(request, repo_id):
 ########## office related
 @require_POST
 @csrf_exempt
+@json_response
 def office_convert_add_task(request):
-    if not HAS_OFFICE_CONVERTER:
-        raise Http404
-
-    content_type = 'application/json; charset=utf-8'
-
     try:
-        sec_token = request.POST.get('sec_token')
         file_id = request.POST.get('file_id')
         doctype = request.POST.get('doctype')
         raw_path = request.POST.get('raw_path')
     except KeyError:
         return HttpResponseBadRequest('invalid params')

-    if sec_token != do_md5(settings.SECRET_KEY):
+    if not _check_cluster_internal_token(request, file_id):
         return HttpResponseForbidden()

     if len(file_id) != 40:
         return HttpResponseBadRequest('invalid params')

-    resp = add_office_convert_task(file_id, doctype, raw_path, internal=True)
-
-    return HttpResponse(json.dumps(resp), content_type=content_type)
+    return add_office_convert_task(file_id, doctype, raw_path, cluster_internal=True)

 def _check_office_convert_perm(request, repo_id, path):
     token = request.GET.get('token', '')
@@ -1324,50 +1319,63 @@ def _check_office_convert_perm(request, repo_id, path):
     return request.user.is_authenticated() and \
         check_folder_permission(request, repo_id, '/') is not None

-@json_response
-def office_convert_query_status(request, internal=False):
-    if not HAS_OFFICE_CONVERTER:
-        raise Http404
+def _check_cluster_internal_token(request, file_id):
+    token = request.META.get('HTTP_X_SEAFILE_OFFICE_PREVIEW_TOKEN', '')
+    if not token:
+        return False
+    try:
+        s = '-'.join([file_id, datetime.datetime.now().strftime('%Y%m%d')])
+        return signing.Signer().unsign(token) == s
+    except signing.BadSignature:
+        return False

-    if not internal and not request.is_ajax():
-        raise Http404
+def _office_convert_get_file_id_internal(request):
+    file_id = request.GET.get('file_id', '')
+    if len(file_id) != 40:
+        raise BadRequestException()
+    if not _check_cluster_internal_token(request, file_id):
+        raise RequestForbbiddenException()
+    return file_id

-    repo_id = request.GET.get('repo_id', '')
-    commit_id = request.GET.get('commit_id', '')
-    path = request.GET.get('path', '')
+def _office_convert_get_file_id(request, repo_id=None, commit_id=None, path=None):
+    repo_id = repo_id or request.GET.get('repo_id', '')
+    commit_id = commit_id or request.GET.get('commit_id', '')
+    path = path or request.GET.get('path', '')
     if not (repo_id and path and commit_id):
-        return HttpResponseBadRequest('invalid params')
-
-    page = request.GET.get('page', '')
-    doctype = request.GET.get('doctype', None)
-    if doctype == 'spreadsheet':
-        page = 0
-    else:
-        try:
-            page = int(page)
-        except ValueError:
-            return HttpResponseBadRequest('invalid params')
+        raise BadRequestException()

     if not _check_office_convert_perm(request, repo_id, path):
-        return HttpResponseForbidden()
+        raise RequestForbbiddenException()

-    file_id = seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
+    return seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
+
+@json_response
+def office_convert_query_status(request, cluster_internal=False):
+    if not cluster_internal and not request.is_ajax():
+        raise Http404
+
+    doctype = request.GET.get('doctype', None)
+    page = 0 if doctype == 'spreadsheet' else int_param(request, 'page')
+    if cluster_internal:
+        file_id = _office_convert_get_file_id_internal(request)
+    else:
+        file_id = _office_convert_get_file_id(request)

     ret = {'success': False}
     try:
-        ret = query_office_convert_status(file_id, page, internal=internal)
+        ret = query_office_convert_status(file_id, page, cluster_internal=cluster_internal)
     except Exception, e:
         logging.exception('failed to call query_office_convert_status')
         ret['error'] = str(e)

     return ret

-
-# Valid static file path inclueds:
-# * 1.page 2.page for pdf/doc/ppt
-# * index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
 _OFFICE_PAGE_PATTERN = re.compile(r'^[\d]+\.page|file\.css|file\.outline|index.html|index_html_.*.png$')
-def office_convert_get_page(request, repo_id, commit_id, path, filename, internal=False):
+def office_convert_get_page(request, repo_id, commit_id, path, filename, cluster_internal=False):
+    """Valid static file path includes:
+    - "1.page" "2.page" for pdf/doc/ppt
+    - index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
+    """
     if not HAS_OFFICE_CONVERTER:
         raise Http404

@@ -1375,11 +1383,13 @@ def office_convert_get_page(request, repo_id, commit_id, path, filename, interna
         return HttpResponseForbidden()

     path = u'/' + path
-    if not _check_office_convert_perm(request, repo_id, path):
-        return HttpResponseForbidden()
+    if cluster_internal:
+        file_id = _office_convert_get_file_id_internal(request)
+    else:
+        file_id = _office_convert_get_file_id(request, repo_id, commit_id, path)

-    file_id = seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
-    resp = get_office_converted_page(request, filename, file_id, internal=internal)
+    resp = get_office_converted_page(
+        request, repo_id, commit_id, path, filename, file_id, cluster_internal=cluster_internal)
     if filename.endswith('.page'):
         content_type = 'text/html'
     else: