From 5dee3d44dc8ef61d9eeb23daddf1e022dd7d5a2d Mon Sep 17 00:00:00 2001
From: Shuai Lin <linshuai2012@gmail.com>
Date: Fri, 17 Jul 2015 12:20:02 +0800
Subject: [PATCH] handle office convert perm check in cluster mode

---
 seahub/urls.py           |  5 +-
 seahub/utils/__init__.py | 53 +++++++++++++---------
 seahub/utils/http.py     | 31 ++++++++++++-
 seahub/views/file.py     | 98 ++++++++++++++++++++++------------------
 4 files changed, 117 insertions(+), 70 deletions(-)
diff --git a/seahub/urls.py b/seahub/urls.py
index 5ae3472916..1a42362507 100644
--- a/seahub/urls.py
+++ b/seahub/urls.py
@@ -311,8 +311,9 @@ if HAS_OFFICE_CONVERTER:
     if CLUSTER_MODE and OFFICE_CONVERTOR_NODE:
         urlpatterns += patterns('',
             url(r'^office-convert/internal/add-task/$', office_convert_add_task),
-            url(r'^office-convert/internal/status/$', office_convert_query_status, {'internal': True}),
-            url(r'^office-convert/internal/static/(?P<path>.*)$', office_convert_get_page, {'internal': True}),
+            url(r'^office-convert/internal/status/$', office_convert_query_status, {'cluster_internal': True}),
+            url(r'^office-convert/internal/static/(?P<repo_id>[-0-9a-f]{36})/(?P<commit_id>[0-9a-f]{40})/(?P<path>.+)/(?P<filename>[^/].+)$',
+                office_convert_get_page, {'cluster_internal': True}),
         )
 
 if TRAFFIC_STATS_ENABLED:
diff --git a/seahub/utils/__init__.py b/seahub/utils/__init__.py
index e522728ea1..ba6c11e28d 100644
--- a/seahub/utils/__init__.py
+++ b/seahub/utils/__init__.py
@@ -952,6 +952,16 @@ if HAS_OFFICE_CONVERTER:
 
         return office_converter_rpc
 
+    def office_convert_cluster_token(file_id):
+        """Sign "<file_id>-<YYYYMMDD>" (date taken from datetime.now()) as the cluster-internal token."""
+        from django.core.signing import Signer
+        return Signer().sign('-'.join((file_id, datetime.now().strftime('%Y%m%d'))))
+
+    def _office_convert_token_header(file_id):
+        """Return the HTTP header dict carrying the signed cluster token for file_id."""
+        token = office_convert_cluster_token(file_id)
+        return {'X-Seafile-Office-Preview-Token': token}
+
     def cluster_delegate(delegate_func):
         '''usage:
         
@@ -966,44 +976,42 @@ if HAS_OFFICE_CONVERTER:
         '''
         def decorated(func):
             def real_func(*args, **kwargs):
-                internal = kwargs.pop('internal', False)
-                if CLUSTER_MODE and not OFFICE_CONVERTOR_NODE and not internal:
+                cluster_internal = kwargs.pop('cluster_internal', False)
+                if CLUSTER_MODE and not OFFICE_CONVERTOR_NODE and not cluster_internal:
                     return delegate_func(*args)
                 else:
                     return func(*args)
             return real_func
-            
+
         return decorated
 
     def delegate_add_office_convert_task(file_id, doctype, raw_path):
         url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/add-task/')
-        sec_token = do_md5(seahub.settings.SECRET_KEY)
         data = urllib.urlencode({
-            'sec_token': sec_token,
             'file_id': file_id,
             'doctype': doctype,
             'raw_path': raw_path,
         })
-        
-        ret = do_urlopen(url, data=data).read()
-        
+
+        headers = _office_convert_token_header(file_id)  # signed per-file token now travels in a request header, not in the POST body
+        ret = do_urlopen(url, data=data, headers=headers).read()  # POST to the convertor node; the response body is JSON
+
         return json.loads(ret)
 
     def delegate_query_office_convert_status(file_id, page):
         url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/status/')
         url += '?file_id=%s&page=%s' % (file_id, page)
-        headers = { 
-            'X-Seafile-Office-Preview-Token': do_md5(file_id + seahub.settings.SECRET_KEY),
-        }
+        headers = _office_convert_token_header(file_id)  # same signed-token header as the other delegate_* helpers
         ret = do_urlopen(url, headers=headers).read()
-        
+
         return json.loads(ret)
 
-    def delegate_get_office_converted_page(request, path, file_id):
-        url = urljoin(OFFICE_CONVERTOR_ROOT, '/office-convert/internal/static/' + path)
-        headers = {
-            'X-Seafile-Office-Preview-Token': do_md5(file_id + seahub.settings.SECRET_KEY),
-        }
+    def delegate_get_office_converted_page(request, repo_id, commit_id, path, static_filename, file_id):
+        url = urljoin(OFFICE_CONVERTOR_ROOT,
+                      '/office-convert/internal/static/%s/%s%s/%s' % (
+                          repo_id, commit_id, urlquote(path), urlquote(static_filename)))
+        url += '?file_id=' + file_id
+        headers = _office_convert_token_header(file_id)
         timestamp = request.META.get('HTTP_IF_MODIFIED_SINCE')
         if timestamp:
             headers['If-Modified-Since'] = timestamp
@@ -1023,12 +1031,11 @@ if HAS_OFFICE_CONVERTER:
             content_type = mimetypes.types_map.get(ext, 'application/octet-stream')
 
         resp = HttpResponse(data, content_type=content_type)
-
-        if ret.headers.has_key('last-modified'):
+        if 'last-modified' in ret.headers:
             resp['Last-Modified'] = ret.headers.get('last-modified')
 
         return resp
-        
+
     @cluster_delegate(delegate_add_office_convert_task)
     def add_office_convert_task(file_id, doctype, raw_path):
         rpc = _get_office_converter_rpc()
@@ -1052,8 +1059,10 @@ if HAS_OFFICE_CONVERTER:
         return ret
 
     @cluster_delegate(delegate_get_office_converted_page)
-    def get_office_converted_page(request, path, file_id):
-        return django_static_serve(request, os.path.join(file_id, path), document_root=OFFICE_HTML_DIR)
+    def get_office_converted_page(request, repo_id, commit_id, path, static_filename, file_id):  # repo_id, commit_id and path are not used in this body
+        return django_static_serve(request,
+                                   os.path.join(file_id, static_filename),  # file served is <file_id>/<static_filename> under OFFICE_HTML_DIR
+                                   document_root=OFFICE_HTML_DIR)
 
     def prepare_converted_html(raw_path, obj_id, doctype, ret_dict):
         try:
diff --git a/seahub/utils/http.py b/seahub/utils/http.py
index 06ac500b22..2dbb5d3f2a 100644
--- a/seahub/utils/http.py
+++ b/seahub/utils/http.py
@@ -5,7 +5,20 @@ import urlparse
 import json
 
 from functools import wraps
-from django.http import HttpResponse
+from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden
+
+class _HTTPException(Exception):  # common base of BadRequestException and RequestForbbiddenException
+    def __init__(self, message=''):
+        self.message = message  # read back by json_response when it builds the error response
+
+    def __str__(self):
+        return '%s: %s' % (self.__class__.__name__, self.message)  # e.g. "BadRequestException: <message>"
+
+class BadRequestException(_HTTPException):
+    """Signals a malformed request; json_response catches it to build an HttpResponseBadRequest."""
+
+class RequestForbbiddenException(_HTTPException):
+    """Signals a denied request; json_response catches it to build an HttpResponseForbidden."""
 
 def is_safe_url(url, host=None):
     """
@@ -44,10 +57,24 @@ JSON_CONTENT_TYPE = 'application/json; charset=utf-8'
 def json_response(func):
     @wraps(func)
     def wrapped(*a, **kw):
-        result = func(*a, **kw)
+        try:
+            result = func(*a, **kw)
+        except BadRequestException, e:  # view helper signalled a malformed request -> HTTP 400
+            return HttpResponseBadRequest(e.message)
+        except RequestForbbiddenException, e:  # view helper signalled a denied request -> HTTP 403
+            return HttpResponseForbidden(e.message)  # `message` is the attribute _HTTPException sets
         if isinstance(result, HttpResponse):
             return result
         else:
             return HttpResponse(json.dumps(result), status=200,
                                 content_type=JSON_CONTENT_TYPE)
     return wrapped
+
+def int_param(request, key):
+    """Return GET parameter `key` as an int; raise BadRequestException if it is missing, empty or not an integer."""
+    raw = request.GET.get(key, None)
+    try:
+        # A missing/empty value becomes '' so that int() rejects it with ValueError too.
+        return int(raw or '')
+    except ValueError:
+        raise BadRequestException()
diff --git a/seahub/views/file.py b/seahub/views/file.py
index 154634637a..b42961b1f4 100644
--- a/seahub/views/file.py
+++ b/seahub/views/file.py
@@ -15,7 +15,9 @@ import posixpath
 import re
 import mimetypes
 import urlparse
+import datetime
 
+from django.core import signing
 from django.core.cache import cache
 from django.contrib.sites.models import RequestSite
 from django.contrib import messages
@@ -60,12 +62,12 @@ from seahub.utils.file_types import (IMAGE, PDF, DOCUMENT, SPREADSHEET, AUDIO,
                                      MARKDOWN, TEXT, OPENDOCUMENT, VIDEO)
 from seahub.utils.star import is_file_starred
 from seahub.utils import HAS_OFFICE_CONVERTER, FILEEXT_TYPE_MAP
-from seahub.utils.http import json_response
+from seahub.utils.http import json_response, int_param, BadRequestException, RequestForbbiddenException
 from seahub.views import check_folder_permission
 
 if HAS_OFFICE_CONVERTER:
     from seahub.utils import (
-        query_office_convert_status, add_office_convert_task,
+        query_office_convert_status, add_office_convert_task, office_convert_cluster_token,
         prepare_converted_html, OFFICE_PREVIEW_MAX_SIZE, get_office_converted_page
     )
 
@@ -1283,29 +1285,22 @@ def text_diff(request, repo_id):
 ########## office related
 @require_POST
 @csrf_exempt
+@json_response
 def office_convert_add_task(request):
-    if not HAS_OFFICE_CONVERTER:
-        raise Http404
-
-    content_type = 'application/json; charset=utf-8'
-
     try:
-        sec_token = request.POST.get('sec_token')
         file_id = request.POST.get('file_id')
         doctype = request.POST.get('doctype')
         raw_path = request.POST.get('raw_path')
     except KeyError:
         return HttpResponseBadRequest('invalid params')
 
-    if sec_token != do_md5(settings.SECRET_KEY):
+    if not (file_id and _check_cluster_internal_token(request, file_id)):  # a missing file_id can never carry a valid token
         return HttpResponseForbidden()
 
     if len(file_id) != 40:
         return HttpResponseBadRequest('invalid params')
 
-    resp = add_office_convert_task(file_id, doctype, raw_path, internal=True)
-
-    return HttpResponse(json.dumps(resp), content_type=content_type)
+    return add_office_convert_task(file_id, doctype, raw_path, cluster_internal=True)  # keyword the cluster_delegate wrapper pops
 
 def _check_office_convert_perm(request, repo_id, path):
     token = request.GET.get('token', '')
@@ -1324,50 +1319,63 @@ def _check_office_convert_perm(request, repo_id, path):
         return request.user.is_authenticated() and \
             check_folder_permission(request, repo_id, '/') is not None
 
-@json_response
-def office_convert_query_status(request, internal=False):
-    if not HAS_OFFICE_CONVERTER:
-        raise Http404
+def _check_cluster_internal_token(request, file_id):
+    """Return True only if the request carries today's valid signed token for file_id."""
+    # Django exposes the 'X-Seafile-Office-Preview-Token' header under this META key.
+    token = request.META.get('HTTP_X_SEAFILE_OFFICE_PREVIEW_TOKEN', '')
+    expected = '-'.join([file_id, datetime.datetime.now().strftime('%Y%m%d')])
+    try:
+        return bool(token) and signing.Signer().unsign(token) == expected  # always a bool; callers test it with `not`
+    except signing.BadSignature:
+        return False
 
-    if not internal and not request.is_ajax():
-        raise Http404
+def _office_convert_get_file_id_internal(request):  # read and validate ?file_id= for a cluster-internal request
+    file_id = request.GET.get('file_id', '')
+    if len(file_id) != 40:  # anything that is not exactly 40 characters is rejected as a bad request
+        raise BadRequestException()
+    if not _check_cluster_internal_token(request, file_id):
+        raise RequestForbbiddenException()  # raised when _check_cluster_internal_token returns a falsy value
+    return file_id
 
-    repo_id = request.GET.get('repo_id', '')
-    commit_id = request.GET.get('commit_id', '')
-    path = request.GET.get('path', '')
+def _office_convert_get_file_id(request, repo_id=None, commit_id=None, path=None):  # explicit arguments win over the GET parameters
+    repo_id = repo_id or request.GET.get('repo_id', '')
+    commit_id = commit_id or request.GET.get('commit_id', '')
+    path = path or request.GET.get('path', '')
     if not (repo_id and path and commit_id):
-        return HttpResponseBadRequest('invalid params')
-
-    page = request.GET.get('page', '')
-    doctype = request.GET.get('doctype', None)
-    if doctype == 'spreadsheet':
-        page = 0
-    else:
-        try:
-            page = int(page)
-        except ValueError:
-            return HttpResponseBadRequest('invalid params')
+        raise BadRequestException()  # one of repo_id / commit_id / path is missing
 
     if not _check_office_convert_perm(request, repo_id, path):
-        return HttpResponseForbidden()
+        raise BadRequestException()  # NOTE(review): a permission failure is raised as a bad request here, while the replaced code returned HttpResponseForbidden - confirm this is intended
 
-    file_id = seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
+    return seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)  # the RPC result is returned as-is
+
+@json_response
+def office_convert_query_status(request, cluster_internal=False):  # cluster_internal=True is supplied by the internal status URL route
+    if not cluster_internal and not request.is_ajax():  # requests that are neither internal nor ajax get a 404
+        raise Http404
+
+    doctype = request.GET.get('doctype', None)
+    page = 0 if doctype == 'spreadsheet' else int_param(request, 'page')  # spreadsheets always use page 0; otherwise ?page= must be an integer
+    if cluster_internal:
+        file_id = _office_convert_get_file_id_internal(request)  # file_id is taken from ?file_id= after the token check
+    else:
+        file_id = _office_convert_get_file_id(request)  # file_id is resolved from repo_id / commit_id / path after the permission check
 
     ret = {'success': False}
     try:
-        ret = query_office_convert_status(file_id, page, internal=internal)
+        ret = query_office_convert_status(file_id, page, cluster_internal=cluster_internal)  # the flag is forwarded to the cluster_delegate wrapper
     except Exception, e:
         logging.exception('failed to call query_office_convert_status')
         ret['error'] = str(e)
 
     return ret
 
-
-# Valid static file path inclueds:
-# * 1.page 2.page for pdf/doc/ppt
-# * index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
 _OFFICE_PAGE_PATTERN = re.compile(r'^[\d]+\.page|file\.css|file\.outline|index.html|index_html_.*.png$')
-def office_convert_get_page(request, repo_id, commit_id, path, filename, internal=False):
+def office_convert_get_page(request, repo_id, commit_id, path, filename, cluster_internal=False):
+    """Valid static file paths include:
+    - "1.page" "2.page" for pdf/doc/ppt
+    - index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
+    """
     if not HAS_OFFICE_CONVERTER:
         raise Http404
 
@@ -1375,11 +1383,13 @@ def office_convert_get_page(request, repo_id, commit_id, path, filename, interna
         return HttpResponseForbidden()
 
     path = u'/' + path
-    if not _check_office_convert_perm(request, repo_id, path):
-        return HttpResponseForbidden()
+    if cluster_internal:
+        file_id = _office_convert_get_file_id_internal(request)
+    else:
+        file_id = _office_convert_get_file_id(request, repo_id, commit_id, path)
 
-    file_id = seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
-    resp = get_office_converted_page(request, filename, file_id, internal=internal)
+    resp = get_office_converted_page(
+        request, repo_id, commit_id, path, filename, file_id, cluster_internal=cluster_internal)
     if filename.endswith('.page'):
         content_type = 'text/html'
     else: