From 086c03a068e9cbf6eb30b1d15ca477706c2e586a Mon Sep 17 00:00:00 2001 From: llj Date: Sun, 24 Feb 2013 16:52:10 +0800 Subject: [PATCH] [file encoding] added encoding option & used chardet for Text/Markdown/Sf file --- base/context_processors.py | 3 +- media/css/seahub.css | 10 ++++ settings.py | 1 + templates/file_view.html | 2 +- templates/snippets/file_content_html.html | 21 +++++++- templates/snippets/file_content_js.html | 29 +++++++++++- views.py | 58 ++++++++++++++++------- 7 files changed, 102 insertions(+), 22 deletions(-) diff --git a/base/context_processors.py b/base/context_processors.py index dc9ae46e35..6a4d589c36 100644 --- a/base/context_processors.py +++ b/base/context_processors.py @@ -7,7 +7,7 @@ These are referenced from the setting TEMPLATE_CONTEXT_PROCESSORS and used by RequestContext. """ from settings import SEAFILE_VERSION, SITE_TITLE, SITE_NAME, SITE_BASE, \ - ENABLE_SIGNUP, MAX_FILE_NAME, USE_PDFJS + ENABLE_SIGNUP, MAX_FILE_NAME, USE_PDFJS, FILE_ENCODING_LIST try: from settings import BUSINESS_MODE except ImportError: @@ -38,5 +38,6 @@ def base(request): 'enable_signup': ENABLE_SIGNUP, 'max_file_name': MAX_FILE_NAME, 'use_pdfjs': USE_PDFJS, + 'file_encoding_list': FILE_ENCODING_LIST, } diff --git a/media/css/seahub.css b/media/css/seahub.css index 55c87cbd97..4fe41a0224 100644 --- a/media/css/seahub.css +++ b/media/css/seahub.css @@ -1507,6 +1507,16 @@ textarea:-moz-placeholder {/* for FF */ #file-op button { padding:2px 8px; } +#file-enc-cont { + width:950px; + margin:-20px auto 6px; + text-align:right; +} +#file-enc { + border:1px solid #ddd; + border-radius:2px; + background:#efefef; +} #file-view-tip { height: 150px; padding:10px; diff --git a/settings.py b/settings.py index c926f1ff9c..be29187b2c 100644 --- a/settings.py +++ b/settings.py @@ -146,6 +146,7 @@ ACCOUNT_ACTIVATION_DAYS = 7 # File preview FILE_PREVIEW_MAX_SIZE = 10 * 1024 * 1024 USE_PDFJS = True +FILE_ENCODING_LIST = ['auto', 'utf-8', 'gbk', 'ISO-8859-1', 'ISO-8859-5'] # 
Avatar AVATAR_STORAGE_DIR = 'avatars' diff --git a/templates/file_view.html b/templates/file_view.html index 28be7cefb1..94232654b2 100644 --- a/templates/file_view.html +++ b/templates/file_view.html @@ -73,7 +73,7 @@ {% if not read_only %} {% if filetype == 'Text' or filetype == 'Markdown' or filetype == 'Sf' %} - + {% endif %} {% endif %} {% endif %} diff --git a/templates/snippets/file_content_html.html b/templates/snippets/file_content_html.html index d30df55d52..93c7476671 100644 --- a/templates/snippets/file_content_html.html +++ b/templates/snippets/file_content_html.html @@ -5,14 +5,31 @@ content of files that can be viewed online shows here. For details please refer to 'snippets/file_content_js.html'. {% endcomment %}
+ {% if filetype == 'Text' or filetype == 'Sf' or filetype == 'Markdown' %} +
+ + +
+ {% endif %} + {% if not err %} - {% if filetype == 'Text' or filetype == 'Sf' %} + {% if filetype == 'Text' or filetype == 'Sf' or filetype == 'Markdown' %} {% ifnotequal file_content None %} {% if filetype == 'Text' %} - {% else %} + {% endif %} + + {% if filetype == 'Sf' %}
{{ file_content|safe }}
{% endif %} + + {% if filetype == 'Markdown' %} +
+ {% endif %} {% endifnotequal %} {% endif %} diff --git a/templates/snippets/file_content_js.html b/templates/snippets/file_content_js.html index 46f84e424b..25b6d8dd48 100644 --- a/templates/snippets/file_content_js.html +++ b/templates/snippets/file_content_js.html @@ -40,8 +40,7 @@ {% if filetype == 'Markdown' %} {% ifnotequal file_content None %} var converter = new Showdown.converter(); - $('#file-view').html('
' + converter.makeHtml('{{ file_content|escapejs }}') + '
'); - $('#md-view').children(':first').css('margin-top', '0'); + $('#md-view').html(converter.makeHtml('{{ file_content|escapejs }}')).children(':first').css('margin-top', '0'); {% endifnotequal %} {% endif %} @@ -168,3 +167,29 @@ $('#file-view').html('

{% trans "This type of file can {% endif %} {% endif %}{# 'if not err' ends here. #} + +{% if filetype == 'Text' or filetype == 'Sf' or filetype == 'Markdown' %} + $('#file-enc').change(function() { + var file_enc = $(this).val(); + var s = location.search; + if (s.indexOf('?') == -1) { + location.search = '?file_enc=' + file_enc; + } else { + if (s.indexOf('file_enc') == -1) { + location.search += '&file_enc=' + file_enc; + } else { + var params = s.substr(1).split('&'); + var param; + for (var i = 0, len = params.length; i < len; i++) { + param = params[i].split('='); + if (param[0] == 'file_enc') { + param[1] = file_enc; + params[i] = param.join('='); + break; + } + } + location.search = '?' + params.join('&'); + } + } + }) +{% endif %} diff --git a/views.py b/views.py index 7b2c20fd46..78efc68c29 100644 --- a/views.py +++ b/views.py @@ -9,6 +9,7 @@ import sys import urllib import urllib2 import logging +import chardet from urllib import quote from django.core.cache import cache from django.core.urlresolvers import reverse @@ -78,7 +79,7 @@ try: DOCUMENT_CONVERTOR_ROOT += '/' except ImportError: DOCUMENT_CONVERTOR_ROOT = None -from settings import FILE_PREVIEW_MAX_SIZE, INIT_PASSWD, USE_PDFJS,\ +from settings import FILE_PREVIEW_MAX_SIZE, INIT_PASSWD, USE_PDFJS, FILE_ENCODING_LIST, \ SEND_EMAIL_ON_ADDING_SYSTEM_MEMBER, SEND_EMAIL_ON_RESETTING_USER_PASSWD try: @@ -1247,6 +1248,7 @@ def repo_view_file(request, repo_id): filename = urllib2.quote(u_filename.encode('utf-8')) comment_open = request.GET.get('comment_open', '') page_from = request.GET.get('from', '') + file_enc = request.GET.get('file_enc', 'auto') commit_id = request.GET.get('commit_id', '') view_history = True if commit_id else False @@ -1304,7 +1306,7 @@ def repo_view_file(request, repo_id): raw_path = gen_file_get_url(token, filename) # get file content - err, file_content, swf_exists, filetype = get_file_content(filetype, raw_path, obj_id, fileext) + err, file_content, swf_exists, filetype = 
get_file_content(filetype, raw_path, obj_id, fileext, file_enc) img_prev = None img_next = None @@ -1346,6 +1348,7 @@ def repo_view_file(request, repo_id): 'raw_path': raw_path, 'err': err, 'file_content': file_content, + 'file_enc': file_enc, 'swf_exists': swf_exists, 'DOCUMENT_CONVERTOR_ROOT': DOCUMENT_CONVERTOR_ROOT, 'page_from': page_from, @@ -1423,6 +1426,7 @@ def repo_view_file(request, repo_id): 'contacts': contacts, 'err': err, 'file_content': file_content, + 'file_enc': file_enc, "applet_root": get_ccnetapplet_root(), 'groups': groups, 'comments': comments, @@ -1492,10 +1496,13 @@ def file_comment(request): content_type=content_type) -def repo_file_get(raw_path): +def repo_file_get(raw_path, file_enc): err = '' file_content = '' encoding = '' + if file_enc in FILE_ENCODING_LIST and file_enc != 'auto': + encoding = file_enc + try: file_response = urllib2.urlopen(raw_path) if long(file_response.headers['Content-Length']) > FILE_PREVIEW_MAX_SIZE: @@ -1510,23 +1517,37 @@ def repo_file_get(raw_path): err = _(u'URLError: failed to open file online') return err, '', '' else: - try: - u_content = content.decode('utf-8') - encoding = 'utf-8' - except UnicodeDecodeError: - # XXX: file in windows is encoded in gbk + if encoding: try: - u_content = content.decode('gbk') - encoding = 'gbk' + u_content = content.decode(encoding) except UnicodeDecodeError: - err = _(u'Unknown file encoding') + err = _(u'The encoding you chose is not proper.') return err, '', '' + else: + try: + u_content = content.decode('utf-8') + encoding = 'utf-8' + except UnicodeDecodeError: + try: + u_content = content.decode('gbk') + encoding = 'gbk' + except UnicodeDecodeError: + encoding = chardet.detect(content)['encoding'] + if encoding != None: + try: + u_content = content.decode(encoding) + except UnicodeDecodeError: + err = _(u'Unknown file encoding') + return err, '', '' + else: + err = _(u'Unknown file encoding') + return err, '', '' file_content = u_content return err, file_content, 
encoding -def get_file_content(filetype, raw_path, obj_id, fileext): +def get_file_content(filetype, raw_path, obj_id, fileext, file_enc): err = '' file_content = '' swf_exists = False @@ -1537,7 +1558,7 @@ def get_file_content(filetype, raw_path, obj_id, fileext): file_content['img_w'], file_content['img_h'] = img.size if filetype == 'Text' or filetype == 'Markdown' or filetype == 'Sf': - err, file_content, encoding = repo_file_get(raw_path) + err, file_content, encoding = repo_file_get(raw_path, file_enc) elif filetype == 'Document': if DOCUMENT_CONVERTOR_ROOT: err, swf_exists = flash_prepare(raw_path, obj_id, fileext) @@ -1664,7 +1685,8 @@ def file_edit(request, repo_id): op = 'decrypt' if not op: raw_path = gen_file_get_url(token, filename) - err, file_content, encoding = repo_file_get(raw_path) + file_enc = request.GET.get('file_enc', 'auto') + err, file_content, encoding = repo_file_get(raw_path, file_enc) else: err = _(u'Edit online is not offered for this type of file.') @@ -2581,7 +2603,8 @@ def view_shared_file(request, token): raw_path = gen_file_get_url(access_token, quote_filename) # get file content - err, file_content, swf_exists, filetype = get_file_content(filetype, raw_path, obj_id, fileext) + file_enc = request.GET.get('file_enc', 'auto') + err, file_content, swf_exists, filetype = get_file_content(filetype, raw_path, obj_id, fileext, file_enc) # Increase file shared link view_cnt, this operation should be atomic fileshare = FileShare.objects.get(token=token) @@ -2601,6 +2624,7 @@ def view_shared_file(request, token): 'username': username, 'err': err, 'file_content': file_content, + 'file_enc': file_enc, 'swf_exists': swf_exists, 'DOCUMENT_CONVERTOR_ROOT': DOCUMENT_CONVERTOR_ROOT, }, context_instance=RequestContext(request)) @@ -2683,7 +2707,8 @@ def view_file_via_shared_dir(request, token): # Raw path raw_path = gen_file_get_url(access_token, quote_filename) # get file content - err, file_content, swf_exists, filetype = 
get_file_content(filetype, raw_path, obj_id, fileext) + file_enc = request.GET.get('file_enc', 'auto') + err, file_content, swf_exists, filetype = get_file_content(filetype, raw_path, obj_id, fileext, file_enc) zipped = gen_path_link(path, '') @@ -2700,6 +2725,7 @@ def view_file_via_shared_dir(request, token): 'username': username, 'err': err, 'file_content': file_content, + 'file_enc': file_enc, 'swf_exists': swf_exists, 'DOCUMENT_CONVERTOR_ROOT': DOCUMENT_CONVERTOR_ROOT, 'zipped': zipped,