1
0
mirror of https://github.com/haiwen/seahub.git synced 2025-04-27 19:05:16 +00:00

fix Polynomial regular expression used on uncontrolled data (#7656)

* fix Polynomial regular expression used on uncontrolled data

* remove office convert code
This commit is contained in:
lian 2025-03-24 14:26:33 +08:00 committed by GitHub
parent ff7fd0f0d5
commit 213927e1a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 9 additions and 315 deletions

View File

@ -102,16 +102,6 @@ urlpatterns = [
re_path(r'^repos/(?P<repo_id>[-0-9-a-f]{36})/fileops/move/$', OpMoveView.as_view(), name="api2-fileops-move"),
]
# serve office converter static files
from seahub.utils import HAS_OFFICE_CONVERTER
if HAS_OFFICE_CONVERTER:
urlpatterns += [
path('office-convert/status/', OfficeConvertQueryStatus.as_view()),
]
urlpatterns += [
re_path(r'^office-convert/generate/repos/(?P<repo_id>[-0-9-a-f]{36})/$', OfficeGenerateView.as_view()),
]
from seahub.settings import CLIENT_SSO_VIA_LOCAL_BROWSER
if CLIENT_SSO_VIA_LOCAL_BROWSER:
urlpatterns += [

View File

@ -98,15 +98,13 @@ from seahub.views.file import get_file_view_path_and_perm, send_file_access_msg,
if HAS_FILE_SEARCH or HAS_FILE_SEASEARCH:
from seahub.search.utils import search_files, get_search_repos_map, SEARCH_FILEEXT, ai_search_files, \
RELATED_REPOS_PREFIX, SEARCH_REPOS_LIMIT, RELATED_REPOS_CACHE_TIMEOUT, format_repos
from seahub.utils import HAS_OFFICE_CONVERTER, transfer_repo
if HAS_OFFICE_CONVERTER:
from seahub.utils import query_office_convert_status, prepare_converted_html
from seahub.utils import transfer_repo
import seahub.settings as settings
from seahub.settings import THUMBNAIL_EXTENSION, THUMBNAIL_ROOT, \
FILE_LOCK_EXPIRATION_DAYS, ENABLE_STORAGE_CLASSES, \
STORAGE_CLASS_MAPPING_POLICY, \
ENABLE_RESET_ENCRYPTED_REPO_PASSWORD, SHARE_LINK_EXPIRE_DAYS_MAX, \
SHARE_LINK_EXPIRE_DAYS_MIN, SHARE_LINK_EXPIRE_DAYS_DEFAULT
SHARE_LINK_EXPIRE_DAYS_MIN, SHARE_LINK_EXPIRE_DAYS_DEFAULT
from seahub.subscription.utils import subscription_check
from seahub.organizations.models import OrgAdminSettings, DISABLE_ORG_ENCRYPTED_LIBRARY
from seahub.seadoc.utils import get_seadoc_file_uuid, gen_seadoc_image_parent_path, get_seadoc_asset_upload_link
@ -5099,94 +5097,6 @@ class RepoHistoryChange(APIView):
content_type=json_content_type)
# based on views/file.py::office_convert_query_status
class OfficeConvertQueryStatus(APIView):
authentication_classes = (TokenAuthentication, )
permission_classes = (IsAuthenticated, )
throttle_classes = (UserRateThrottle, )
def get(self, request, format=None):
if not HAS_OFFICE_CONVERTER:
return api_error(status.HTTP_404_NOT_FOUND, 'Office converter not enabled.')
content_type = 'application/json; charset=utf-8'
ret = {'success': False}
file_id = request.GET.get('file_id', '')
if len(file_id) != 40:
ret['error'] = 'invalid param'
else:
try:
d = query_office_convert_status(file_id)
if d.error:
ret['error'] = d.error
else:
ret['success'] = True
ret['status'] = d.status
except Exception as e:
logging.exception('failed to call query_office_convert_status')
ret['error'] = str(e)
return HttpResponse(json.dumps(ret), content_type=content_type)
# based on views/file.py::view_file and views/file.py::handle_document
class OfficeGenerateView(APIView):
authentication_classes = (TokenAuthentication, )
permission_classes = (IsAuthenticated, )
throttle_classes = (UserRateThrottle, )
def get(self, request, repo_id, format=None):
username = request.user.username
# check arguments
repo = get_repo(repo_id)
if not repo:
return api_error(status.HTTP_404_NOT_FOUND, 'Library not found.')
path = request.GET.get('p', '/').rstrip('/')
commit_id = request.GET.get('commit_id', None)
if commit_id:
try:
obj_id = seafserv_threaded_rpc.get_file_id_by_commit_and_path(
repo.id, commit_id, path)
except:
return api_error(status.HTTP_404_NOT_FOUND, 'Revision not found.')
else:
try:
obj_id = seafile_api.get_file_id_by_path(repo_id, path)
except:
return api_error(status.HTTP_404_NOT_FOUND, 'File not found.')
if not obj_id:
return api_error(status.HTTP_404_NOT_FOUND, 'File not found.')
# Check whether the user has permission to view the file and get the file's raw path;
# return an error response if permission is denied.
raw_path, inner_path, user_perm = get_file_view_path_and_perm(request,
repo_id,
obj_id, path)
if not user_perm:
return api_error(status.HTTP_403_FORBIDDEN, 'You do not have permission to view this file.')
u_filename = os.path.basename(path)
filetype, fileext = get_file_type_and_ext(u_filename)
if filetype != DOCUMENT:
return api_error(status.HTTP_400_BAD_REQUEST, 'File is not a convertable document')
ret_dict = {}
if HAS_OFFICE_CONVERTER:
err = prepare_converted_html(raw_path, obj_id, fileext, ret_dict)
# populate return value dict
ret_dict['err'] = err
ret_dict['obj_id'] = obj_id
else:
ret_dict['filetype'] = 'Unknown'
return HttpResponse(json.dumps(ret_dict), status=200, content_type=json_content_type)
class ThumbnailView(APIView):
authentication_classes = (TokenAuthentication, SessionAuthentication)
permission_classes = (IsAuthenticated,)

View File

@ -2,7 +2,7 @@
from seahub.api2.base import APIView
from seahub.api2.utils import json_response
from seahub import settings
from seahub.utils import HAS_OFFICE_CONVERTER, HAS_FILE_SEARCH, is_pro_version, HAS_FILE_SEASEARCH
from seahub.utils import HAS_FILE_SEARCH, is_pro_version, HAS_FILE_SEASEARCH
from constance import config
@ -32,9 +32,6 @@ class ServerInfoView(APIView):
if is_pro_version():
features.append('seafile-pro')
if HAS_OFFICE_CONVERTER:
features.append('office-preview')
if HAS_FILE_SEARCH or HAS_FILE_SEASEARCH:
features.append('file-search')

View File

@ -972,18 +972,6 @@ if getattr(settings, 'ENABLE_LOGIN_SIMPLE_CHECK', False):
re_path(r'^sso-auto-login/', login_simple_check),
]
# serve office converter static files
from seahub.utils import HAS_OFFICE_CONVERTER
if HAS_OFFICE_CONVERTER:
from seahub.views.file import (
office_convert_query_status, office_convert_get_page
)
urlpatterns += [
re_path(r'^office-convert/static/(?P<repo_id>[-0-9a-f]{36})/(?P<commit_id>[0-9a-f]{40})/(?P<path>.+)/(?P<filename>[^/].+)$',
office_convert_get_page, name='office_convert_get_page'),
path('office-convert/status/', office_convert_query_status, name='office_convert_query_status'),
]
if getattr(settings, 'ENABLE_MULTI_ADFS', False):
from seahub.adfs_auth.views import *
urlpatterns += [

View File

@ -120,11 +120,6 @@ def is_db_sqlite3():
IS_DB_SQLITE3 = is_db_sqlite3()
try:
from seahub.settings import OFFICE_CONVERTOR_ROOT
except ImportError:
OFFICE_CONVERTOR_ROOT = ''
from seahub.utils.file_types import *
from seahub.utils.htmldiff import HtmlDiff # used in views/files.py
@ -1154,78 +1149,9 @@ if EVENTS_CONFIG_FILE:
FILE_AUDIT_ENABLED = check_file_audit_enabled()
# office convert related
def check_office_converter_enabled():
if OFFICE_CONVERTOR_ROOT:
return True
return False
HAS_OFFICE_CONVERTER = check_office_converter_enabled()
OFFICE_PREVIEW_MAX_SIZE = 2 * 1024 * 1024
OFFICE_PREVIEW_MAX_PAGES = 50
if HAS_OFFICE_CONVERTER:
import time
import requests
import jwt
def add_office_convert_task(file_id, doctype, raw_path):
payload = {'exp': int(time.time()) + 300, }
token = jwt.encode(payload, seahub.settings.SECRET_KEY, algorithm='HS256')
headers = {"Authorization": "Token %s" % token}
params = {'file_id': file_id, 'doctype': doctype, 'raw_path': raw_path}
url = urljoin(OFFICE_CONVERTOR_ROOT, '/add-task')
requests.get(url, params, headers=headers)
return {'exists': False}
def query_office_convert_status(file_id, doctype):
payload = {'exp': int(time.time()) + 300, }
token = jwt.encode(payload, seahub.settings.SECRET_KEY, algorithm='HS256')
headers = {"Authorization": "Token %s" % token}
params = {'file_id': file_id, 'doctype': doctype}
url = urljoin(OFFICE_CONVERTOR_ROOT, '/query-status')
d = requests.get(url, params, headers=headers)
d = d.json()
ret = {}
if 'error' in d:
ret['error'] = d['error']
ret['status'] = 'ERROR'
else:
ret['success'] = True
ret['status'] = d['status']
return ret
def get_office_converted_page(path, static_filename, file_id):
url = urljoin(OFFICE_CONVERTOR_ROOT, '/get-converted-page')
payload = {'exp': int(time.time()) + 300, }
token = jwt.encode(payload, seahub.settings.SECRET_KEY, algorithm='HS256')
headers = {"Authorization": "Token %s" % token}
params = {'static_filename': static_filename, 'file_id': file_id}
try:
ret = requests.get(url, params, headers=headers)
except urllib.error.HTTPError as e:
raise Exception(e)
content_type = ret.headers.get('content-type', None)
if content_type is None:
dummy, ext = os.path.splitext(os.path.basename(path))
content_type = mimetypes.types_map.get(ext, 'application/octet-stream')
resp = HttpResponse(ret, content_type=content_type)
if 'last-modified' in ret.headers:
resp['Last-Modified'] = ret.headers.get('last-modified')
return resp
def prepare_converted_html(raw_path, obj_id, doctype, ret_dict):
try:
add_office_convert_task(obj_id, doctype, raw_path)
except Exception as e:
logging.exception('failed to add_office_convert_task: %s' % e)
return _('Internal Server Error')
return None
# search related
HAS_FILE_SEARCH = False
HAS_FILE_SEASEARCH = False

View File

@ -55,7 +55,7 @@ from seahub.utils import render_error, is_org_context, \
get_file_type_and_ext, gen_file_get_url, \
render_permission_error, is_pro_version, is_textual_file, \
EMPTY_SHA1, HtmlDiff, gen_inner_file_get_url, \
get_conf_text_ext, HAS_OFFICE_CONVERTER, PREVIEW_FILEEXT, \
get_conf_text_ext, PREVIEW_FILEEXT, \
normalize_file_path, get_service_url, OFFICE_PREVIEW_MAX_SIZE, \
normalize_cache_key, gen_file_get_url_by_sharelink, gen_file_get_url_new
from seahub.utils.ip import get_remote_ip
@ -78,12 +78,6 @@ from seahub.thumbnail.utils import extract_xmind_image, get_thumbnail_src, \
from seahub.seadoc.utils import get_seadoc_file_uuid, gen_seadoc_access_token, is_seadoc_revision
from seahub.seadoc.models import SeadocRevision
if HAS_OFFICE_CONVERTER:
from seahub.utils import (
query_office_convert_status, get_office_converted_page,
prepare_converted_html,
)
import seahub.settings as settings
from seahub.settings import FILE_ENCODING_LIST, FILE_PREVIEW_MAX_SIZE, \
FILE_ENCODING_TRY_LIST, MEDIA_URL, ENABLE_WATERMARK, \
@ -327,12 +321,7 @@ def handle_textual_file(request, filetype, raw_path, ret_dict):
ret_dict['file_encoding_list'] = file_encoding_list
def handle_document(raw_path, obj_id, fileext, ret_dict):
if HAS_OFFICE_CONVERTER:
err = prepare_converted_html(raw_path, obj_id, fileext, ret_dict)
# populate return value dict
ret_dict['err'] = err
else:
ret_dict['filetype'] = 'Unknown'
ret_dict['filetype'] = 'Unknown'
def handle_spreadsheet(raw_path, obj_id, fileext, ret_dict):
handle_document(raw_path, obj_id, fileext, ret_dict)
@ -410,34 +399,22 @@ def can_preview_file(file_name, file_size, repo):
error_msg = _('The library is encrypted, can not open file online.')
return False, error_msg
if not HAS_OFFICE_CONVERTER and \
not ENABLE_OFFICE_WEB_APP and \
if not ENABLE_OFFICE_WEB_APP and \
not ENABLE_ONLYOFFICE:
error_msg = "File preview unsupported"
return False, error_msg
# priority of view office file is:
# OOS > OnlyOffice > Seafile integrated
# OOS > OnlyOffice
if ENABLE_OFFICE_WEB_APP:
if fileext not in OFFICE_WEB_APP_FILE_EXTENSION:
error_msg = "File preview unsupported"
return False, error_msg
elif ENABLE_ONLYOFFICE:
else:
if fileext not in ONLYOFFICE_FILE_EXTENSION:
error_msg = "File preview unsupported"
return False, error_msg
else:
if not HAS_OFFICE_CONVERTER:
error_msg = "File preview unsupported"
return False, error_msg
# HAS_OFFICE_CONVERTER
if file_size > OFFICE_PREVIEW_MAX_SIZE:
error_msg = _('File size surpasses %s, can not be opened online.') % \
filesizeformat(OFFICE_PREVIEW_MAX_SIZE)
return False, error_msg
else:
# NOT depends on Seafile settings
if filetype not in list(PREVIEW_FILEEXT.keys()):
@ -999,23 +976,15 @@ def view_lib_file(request, repo_id, path):
else:
return_dict['err'] = _('Error when prepare OnlyOffice file preview page.')
if not HAS_OFFICE_CONVERTER:
return_dict['err'] = "File preview unsupported"
return render(request, template, return_dict)
if file_size > OFFICE_PREVIEW_MAX_SIZE:
error_msg = _('File size surpasses %s, can not be opened online.') % \
filesizeformat(OFFICE_PREVIEW_MAX_SIZE)
return_dict['err'] = error_msg
return render(request, template, return_dict)
error_msg = prepare_converted_html(raw_path, file_id, fileext, return_dict)
if error_msg:
return_dict['err'] = error_msg
return render(request, template, return_dict)
send_file_access_msg(request, repo, path, 'web')
return render(request, template, return_dict)
elif getattr(settings, 'ENABLE_CAD', False) and path.endswith('.dwg'):
from seahub.cad.utils import get_cad_dict
@ -1983,92 +1952,6 @@ def text_diff(request, repo_id):
})
########## office related
def _check_office_convert_perm(request, repo_id, path, ret):
token = request.GET.get('token', '')
if not token:
# Workaround for images embedded in Excel files
referer = request.headers.get('referer', '')
if referer:
token = urllib.parse.parse_qs(
urllib.parse.urlparse(referer).query).get('token', [''])[0]
if token:
fileshare = FileShare.objects.get_valid_file_link_by_token(token)
if not fileshare or fileshare.repo_id != repo_id:
return False
if fileshare.is_file_share_link() and fileshare.path == path:
return True
if fileshare.is_dir_share_link():
ret['dir_share_path'] = fileshare.path
return True
return False
else:
return request.user.is_authenticated and \
check_folder_permission(request, repo_id, '/') is not None
def _office_convert_get_file_id(request, repo_id=None, commit_id=None, path=None):
repo_id = repo_id or request.GET.get('repo_id', '')
commit_id = commit_id or request.GET.get('commit_id', '')
path = path or request.GET.get('path', '')
if not (repo_id and path and commit_id):
raise BadRequestException()
if '../' in path:
raise BadRequestException()
ret = {'dir_share_path': None}
if not _check_office_convert_perm(request, repo_id, path, ret):
raise BadRequestException()
if ret['dir_share_path']:
path = posixpath.join(ret['dir_share_path'], path.lstrip('/'))
return seafserv_threaded_rpc.get_file_id_by_commit_and_path(repo_id, commit_id, path)
@json_response
def office_convert_query_status(request):
if not request.headers.get('x-requested-with') == 'XMLHttpRequest':
raise Http404
doctype = request.GET.get('doctype', None)
file_id = _office_convert_get_file_id(request)
ret = {'success': False}
try:
ret = query_office_convert_status(file_id, doctype)
except Exception as e:
logging.exception('failed to call query_office_convert_status')
ret['error'] = str(e)
return ret
_OFFICE_PAGE_PATTERN = re.compile(r'^file\.css|file\.outline|index.html|index_html_.*.png|[a-z0-9]+\.pdf$')
def office_convert_get_page(request, repo_id, commit_id, path, filename):
"""Valid static file paths include:
- index.html for spreadsheets and index_html_xxx.png for images embedded in spreadsheets
- 77e168722458356507a1f373714aa9b575491f09.pdf
"""
if not HAS_OFFICE_CONVERTER:
raise Http404
if not _OFFICE_PAGE_PATTERN.match(filename):
return HttpResponseForbidden()
path = '/' + path
file_id = _office_convert_get_file_id(request, repo_id, commit_id, path)
if filename.endswith('.pdf'):
filename = "{0}.pdf".format(file_id)
resp = get_office_converted_page(path, filename, file_id)
if filename.endswith('.page'):
content_type = 'text/html'
else:
content_type = mimetypes.guess_type(filename)[0] or 'text/html'
resp['Content-Type'] = content_type
return resp
def view_media_file_via_share_link(request):
image_path = request.GET.get('path', '')
token = request.GET.get('token', '')