From e96b0c0bced2a75b3cbcbde4687e63a17a918974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=81=A5=E8=BE=89?= <40563566+mrwangjianhui@users.noreply.github.com> Date: Thu, 28 Jul 2022 13:40:56 +0800 Subject: [PATCH] repair empty md wiki mode display (#5222) * repair empty md display * optimize code --- seahub/wiki/views.py | 144 +++++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 66 deletions(-) diff --git a/seahub/wiki/views.py b/seahub/wiki/views.py index 025ed4e3bb..d4aae19cf9 100644 --- a/seahub/wiki/views.py +++ b/seahub/wiki/views.py @@ -28,6 +28,76 @@ from seahub.settings import SERVICE_URL logger = logging.getLogger(__name__) +def format_markdown_file_content(slug, repo_id, file_path, token, file_response): + # Convert a markdown string to HTML and parse the html + try: + html_content = markdown.markdown(file_response) + html_doc = lxml.html.fromstring(html_content) + except Exception as err_msg: + return '', [], err_msg + + service_url = SERVICE_URL.strip('/') + + # Replace src to wiki mode + img_labels = html_doc.xpath('//img') # Get all labels + img_url_re = re.compile(r'^%s/lib/%s/file/.*raw=1$' % (service_url, repo_id)) + for img in img_labels: + img_url = img.attrib.get('src', '') + if img_url_re.match(img_url) is not None: + img_path = img_url[img_url.find('/file/')+5:img_url.find('?')] + new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' % (service_url, slug, img_path) + html_content = html_content.replace(img_url, new_img_url) + elif re.compile(r'^\.\./*|^\./').match(img_url): + if img_url.startswith('../'): + img_path = os.path.join(os.path.dirname(os.path.dirname(file_path)), img_url[3:]) + else: + img_path = os.path.join(os.path.dirname(file_path), img_url[2:]) + new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' % (service_url, slug, img_path) + html_content = html_content.replace(img_url, new_img_url) + + # Replace href to wiki mode + a_labels = html_doc.xpath('//a') # Get all labels + file_url_re = re.compile(r'^%s/lib/%s/file/.*' % (service_url, repo_id)) + md_url_re = re.compile(r'^%s/lib/%s/file/.*\.md$' % (service_url, repo_id)) + dir_url_re = re.compile(r'^%s/library/%s/(.*)' % (service_url, repo_id)) + for a in a_labels: + old_href = a.attrib.get('href', '') + if file_url_re.match(old_href) is not None: + path = old_href[old_href.find('/file/') + 5:].strip('/') + if md_url_re.match(old_href) is not None: + new_href = '%s/published/%s/%s' % (service_url, slug, path) + html_content = html_content.replace(old_href, new_href) + else: + new_href = '%s/d/%s/files/?p=%s&dl=1' % (service_url, token, path) + html_content = html_content.replace(old_href, new_href) + elif dir_url_re.match(old_href) is not None: + url_path = dir_url_re.match(old_href).groups()[0].strip('/') + path = url_path[url_path.find('/'):].strip('/') + new_href = '%s/published/%s/%s' % (service_url, slug, path) + html_content = html_content.replace(old_href, new_href) + + # Get markdown outlines and format labels + outlines = list() + for p in html_content.split('\n'): + if p.startswith('

') and p.endswith('

'): + head = p.replace('

', '

' % p.strip('

'), 1) + html_content = html_content.replace(p, head) + elif p.startswith('

') and p.endswith('

'): + head = p.replace('

', '

' % p.strip('

'), 1) + html_content = html_content.replace(p, head) + outline = '
' + p.strip('

') + '
' + outlines.append(mark_safe(outline)) + elif p.startswith('

') and p.endswith('

'): + head = p.replace('

', '

' % p.strip('

'), 1) + html_content = html_content.replace(p, head) + outline = '
' + p.strip('

') + '
' + outlines.append(mark_safe(outline)) + + file_content = mark_safe(html_content) + + return file_content, outlines, None + + def slug(request, slug, file_path="home.md"): """Show wiki page. """ @@ -95,7 +165,7 @@ def slug(request, slug, file_path="home.md"): repo = seafile_api.get_repo(wiki.repo_id) file_content, outlines, latest_contributor, last_modified = '', [], '', 0 - if is_dir is False: + if is_dir is False and file_type == MARKDOWN: send_file_access_msg(request, repo, file_path, 'web') file_name = os.path.basename(file_path) @@ -111,72 +181,14 @@ def slug(request, slug, file_path="home.md"): logger.error(e) return render_error(request, _('Internal Server Error')) - if file_type == MARKDOWN: - # Convert a markdown string to HTML - try: - html_content = markdown.markdown(file_response) - except Exception as e: - logger.error(e) - return render_error(request, _('Internal Server Error')) + err_msg = None + if file_response: + file_content, outlines, err_msg = format_markdown_file_content( + slug, wiki.repo_id, file_path, fs.token, file_response) - # Parse the html and replace image url to wiki mode - html_doc = lxml.html.fromstring(html_content) - img_elements = html_doc.xpath('//img') # Get the elements - img_url_re = re.compile(r'^%s/lib/%s/file/.*raw=1$' % (SERVICE_URL.strip('/'), repo.id)) - for img in img_elements: - img_url = img.attrib.get('src', '') - if img_url_re.match(img_url) is not None: - img_path = img_url[img_url.find('/file/')+5:img_url.find('?')] - new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' \ - % (SERVICE_URL.strip('/'), slug, img_path) - html_content = html_content.replace(img_url, new_img_url) - elif re.compile(r'^\.\./*|^\./').match(img_url): - if img_url.startswith('../'): - img_path = os.path.join(os.path.dirname(os.path.dirname(file_path)), img_url[3:]) - else: - img_path = os.path.join(os.path.dirname(file_path), img_url[2:]) - new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' \ - % (SERVICE_URL.strip('/'), slug, img_path) - html_content = html_content.replace(img_url, new_img_url) - - # Replace link url to wiki mode - link_elements = html_doc.xpath('//a') # Get the
elements - file_link_re = re.compile(r'^%s/lib/%s/file/.*' % (SERVICE_URL.strip('/'), repo.id)) - md_link_re = re.compile(r'^%s/lib/%s/file/.*\.md$' % (SERVICE_URL.strip('/'), repo.id)) - dir_link_re = re.compile(r'^%s/library/%s/(.*)' % (SERVICE_URL.strip('/'), repo.id)) - for link in link_elements: - link_url = link.attrib.get('href', '') - if file_link_re.match(link_url) is not None: - link_path = link_url[link_url.find('/file/') + 5:].strip('/') - if md_link_re.match(link_url) is not None: - new_md_url = '%s/published/%s/%s' % (SERVICE_URL.strip('/'), slug, link_path) - html_content = html_content.replace(link_url, new_md_url) - else: - new_file_url = '%s/d/%s/files/?p=%s&dl=1' % (SERVICE_URL.strip('/'), fs.token, link_path) - html_content = html_content.replace(link_url, new_file_url) - elif dir_link_re.match(link_url) is not None: - link_path = dir_link_re.match(link_url).groups()[0].strip('/') - dir_path = link_path[link_path.find('/'):].strip('/') - new_dir_url = '%s/published/%s/%s' % (SERVICE_URL.strip('/'), slug, dir_path) - html_content = html_content.replace(link_url, new_dir_url) - - # Get markdown outlines and format label - for p in html_content.split('\n'): - if p.startswith('

') and p.endswith('

'): - head = p.replace('

', '

' % p.strip('

'), 1) - html_content = html_content.replace(p, head) - elif p.startswith('

') and p.endswith('

'): - head = p.replace('

', '

' % p.strip('

'), 1) - html_content = html_content.replace(p, head) - outline = '
' + p.strip('

') + '
' - outlines.append(mark_safe(outline)) - elif p.startswith('

') and p.endswith('

'): - head = p.replace('

', '

' % p.strip('

'), 1) - html_content = html_content.replace(p, head) - outline = '
' + p.strip('

') + '
' - outlines.append(mark_safe(outline)) - - file_content = mark_safe(html_content) + if err_msg: + logger.error(err_msg) + return render_error(request, _('Internal Server Error')) try: dirent = seafile_api.get_dirent_by_path(wiki.repo_id, file_path)