From e96b0c0bced2a75b3cbcbde4687e63a17a918974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E5=81=A5=E8=BE=89?=
 <40563566+mrwangjianhui@users.noreply.github.com>
Date: Thu, 28 Jul 2022 13:40:56 +0800
Subject: [PATCH] repair empty md wiki mode display (#5222)

* repair empty md display

* optimize code
---
 seahub/wiki/views.py | 144 +++++++++++++++++++++++--------------------
 1 file changed, 78 insertions(+), 66 deletions(-)
diff --git a/seahub/wiki/views.py b/seahub/wiki/views.py
index 025ed4e3bb..d4aae19cf9 100644
--- a/seahub/wiki/views.py
+++ b/seahub/wiki/views.py
@@ -28,6 +28,76 @@ from seahub.settings import SERVICE_URL
 logger = logging.getLogger(__name__)
 
 
+def format_markdown_file_content(slug, repo_id, file_path, token, file_response):
+    # Convert a markdown string to HTML and parse the html
+    try:
+        html_content = markdown.markdown(file_response)
+        html_doc = lxml.html.fromstring(html_content)
+    except Exception as err_msg:
+        return '', [], err_msg
+
+    service_url = SERVICE_URL.strip('/')
+
+    # Replace <img> src to wiki mode
+    img_labels = html_doc.xpath('//img')   # Get all <img> labels
+    img_url_re = re.compile(r'^%s/lib/%s/file/.*raw=1$' % (service_url, repo_id))
+    for img in img_labels:
+        img_url = img.attrib.get('src', '')
+        if img_url_re.match(img_url) is not None:
+            img_path = img_url[img_url.find('/file/')+5:img_url.find('?')]
+            new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' % (service_url, slug, img_path)
+            html_content = html_content.replace(img_url, new_img_url)
+        elif re.compile(r'^\.\./*|^\./').match(img_url):
+            if img_url.startswith('../'):
+                img_path = os.path.join(os.path.dirname(os.path.dirname(file_path)), img_url[3:])
+            else:
+                img_path = os.path.join(os.path.dirname(file_path), img_url[2:])
+            new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' % (service_url, slug, img_path)
+            html_content = html_content.replace(img_url, new_img_url)
+
+    # Replace <a> href to wiki mode
+    a_labels = html_doc.xpath('//a')  # Get all <a> labels
+    file_url_re = re.compile(r'^%s/lib/%s/file/.*' % (service_url, repo_id))
+    md_url_re = re.compile(r'^%s/lib/%s/file/.*\.md$' % (service_url, repo_id))
+    dir_url_re = re.compile(r'^%s/library/%s/(.*)' % (service_url, repo_id))
+    for a in a_labels:
+        old_href = a.attrib.get('href', '')
+        if file_url_re.match(old_href) is not None:
+            path = old_href[old_href.find('/file/') + 5:].strip('/')
+            if md_url_re.match(old_href) is not None:
+                new_href = '%s/published/%s/%s' % (service_url, slug, path)
+                html_content = html_content.replace(old_href, new_href)
+            else:
+                new_href = '%s/d/%s/files/?p=%s&dl=1' % (service_url, token, path)
+                html_content = html_content.replace(old_href, new_href)
+        elif dir_url_re.match(old_href) is not None:
+            url_path = dir_url_re.match(old_href).groups()[0].strip('/')
+            path = url_path[url_path.find('/'):].strip('/')
+            new_href = '%s/published/%s/%s' % (service_url, slug, path)
+            html_content = html_content.replace(old_href, new_href)
+
+    # Get markdown outlines and format <h> labels
+    outlines = list()
+    for p in html_content.split('\n'):
+        if p.startswith('<h1>') and p.endswith('</h1>'):
+            head = p.replace('<h1>', '<h1 id="user-content-%s">' % p.strip('<h1></h1>'), 1)
+            html_content = html_content.replace(p, head)
+        elif p.startswith('<h2>') and p.endswith('</h2>'):
+            head = p.replace('<h2>', '<h2 id="user-content-%s">' % p.strip('<h2></h2>'), 1)
+            html_content = html_content.replace(p, head)
+            outline = '<div class="outline-h2">' + p.strip('<h2></h2>') + '</div>'
+            outlines.append(mark_safe(outline))
+        elif p.startswith('<h3>') and p.endswith('</h3>'):
+            head = p.replace('<h3>', '<h3 id="user-content-%s">' % p.strip('<h3></h3>'), 1)
+            html_content = html_content.replace(p, head)
+            outline = '<div class="outline-h3">' + p.strip('<h3></h3>') + '</div>'
+            outlines.append(mark_safe(outline))
+
+    file_content = mark_safe(html_content)
+
+    return file_content, outlines, None
+
+
 def slug(request, slug, file_path="home.md"):
     """Show wiki page.
     """
@@ -95,7 +165,7 @@ def slug(request, slug, file_path="home.md"):
     repo = seafile_api.get_repo(wiki.repo_id)
 
     file_content, outlines, latest_contributor, last_modified = '', [], '', 0
-    if is_dir is False:
+    if is_dir is False and file_type == MARKDOWN:
         send_file_access_msg(request, repo, file_path, 'web')
 
         file_name = os.path.basename(file_path)
@@ -111,72 +181,14 @@ def slug(request, slug, file_path="home.md"):
             logger.error(e)
             return render_error(request, _('Internal Server Error'))
 
-        if file_type == MARKDOWN:
-            # Convert a markdown string to HTML
-            try:
-                html_content = markdown.markdown(file_response)
-            except Exception as e:
-                logger.error(e)
-                return render_error(request, _('Internal Server Error'))
+        err_msg = None
+        if file_response:
+            file_content, outlines, err_msg = format_markdown_file_content(
+                slug, wiki.repo_id, file_path, fs.token, file_response)
 
-            # Parse the html and replace image url to wiki mode
-            html_doc = lxml.html.fromstring(html_content)
-            img_elements = html_doc.xpath('//img')   # Get the <img> elements
-            img_url_re = re.compile(r'^%s/lib/%s/file/.*raw=1$' % (SERVICE_URL.strip('/'), repo.id))
-            for img in img_elements:
-                img_url = img.attrib.get('src', '')
-                if img_url_re.match(img_url) is not None:
-                    img_path = img_url[img_url.find('/file/')+5:img_url.find('?')]
-                    new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' \
-                                  % (SERVICE_URL.strip('/'), slug, img_path)
-                    html_content = html_content.replace(img_url, new_img_url)
-                elif re.compile(r'^\.\./*|^\./').match(img_url):
-                    if img_url.startswith('../'):
-                        img_path = os.path.join(os.path.dirname(os.path.dirname(file_path)), img_url[3:])
-                    else:
-                        img_path = os.path.join(os.path.dirname(file_path), img_url[2:])
-                    new_img_url = '%s/view-image-via-public-wiki/?slug=%s&path=%s' \
-                                  % (SERVICE_URL.strip('/'), slug, img_path)
-                    html_content = html_content.replace(img_url, new_img_url)
-
-            # Replace link url to wiki mode
-            link_elements = html_doc.xpath('//a')  # Get the <a> elements
-            file_link_re = re.compile(r'^%s/lib/%s/file/.*' % (SERVICE_URL.strip('/'), repo.id))
-            md_link_re = re.compile(r'^%s/lib/%s/file/.*\.md$' % (SERVICE_URL.strip('/'), repo.id))
-            dir_link_re = re.compile(r'^%s/library/%s/(.*)' % (SERVICE_URL.strip('/'), repo.id))
-            for link in link_elements:
-                link_url = link.attrib.get('href', '')
-                if file_link_re.match(link_url) is not None:
-                    link_path = link_url[link_url.find('/file/') + 5:].strip('/')
-                    if md_link_re.match(link_url) is not None:
-                        new_md_url = '%s/published/%s/%s' % (SERVICE_URL.strip('/'), slug, link_path)
-                        html_content = html_content.replace(link_url, new_md_url)
-                    else:
-                        new_file_url = '%s/d/%s/files/?p=%s&dl=1' % (SERVICE_URL.strip('/'), fs.token, link_path)
-                        html_content = html_content.replace(link_url, new_file_url)
-                elif dir_link_re.match(link_url) is not None:
-                    link_path = dir_link_re.match(link_url).groups()[0].strip('/')
-                    dir_path = link_path[link_path.find('/'):].strip('/')
-                    new_dir_url = '%s/published/%s/%s' % (SERVICE_URL.strip('/'), slug, dir_path)
-                    html_content = html_content.replace(link_url, new_dir_url)
-
-            # Get markdown outlines and format <h> label
-            for p in html_content.split('\n'):
-                if p.startswith('<h1>') and p.endswith('</h1>'):
-                    head = p.replace('<h1>', '<h1 id="user-content-%s">' % p.strip('<h1></h1>'), 1)
-                    html_content = html_content.replace(p, head)
-                elif p.startswith('<h2>') and p.endswith('</h2>'):
-                    head = p.replace('<h2>', '<h2 id="user-content-%s">' % p.strip('<h2></h2>'), 1)
-                    html_content = html_content.replace(p, head)
-                    outline = '<div class="outline-h2">' + p.strip('<h2></h2>') + '</div>'
-                    outlines.append(mark_safe(outline))
-                elif p.startswith('<h3>') and p.endswith('</h3>'):
-                    head = p.replace('<h3>', '<h3 id="user-content-%s">' % p.strip('<h3></h3>'), 1)
-                    html_content = html_content.replace(p, head)
-                    outline = '<div class="outline-h3">' + p.strip('<h3></h3>') + '</div>'
-                    outlines.append(mark_safe(outline))
-
-            file_content = mark_safe(html_content)
+        if err_msg:
+            logger.error(err_msg)
+            return render_error(request, _('Internal Server Error'))
 
         try:
             dirent = seafile_api.get_dirent_by_path(wiki.repo_id, file_path)