diff --git a/app/utils/template_filters.py b/app/utils/template_filters.py index a71146fa..099ec136 100644 --- a/app/utils/template_filters.py +++ b/app/utils/template_filters.py @@ -25,7 +25,7 @@ def _normalize_toastui_markdown(text): """Clean up artefacts of Toast UI Editor's WYSIWYG-to-markdown conversion. Toast UI follows CommonMark and over-escapes punctuation when round- - tripping rich text through its editor. Two patterns matter for display: + tripping rich text through its editor. Three patterns matter for display: 1. Line-leading ``\\- `` (also ``\\* ``, ``\\+ ``) prevents Python markdown from parsing the line as a list item. Toast UI emits this when the @@ -34,6 +34,10 @@ def _normalize_toastui_markdown(text): (commas, colons, semicolons, etc.) renders as a literal ``\\,`` in the HTML output. Stripping the backslash is safe because none of those chars carry markdown meaning anywhere in a paragraph. + 3. ``~~text~~`` strikethrough is part of CommonMark/GFM but the Python + ``markdown`` library's ``extra`` extension does not implement it, + so it leaks through as literal ``~~...~~`` text. Convert to inline + ``<del>`` HTML, which markdown passes through and bleach allows. """ import re @@ -43,6 +47,9 @@ def _normalize_toastui_markdown(text): text = re.sub(r"^(\s*)\\([\-+*])(\s)", r"\1\2\3", text, flags=re.MULTILINE) # Drop backslashes before punctuation Python markdown does not handle. text = re.sub(r"\\([^" + _PYMD_ESCAPABLE + r"])", r"\1", text) + # Convert ~~strikethrough~~ to <del>...</del>. Non-greedy and same-line + # only so multiple strikethroughs on one line each get their own pair. + text = re.sub(r"~~(.+?)~~", r"<del>\1</del>", text) return text