Remove obsolete Markdown module
All functionality has been moved to Views, and is now mainly implemented by Haskell's Pandoc library.
- Author
- Maarten 'Vngngdn' Vangeneugden
- Date
- Sept. 28, 2017, 10:27 p.m.
- Hash
- bb491800b73980c262e7e68f93529f36a56a4076
- Parent
- 5a08d014792d6a8417ef684c8e0a22eb6e6ac351
- Modified files
- markdown.py
- views.py
markdown.py ¶
0 additions and 223 deletions.
View changes Hide changes
1 |
- | import pygments |
2 |
- | |
3 |
- | """ So welcome to my Markdown module. Since the markdown library in PyPI is |
4 |
- | fucking shit, I've decided to write my own implementation. Contrary to the one in |
5 |
- | PyPI, my version handles **all** cases, and is a **full implementation** of the |
6 |
- | reference. |
7 |
- | |
8 |
- | Oh, and just so you know: You don't need an entire shitty object oriented system |
9 |
- | to make something decent. Sometimes the solution is a function. Period. |
10 |
- | """ |
11 |
- | |
12 |
- | """ |
13 |
- | Checklist about all shit that must be implemented: |
14 |
- | - headers need to have their ID's be the same as the title. BUT! id's |
15 |
- | mustn't have spaces, and need to be unique. The latter isn't that big of a |
16 |
- | deal, but spaces in the header title must be converted to dashes. |
17 |
- | - HTML code needs to be escaped; & must become &amp;, < and > become &lt; |
18 |
- | and &gt; and so on. This isn't necessary for UTF-8 symbols such as ©, |
19 |
- | which can be put in place as is, instead of converting to &copy;. |
20 |
- | - Some elements have to be placed in the tag itself, such as links in <a />. |
21 |
- | This is noted with the {#} tags. The context in which they are used in the |
22 |
- | defaults should give a good explanation on what number points to what. |
23 |
- | - Remember to support 2 trailing spaces as <br />! |
24 |
- | - There are also "closing ATX headers": "# title" is the same as |
25 |
- | "# title ####" and "# title #". (So it's purely cosmetic, remove the |
26 |
- | trailing whitespace in these cases) |
27 |
- | - When code is used, call Pygments to markup the code properly. If a code |
28 |
- | tag is provided (e.g. "Python", "C", ...), tell that to Pygments as well, |
29 |
- | so it can do a better job. If nothing is provided, leave it as is. When |
30 |
- | it's an inline code block (`CODE`), leave that always as is. |
31 |
- | Look how to do it at |
32 |
- | <http://docs.getpelican.com/en/stable/content.html#syntax-highlighting>. |
33 |
- | |
34 |
- | Future expansions: |
35 |
- | - Allow nesting of more elements. For example: Headers cannot be nested in |
36 |
- | blockquotes, but this is a nice thing to have. |
37 |
- | - Allow headers to follow a line wrapping, if the next line is preceded by |
38 |
- | the same amount of hashtags (=> same header level). |
39 |
- | https://daringfireball.net/projects/markdown/syntax#blockquote |
40 |
- | """ |
41 |
- | |
42 |
- | def toHTML( |
43 |
- | text, |
44 |
- | emphasis = r"<em>{text}</em>", |
45 |
- | strong = r"<strong>{text}</strong>", |
46 |
- | unordered_list = r"<ul>{items}</ul>", |
47 |
- | ordered_list = r"<ol>{items}</ol>", |
48 |
- | list_item = r"<li>{text}</li>", |
49 |
- | hyperlink = r'<a href="{link}" title="{title}">{text}</a>', |
50 |
- | image = r'<img src="{link}" alt="{alt}" title="{title}" />', |
51 |
- | paragraph = r"<p>{text}</p>", |
52 |
- | blockquote = r"<blockquote>{text}</blockquote>", |
53 |
- | header1 = r'<h1 id="{link}">{text}</h1>', |
54 |
- | header2 = r'<h2 id="{link}">{text}</h2>', |
55 |
- | header3 = r'<h3 id="{link}">{text}</h3>', |
56 |
- | header4 = r'<h4 id="{link}">{text}</h4>', |
57 |
- | header5 = r'<h5 id="{link}">{text}</h5>', |
58 |
- | header6 = r'<h6 id="{link}">{text}</h6>', |
59 |
- | inline_code = r'<code>\g<code></code>', |
60 |
- | code = r'<code lang="\g<language>">\g<code></code>', |
61 |
- | incorrect = r"<s>{text}</s>", |
62 |
- | line_break = r"<br />", |
63 |
- | horizontal_rule = r"<hr />", |
64 |
- | ): |
65 |
- | """ Translates Markdown code to HTML code. |
66 |
- | |
67 |
- | This is a pure function. |
68 |
- | |
69 |
- | This function will translate given Markdown code to HTML code. |
70 |
- | It follows the specification as closely as possible, with a few custom additions: |
71 |
- | - Incorrect text can be marked with "~" around a text block. |
72 |
- | |
73 |
- | The default parameters have sane defaults, but can be customized if you wish |
74 |
- | to do so. Pay attention to the tags, as your custom value must also |
75 |
- | incorporate these. |
76 |
- | |
77 |
- | The function works in a simple way: |
78 |
- | 1. Replace all redundant content with only 1 unique part |
79 |
- | 1.1. For example: 5 blank lines mean the same as 2; a line with only spaces |
80 |
- | and tabs means the same as an empty line; hashtags at the end of a header |
81 |
- | line are meaningless; ... |
82 |
- | 2. Handle blockquotes. Blockquotes have the highest precedence and can contain |
83 |
- | any other element, thus it's easiest to just handle these as soon as possible. |
84 |
- | 3. Replace Setext with atx-style headers, to provide consistency for header handling. |
85 |
- | 4. Handle block elements (paragraphs, code, ...). |
86 |
- | 5. In all block elements, handle span elements (links, emphasis, ...). |
87 |
- | """ |
88 |
- | |
89 |
- | # Replacing some shit: |
90 |
- | text = re.sub(r"^[ \t]+$", "\n", text) # Make all blank lines consistent |
91 |
- | text = re.sub(r"\n{3}", "\n\n", text) # Replace redundant blanks with 2 blank lines |
92 |
- | |
93 |
- | # XXX: Blockquotes have the highest precedence: **ANYTHING** can be nested |
94 |
- | # in a blockquote. So, handle these first, and convert them up front to |
95 |
- | # make it easier to handle the other text. |
96 |
- | |
97 |
- | |
98 |
- | """ About handling blockquotes: |
99 |
- | Every line that starts with "> " is a blockquote. As long as the next line |
100 |
- | starts in the same way, it's considered part of the same blockquote. |
101 |
- | **However**, there is 1 exception to this rule: |
102 |
- | paragraphs that are hard-wrapped only need 1 > for their first line, but can |
103 |
- | then be hard wrapped, and even start without prior spacing. |
104 |
- | """ |
105 |
- | blockquotes_left = True |
106 |
- | while blockquotes_left: |
107 |
- | blockquote = re.compile(r"(^> .+\n)+") |
108 |
- | quote = blockquote.search(text) |
109 |
- | if quote is None: |
110 |
- | blockquotes_left = False |
111 |
- | else: |
112 |
- | begin, end = quote.span() |
113 |
- | reworked = "<blockquote>" + text[begin:end].replace(r"\n> ", r"\n") + r"</blockquote>\n" |
114 |
- | text = text[:begin] + reworked + text[end:] |
115 |
- | |
116 |
- | # All blockquotes are now removed |
117 |
- | |
118 |
- | # Converting setext to atx headers |
119 |
- | text = re.sub(r"^(?P<title>.+)\n=+$", r"# \g<title>", text, flags=re.MULTILINE) |
120 |
- | text = re.sub(r"^(?P<title>.+)\n-+$", r"## \g<title>", text, flags=re.MULTILINE) |
121 |
- | # All are now converted to atx style headers |
122 |
- | # Transforming headers: |
123 |
- | for i in range(1,7): |
124 |
- | header = r"^#{"+str(i)+r"} (?P<title>.+)$" |
125 |
- | match = re.search(header, text, flags=re.MULTILINE) |
126 |
- | while match is not None: |
127 |
- | future_id = match['title'].lower() |
128 |
- | future_id = re.sub(r"[_,.!]", r"", future_id) |
129 |
- | future_id = re.sub(r" ", r"-", future_id) |
130 |
- | dictionary = match.groupdict() |
131 |
- | dictionary['link'] = future_id |
132 |
- | replacement = (r'<h'+str(i)+r' id="{link}">{title}</h'+str(i)+r'>').format_map(dictionary) |
133 |
- | text = text[:match.start()] + replacement + text[match.end():] |
134 |
- | match = re.search(header, text, flags=re.MULTILINE) |
135 |
- | |
136 |
- | # All headers transformed |
137 |
- | |
138 |
- | # Paragraphs |
139 |
- | text = re.sub(r"(?P<text>(?:^(?!<).+\n)+)", r"<p>\n\g<text></p>", text, flags=re.MULTILINE) |
140 |
- | |
141 |
- | |
142 |
- | # Doing inline hyperlinks |
143 |
- | text = re.sub(r"\[(?P<text>.+?)\]\((?P<url>.+?) \"(?P<title>.+?)\"\)", r'<a href="\g<url>" title="\g<title>">\g<text></a>', text, flags=re.S) |
144 |
- | text = re.sub(r"\[(?P<text>.+?)\]\((?P<url>.+?)\)", r'<a href="\g<url>">\g<text></a>', text, flags=re.S) |
145 |
- | |
146 |
- | # Doing strongs |
147 |
- | text = re.sub(r"\*\*(?P<text>.+?)\*\*", r"<strong>\g<text></strong>", text, flags=re.S) |
148 |
- | text = re.sub(r"__(?P<text>.+?)__", r"<strong>\g<text></strong>", text, flags=re.S) |
149 |
- | # Doing emphasis |
150 |
- | text = re.sub(r"\*(?P<text>.+?)\*", r"<em>\g<text></em>", text, flags=re.S) |
151 |
- | text = re.sub(r"_(?P<text>.+?)_", r"<em>\g<text></em>", text, flags=re.S) |
152 |
- | # Code blocks |
153 |
- | text = re.sub(r"^```(?P<language>.+?)\n(?P<code>.+?)\n```$", code, text, flags=re.S) |
154 |
- | # Doing inline code |
155 |
- | text = re.sub(r"``(?P<code>.+?)``", inline_code, text, flags=re.S) |
156 |
- | text = re.sub(r"`(?P<code>.+?)`", inline_code, text, flags=re.S) |
157 |
- | # Header lines |
158 |
- | text = re.sub(r"^((\*|_|-) *){3,}$", horizontal_rule, text) |
159 |
- | # Line breaks |
160 |
- | text = re.sub(r" $", line_break, text) |
161 |
- | |
162 |
- | return text |
163 |
- | """ |
164 |
- | |
165 |
- | |
166 |
- | |
167 |
- | |
168 |
- | |
169 |
- | block_elements_table = { |
170 |
- | "code": r"```(?P<language>\w+)\n( .*\n)+", |
171 |
- | "blockquote": r"^> (?P<text>.+) |
172 |
- | "paragraph": r"(?P<text>(^.+\n)+)", |
173 |
- | "header": r"^#{1,6} (?P<title>(\w+ ?)+ *) ?#*$", |
174 |
- | |
175 |
- | |
176 |
- | element_table = { |
177 |
- | "emphasis": (r"\*(?P<text>[^\*.]*)\*|_(?P<text>[^\_.]*)_", emphasis, emphasis_end), |
178 |
- | "strong": (r"\*\*(?P<text>[^*.]*)\*\*|__(?P<text>[^\_.]*)__", strong, strong_end), |
179 |
- | "unordered list": (r"") |
180 |
- | "inline link": (r"\[(\w\s)+\]\( |
181 |
- | |
182 |
- | |
183 |
- | def translate(text, begin, end, parameters): |
184 |
- | |
185 |
- | if alpha: # If this contains no more nested elements: |
186 |
- | return begin.format(parameters) + text + end |
187 |
- | elif beta: # text contains nested elements: |
188 |
- | # Find parameters or something IDK |
189 |
- | return begin.format(parameters) + _ |
190 |
- | translate(text[alpha:beta], begin_tag, end_tag, found_parameters) + _ |
191 |
- | end |
192 |
- | |
193 |
- | # Zoom zoom insert magic code here |
194 |
- | |
195 |
- | # NOTE: Hyperlinks are handled specially in Markdown. Check the syntax page |
196 |
- | # for more information. That said, it's imperative to **first** collect all |
197 |
- | # information about hyperlinks, and remove it, so it can be used when |
198 |
- | # parsing hyperlinks. |
199 |
- | |
200 |
- | # Table of all elements and their respective regular expression: |
201 |
- | elements = { |
202 |
- | paragraph: r"", |
203 |
- | ordered_list_item: r"" |
204 |
- | hyperlink: |
205 |
- | header1: r"^# [*\(\n)] \n" |
206 |
- | } |
207 |
- | |
208 |
- | """ |
209 |
- | """ The reason the length is stored instead of the end, is because it is |
210 |
- | less error prone; if a parent node is updated, only the begin needs to be |
211 |
- | updated, as the length is still the same for the node. The begin can be |
212 |
- | relative to the parent node, so even that won't have to be updated. """ |
213 |
- | """ |
214 |
- | node = { |
215 |
- | "type": block_type, |
216 |
- | "begin": begin, |
217 |
- | "length": length, |
218 |
- | "children": children, |
219 |
- | } |
220 |
- | |
221 |
- | return markdown_code |
222 |
- | """ |
223 |
- |
views.py ¶
7 additions and 7 deletions.
View changes Hide changes
1 |
1 |
import subprocess |
2 |
2 |
|
3 |
3 |
from django.shortcuts import get_object_or_404, render # This allows to render the template with the view here. It's pretty cool and important. |
4 |
4 |
from django.http import HttpResponseRedirect, HttpResponse |
5 |
5 |
from django.core.urlresolvers import reverse # Why? |
6 |
6 |
from django.template import loader # This allows to actually load the template. |
7 |
7 |
from django.contrib.auth.decorators import login_required |
8 |
8 |
from django.contrib.auth import authenticate, login |
9 |
9 |
from .models import Post |
10 |
10 |
from django.core.exceptions import ObjectDoesNotExist |
11 |
11 |
from markdown import markdown |
12 |
- | from django.utils import translation |
13 |
12 |
|
14 |
13 |
# FIXME: Remove this template trash. THIS IS A VIEW, NOT A FUCKING TEMPLATE FFS |
15 |
14 |
context = { |
16 |
15 |
'materialDesign_color': "green", |
17 |
16 |
'materialDesign_accentColor': "purple", |
18 |
17 |
'navbar_title': "Blog", |
19 |
18 |
'navbar_fixed': True, |
20 |
19 |
'navbar_backArrow': True, |
21 |
20 |
#'footer_title': "Maarten's blog", |
22 |
21 |
#'footer_description': "My personal scribbly notepad.", |
23 |
22 |
#'footer_links': footer_links, |
24 |
23 |
} |
25 |
24 |
|
26 |
25 |
def get_markdown_text(file_path): |
27 |
- | # TODO: This still uses Pandoc to convert the file in the background to HTML |
28 |
- | # code. That's a pretty bad solution (doesn't mean Pandoc is bad though). |
29 |
- | # Remember to write a custom implementation when there's time available. |
30 |
- | return subprocess.check_output(["pandoc", file_path]) |
31 |
- | |
+ |
26 |
""" Converts the given Markdown formatted file to HTML. |
+ |
27 |
This function directly returns the resulting HTML code. This function uses |
+ |
28 |
the amazing Haskell library Pandoc to convert the file (and takes care |
+ |
29 |
of header id's and all that stuff). |
+ |
30 |
""" |
+ |
31 |
return subprocess.check_output(["pandoc", "--from=markdown", "--to=html", file_path]) |
+ |
32 |
|
32 |
33 |
def get_available_post_languages(post): |
33 |
34 |
""" Returns the language codes for which a blog post exists. This function |
34 |
35 |
always returns English (because that field mustn't be empty). |
35 |
36 |
So say a blog post has an English, Dutch and French version (which means |
36 |
37 |
english_file, french_file and dutch_file aren't empty), the function will return {"en", |
37 |
38 |
"fr", "nl"}. """ |
38 |
39 |
available_languages = {"en"} |
39 |
40 |
if post.german_file is not None: |
40 |
41 |
available_languages.add("de") |
41 |
42 |
if post.spanish_file is not None: |
42 |
43 |
available_languages.add("es") |
43 |
44 |
if post.french_file is not None: |
44 |
45 |
available_languages.add("fr") |
45 |
46 |
if post.dutch_file is not None: |
46 |
47 |
available_languages.add("nl") |
47 |
48 |
return available_languages |
48 |
49 |
|
49 |
50 |
def get_preferred_post_language(post, language): |
50 |
51 |
""" Returns the post language file that best suits the given language. This |
51 |
52 |
is handy if you know what language the user prefers, but aren't sure whether |
52 |
53 |
you can provide that language. This function will try to provide the file |
53 |
54 |
for that language, or return English if that's not possible. """ |
54 |
55 |
if language == "de" and post.german_file is not None: |
55 |
56 |
return post.german_file |
56 |
57 |
if language == "es" and post.spanish_file is not None: |
57 |
58 |
return post.spanish_file |
58 |
59 |
if language == "fr" and post.french_file is not None: |
59 |
60 |
return post.french_file |
60 |
61 |
if language == "nl" and post.dutch_file is not None: |
61 |
62 |
return post.dutch_file |
62 |
63 |
return post.english_file # Returned if all other choices wouldn't be satisfactory, or the requested language is English. |
63 |
64 |
|
64 |
- | |
65 |
65 |
def index(request): |
66 |
66 |
template = "blog/index.html" |
67 |
67 |
posts = Post.objects.all() |
68 |
68 |
language = translation.get_language() |
69 |
69 |
|
70 |
70 |
post_links = [] |
71 |
71 |
for post in posts: |
72 |
72 |
blog_file = get_preferred_post_language(post, language) |
73 |
73 |
# TODO: Find a cleaner way to determine the title. First and foremost: |
74 |
74 |
# If the language differs from English, the other language file needs to |
75 |
75 |
# be loaded. Plus: look for a built in function to remove the full path |
76 |
76 |
# and only return the file name. |
77 |
77 |
title = (blog_file.name.rpartition("/")[2]).rpartition(".")[0] |
78 |
78 |
date = post.published |
79 |
79 |
description = "Lorem ipsum" |
80 |
80 |
# TODO: The link can possibly be reversed in the DTL using the title, which is actually |
81 |
81 |
# a cleaner way to do it. Investigate. |
82 |
82 |
link = reverse("blog-post", args=[str(post)]) |
83 |
83 |
post_links.append([title, date, description, link]) |
84 |
84 |
|
85 |
85 |
context = { |
86 |
86 |
'post_links': post_links, |
87 |
87 |
} |
88 |
88 |
return render(request, template, context) |
89 |
89 |
|
90 |
90 |
def post(request, title): |
91 |
91 |
template = "blog/post.html" |
92 |
92 |
posts = Post.objects.get(english_file=title) |
93 |
93 |
language = translation.get_language() |
94 |
94 |
blog_file = get_preferred_post_language(post, language) |
95 |
95 |
blog_text = markdown(blog_file) |
96 |
96 |
|
97 |
97 |
context = { |
98 |
98 |
'article': blog_text, |
99 |
99 |
'title': blog_file.name, |
100 |
100 |
} |
101 |
101 |
return render(request, template, context) |
102 |
102 |