gitar

syntax.py

1
""" This module is responsible for the interaction between Gitar and Pygments.
2
3
In essence, this means that in this module, a file's contents are passed to
Pygments, which then returns the appropriate HTML output, which can in turn
be rendered directly in a Django template.
6
"""
7
8
from pygments import highlight
9
from pygments.lexers import get_lexer_by_name
10
from pygments.formatters import HtmlFormatter
11
from pygments.lexers import guess_lexer_for_filename
12
from pygments.lexers.templates import HtmlDjangoLexer
13
14
def code_to_HTML(code, file_name):
    """ Turns the given code into highlighted HTML, split into lines.

    Please note that the lexer that will be used for the syntax coloring is
    determined by the given file name, so make sure that the given code comes
    from the given file.

    Keyword arguments:
    code -- The contents of the file to highlight. None yields an empty list.
    file_name -- The name of the file from where the given code originates.

    Returns a list of strings, one per line of output. When highlighting
    succeeds these are lines of the HTML produced by Pygments; when it fails
    (see below) they are the plain, unhighlighted lines of the given code.
    """
    if code is None:
        return []

    def render(lexer):
        # linenos adds line numbering to the front of the output. That is
        # handled elsewhere, so it is disabled here.
        # cssclass sets the enclosing <div>'s class to the given CSS class.
        formatter = HtmlFormatter(linenos=False, cssclass="source")
        return decode_to_unicode(highlight(code, lexer, formatter))

    # This lexer has to be selected manually because djhtml is not recognized
    # as a filename for the HtmlDjangoLexer.
    if file_name.endswith('.djhtml'):
        return render(HtmlDjangoLexer())

    try:
        # stripall would remove the whitespace in front, but that also removes
        # the file's indentation, hence stripall=False.
        lexer = guess_lexer_for_filename(file_name, code, stripall=False)
        return render(lexer)
    except Exception:
        # This happens with files lacking a file extension, and MarkDown files.
        # In that case no lexer is used and the code is returned as plain
        # lines. Exception (rather than a bare except) keeps the best-effort
        # fallback while letting KeyboardInterrupt/SystemExit propagate.
        return no_syntax(code)
51
52
def no_syntax(data):
    """ Splits the given file contents into a list of plain text lines.

    No syntax highlighting is applied; this is the fallback for content that
    has no usable lexer.

    Keyword arguments:
    data -- The contents as a str, or as a bytes-like object holding UTF-8
            encoded text. Empty input or None yields an empty list.

    Returns a list with one string per line, split on "\\n" only (so a "\\r"
    from a CRLF line ending stays part of the line). The newline characters
    themselves are not included. A final line without a trailing newline is
    kept as well.

    Raises UnicodeDecodeError when bytes are given that are not valid UTF-8.
    """
    if not data:
        return []

    text = data if isinstance(data, str) else data.decode("utf-8")

    lines = text.split("\n")
    # A trailing newline terminates the last line rather than starting a new,
    # empty one, so drop the empty remainder that split() leaves behind.
    if lines[-1] == "":
        lines.pop()
    return lines
67
68
69
def decode_to_unicode(data):
    """ Splits the given text into a list of lines.

    Despite its name, this function performs no decoding: the given data is
    expected to already be text (a str), such as the output of Pygments'
    highlight().

    Keyword arguments:
    data -- The text to split. Empty input or None yields an empty list.

    Returns a list with one string per line, split on "\\n" only. The newline
    characters themselves are not included. A final line without a trailing
    newline is kept as well.
    """
    if not data:
        return []

    lines = data.split("\n")
    # A trailing newline terminates the last line rather than starting a new,
    # empty one, so drop the empty remainder that split() leaves behind.
    if lines[-1] == "":
        lines.pop()
    return lines
82