syntax.py

1
""" This module is responsible for the interaction between Gitar and Pygments.
2
3
In essence, this means that in this module, a file's contents are passed to
Pygments, which will then return the appropriate HTML output, which can then
be rendered directly in a Django template.
6
"""
7
8
from pygments import highlight
9
from pygments.lexers import get_lexer_by_name
10
from pygments.formatters import HtmlFormatter
11
from pygments.lexers import guess_lexer_for_filename
12
13
def code_to_HTML(code, file_name):
    """ Turns the given code into syntax-highlighted HTML, ready for output.

    Please note that the lexer that will be used for the syntax coloring is
    guessed from the given file name, so make sure that the given code really
    comes from the given file.

    Keyword arguments:
    code -- The contents of the file. The fallback path decodes this as
            UTF-8, so it is expected to be a bytes-like object.
    file_name -- The name of the file from where the given code originates.

    Returns a list of strings, one per line: the HTML produced by Pygments
    when highlighting succeeds, otherwise the plain decoded text as returned
    by no_syntax().
    """
    try:
        # stripall removes whitespace in front, but that also removes file
        # indentation, so it is switched off.
        lexer = guess_lexer_for_filename(file_name, code, stripall=False)
        # linenos=False: Pygments must not add line numbers to the output;
        # line numbering is done outside of Pygments.
        # cssclass sets the enclosing <div>'s class to the given CSS class name.
        formatter = HtmlFormatter(linenos=False, cssclass="source")
        return decode_to_unicode(highlight(code, lexer, formatter))
    except Exception:
        # This happens with files lacking a file extension, and MarkDown
        # files, for which no lexer is found. In that case no lexer is used
        # and the code is returned as plain lines instead.
        # Exception (rather than a bare except) keeps this best-effort
        # fallback for every ordinary failure, while still letting
        # KeyboardInterrupt and SystemExit propagate.
        return no_syntax(code)
39
40
def no_syntax(data):
    """ Decodes the given UTF-8 bytes into a list of unicode lines.

    No markup is added: every line is returned exactly as it appears in the
    decoded text, minus its terminating newline character.

    Keyword arguments:
    data -- A bytes-like object (bytes or bytearray) holding UTF-8 text.

    Returns a list with one string per line. A final line that is not
    terminated by a newline is included as well; empty input gives an empty
    list. Raises UnicodeDecodeError when data is not valid UTF-8.
    """
    # NOTE(review): the lines are returned without HTML escaping; presumably
    # the template takes care of that -- confirm against the template.
    lines = data.decode("utf-8").split("\n")
    # split() always ends with the text that follows the last newline. When
    # that text is empty (input ended with a newline, or was empty) it is not
    # a real line, so it is dropped; otherwise it is the unterminated last
    # line and must be kept.
    if lines[-1] == "":
        lines.pop()
    return lines
52
53
54
def decode_to_unicode(data):
    """ Splits the given unicode string into a list of lines.

    Despite its name, this function does not decode anything: the given data
    must already be text.

    Keyword arguments:
    data -- The text to split, with lines separated by newline characters.

    Returns a list with one string per line, without the newline characters.
    A final line that is not terminated by a newline is included as well;
    empty input gives an empty list.
    """
    lines = data.split("\n")
    # split() always ends with the text that follows the last newline. When
    # that text is empty (input ended with a newline, or was empty) it is not
    # a real line, so it is dropped; otherwise it is the unterminated last
    # line and must be kept.
    if lines[-1] == "":
        lines.pop()
    return lines
67