gitar/syntax.py

    syntax.py
	
			""" This module is responsible for the interaction between Gitar and Pygments.

			In essence, this means that in this module, a file's contents are passed
		
			to Pygments, which returns the appropriate HTML output, which can then
		
			be rendered directly in a Django template.
		
			"""
		
			from pygments import highlight
		
			from pygments.lexers import get_lexer_by_name
		
			from pygments.formatters import HtmlFormatter
		
			from pygments.lexers import guess_lexer_for_filename
		
			from pygments.lexers.templates import HtmlDjangoLexer
		
			def code_to_HTML(code, file_name):
			    """Highlight ``code`` with Pygments and return the HTML split into lines.

			    The lexer used for the syntax coloring is determined by the given file
			    name, so make sure the given code really comes from that file.

			    Keyword arguments:
			    code -- The contents of the file to highlight. ``None`` yields ``[]``.
			    file_name -- The name of the file from where the given code originates.

			    Returns a list of strings, one per output line. When a lexer is found
			    these are lines of Pygments HTML; when highlighting fails for any
			    reason the result of ``no_syntax(code)`` is returned instead.

			    NOTE(review): the fallback path returns the raw text lines rather than
			    HTML, so the two paths differ in escaping -- confirm the template copes
			    with both.
			    """
			    if code is None:
			        return []

			    # linenos adds line numbering to the front of the output; that is done
			    # elsewhere, so it is disabled here. cssclass sets the class of the
			    # enclosing <div>.
			    formatter = HtmlFormatter(linenos=False, cssclass="source")

			    # The lexer is picked manually here because djhtml is not recognized
			    # as a filename for the HtmlDjangoLexer.
			    if file_name.endswith('.djhtml'):
			        return decode_to_unicode(highlight(code, HtmlDjangoLexer(), formatter))

			    try:
			        # stripall would remove leading whitespace, which also removes the
			        # file's indentation, hence stripall=False.
			        lexer = guess_lexer_for_filename(file_name, code, stripall=False)
			        return decode_to_unicode(highlight(code, lexer, formatter))
			    except Exception:
			        # This happens with files lacking a file extension, and Markdown
			        # files. In that case no lexer is used and the code is only split
			        # into lines. ``Exception`` (instead of a bare ``except``) keeps
			        # this best-effort fallback without swallowing KeyboardInterrupt
			        # or SystemExit.
			        return no_syntax(code)
		
			def no_syntax(data):
			    """Split raw file contents into a list of lines, without highlighting.

			    Keyword arguments:
			    data -- The file contents: either a ``str``, or an object with a
			            ``decode`` method (such as bytes) that is decoded as UTF-8.

			    Returns a list with one string per line, with the line feeds removed.
			    Only a line feed counts as a line break, so a carriage return from
			    Windows line endings stays part of its line.
			    """
			    text = data if isinstance(data, str) else data.decode("utf-8")

			    lines = text.split("\n")
			    # A trailing newline leaves one empty final element; drop it so that
			    # "a\n" and "a" both give ["a"]. The previous character loop instead
			    # discarded everything after the last newline, losing the final line
			    # of any file that does not end with a newline.
			    if lines[-1] == "":
			        lines.pop()
			    return lines
		
			def decode_to_unicode(data):
			    """Split already-decoded text into a list of lines.

			    Despite its name this function performs no decoding: ``data`` must
			    already be a text string.

			    Keyword arguments:
			    data -- The text to split.

			    Returns a list with one string per line, with the line feeds removed.
			    Only a line feed counts as a line break.
			    """
			    lines = data.split("\n")
			    # A trailing newline leaves one empty final element; drop it. Text
			    # after the last newline is kept as the final line -- the previous
			    # character loop silently discarded it.
			    if lines[-1] == "":
			        lines.pop()
			    return lines

1	""" This module is responsible for the interaction between Gitar and Pygments.
2
3	In essence, this means that in this module, a file's contents are passed
4	to Pygments, which returns the appropriate HTML output, which can then
5	be rendered directly in a Django template.
6	"""
7
8	from pygments import highlight
9	from pygments.lexers import get_lexer_by_name
10	from pygments.formatters import HtmlFormatter
11	from pygments.lexers import guess_lexer_for_filename
12	from pygments.lexers.templates import HtmlDjangoLexer
13
def code_to_HTML(code, file_name):
    """Highlight ``code`` with Pygments and return the HTML split into lines.

    The lexer used for the syntax coloring is determined by the given file
    name, so make sure the given code really comes from that file.

    Keyword arguments:
    code -- The contents of the file to highlight. ``None`` yields ``[]``.
    file_name -- The name of the file from where the given code originates.

    Returns a list of strings, one per output line. When a lexer is found
    these are lines of Pygments HTML; when highlighting fails for any reason
    the result of ``no_syntax(code)`` is returned instead.

    NOTE(review): the fallback path returns the raw text lines rather than
    HTML, so the two paths differ in escaping -- confirm the template copes
    with both.
    """
    if code is None:
        return []

    # linenos adds line numbering to the front of the output; that is done
    # elsewhere, so it is disabled here. cssclass sets the class of the
    # enclosing <div>.
    formatter = HtmlFormatter(linenos=False, cssclass="source")

    # The lexer is picked manually here because djhtml is not recognized as a
    # filename for the HtmlDjangoLexer.
    if file_name.endswith('.djhtml'):
        return decode_to_unicode(highlight(code, HtmlDjangoLexer(), formatter))

    try:
        # stripall would remove leading whitespace, which also removes the
        # file's indentation, hence stripall=False.
        lexer = guess_lexer_for_filename(file_name, code, stripall=False)
        return decode_to_unicode(highlight(code, lexer, formatter))
    except Exception:
        # This happens with files lacking a file extension, and Markdown
        # files. In that case no lexer is used and the code is only split into
        # lines. ``Exception`` (instead of a bare ``except``) keeps this
        # best-effort fallback without swallowing KeyboardInterrupt or
        # SystemExit.
        return no_syntax(code)
51
def no_syntax(data):
    """Split raw file contents into a list of lines, without highlighting.

    Keyword arguments:
    data -- The file contents: either a ``str``, or an object with a
            ``decode`` method (such as bytes) that is decoded as UTF-8.

    Returns a list with one string per line, with the line feeds removed.
    Only a line feed counts as a line break, so a carriage return from
    Windows line endings stays part of its line.
    """
    text = data if isinstance(data, str) else data.decode("utf-8")

    lines = text.split("\n")
    # A trailing newline leaves one empty final element; drop it so that
    # "a\n" and "a" both give ["a"]. The previous character loop instead
    # discarded everything after the last newline, losing the final line of
    # any file that does not end with a newline.
    if lines[-1] == "":
        lines.pop()
    return lines
67
68
def decode_to_unicode(data):
    """Split already-decoded text into a list of lines.

    Despite its name this function performs no decoding: ``data`` must
    already be a text string.

    Keyword arguments:
    data -- The text to split.

    Returns a list with one string per line, with the line feeds removed.
    Only a line feed counts as a line break.
    """
    lines = data.split("\n")
    # A trailing newline leaves one empty final element; drop it. Text after
    # the last newline is kept as the final line -- the previous character
    # loop silently discarded it.
    if lines[-1] == "":
        lines.pop()
    return lines
82