comparison MoinMoin/support/pygments/lexers/rdf.py @ 6097:815981fad7fd

upgrade pygments from 1.6 to 2.1.3
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Mon, 05 Sep 2016 23:55:33 +0200
parents
children
comparison
equal deleted inserted replaced
6096:86a41c2bedec 6097:815981fad7fd
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.rdf
4 ~~~~~~~~~~~~~~~~~~~
5
6 Lexers for semantic web and RDF query languages and markup.
7
8 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, bygroups, default
15 from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \
16 Whitespace, Name, Literal, Comment, Text
17
18 __all__ = ['SparqlLexer', 'TurtleLexer']
19
20
class SparqlLexer(RegexLexer):
    """
    Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language.

    The class attributes below first build character-class fragments
    (``*_GRP``), then combine them into regular expressions for the
    terminal productions (IRIs, blank node labels, prefixed names,
    variable names, ...), and finally use those in the ``tokens`` state
    table.  String literals are handled by dedicated states; once the
    closing quote has been seen, the ``end-of-string`` state picks up an
    optional language tag or ``^^`` datatype marker and pops back to
    ``root``.

    .. versionadded:: 2.0
    """
    name = 'SPARQL'
    aliases = ['sparql']
    filenames = ['*.rq', '*.sparql']
    mimetypes = ['application/sparql-query']

    # character group definitions ::
    # (bare character-class contents, without the surrounding brackets, so
    # that they can be combined into larger classes further down)

    PN_CHARS_BASE_GRP = (u'a-zA-Z'
                         u'\u00c0-\u00d6'
                         u'\u00d8-\u00f6'
                         u'\u00f8-\u02ff'
                         u'\u0370-\u037d'
                         u'\u037f-\u1fff'
                         u'\u200c-\u200d'
                         u'\u2070-\u218f'
                         u'\u2c00-\u2fef'
                         u'\u3001-\ud7ff'
                         u'\uf900-\ufdcf'
                         u'\ufdf0-\ufffd'
                         u'\U00010000-\U000effff')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    u'\u00b7' +
                    u'\u0300-\u036f' +
                    u'\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.  The
    # apostrophe is part of the PN_LOCAL_ESC production of the SPARQL 1.1
    # grammar; the space and the double quote are kept from the previous
    # definition so nothing that used to match stops matching.
    PN_LOCAL_ESC_CHARS_GRP = r""" _~.\-!$&"'()*+,;=/?#@%"""

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    VARNAME = u'[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
              u'0-9\u00b7\u0300-\u036f\u203f-\u2040]*'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced into an
    # enclosing group below.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            # The case-insensitivity flag must be the very first thing in
            # the pattern: a global inline flag in any other position is
            # deprecated since Python 3.6 and an error since Python 3.11.
            (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
             r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
             r'offset|bindings|load|clear|drop|create|add|move|copy|'
             r'insert\s+data|delete\s+data|delete\s+where|delete|insert|'
             r'using\s+named|using|graph|default|named|all|optional|service|'
             r'silent|bind|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
            # the 'a' shorthand is matched case-sensitively (no flag here)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            #  # variables ::
            ('[?$]' + VARNAME, Name.Variable),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # function names ::
            (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
             r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
             r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
             r'hours|minutes|seconds|timezone|tz|now|md5|sha1|sha256|sha384|'
             r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
             r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
             r'count|sum|min|max|avg|sample|group_concat|separator)\b',
             Name.Function),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # (the triple-quote openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Body text: anything but a backslash, stopping in front of the
            # closing delimiter.  A plain [^\\]+ would run straight over
            # the closing quotes and the string would never be terminated.
            (r'(?:[^\\"]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # see 'triple-double-quoted-string'
            (r"(?:[^\\']|'(?!''))+", String),
            # backslash emitted as String, like the three sibling states
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # entered right after a backslash inside any of the string states
        'string-escape': [
            (r'u' + HEX + '{4}', String.Escape, '#pop'),
            (r'U' + HEX + '{8}', String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # entered right after a closing quote; every rule pops both this
        # state and the string state underneath it
        'end-of-string': [
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }
178
179
class TurtleLexer(RegexLexer):
    """
    Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.

    All patterns are compiled with ``re.IGNORECASE`` (see ``flags``).
    Prefixed names use a simplified character range rather than the full
    set of characters from the Turtle grammar.  String literals are
    handled by dedicated states; once the closing quote has been seen,
    the ``end-of-string`` state picks up an optional language tag or
    ``^^`` datatype and pops back to ``root``.

    .. versionadded:: 2.1
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']

    flags = re.IGNORECASE

    # Reusable sub-patterns, spliced into the rules below with %-formatting.
    # Each one carries its own capturing group(s), which the bygroups()
    # calls rely on.
    patterns = {
        'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    # PNAME_NS PN_LOCAL (with simplified character range)
    patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Base / prefix
            (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'%(IRIREF)s' % patterns, Name.Variable),

            # PrefixedName
            (r'%(PrefixedName)s' % patterns,
             bygroups(Name.Namespace, Name.Tag)),

            # Comment ('*' rather than '+', so that an empty comment -- a
            # bare '#' at the end of a line -- is a comment too)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # The exponent form has to be tried before the plain decimal
            # form; otherwise '1.5E3' is split into '1.5' and 'E3'.
            (r'[+\-]?\d*(?:\.\d+)?E[+\-]?\d+', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # (the triple-quote openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Body text: anything but a backslash, stopping in front of the
            # closing delimiter.  A plain [^\\]+ would run straight over
            # the closing quotes and the string would never be terminated.
            (r'(?:[^\\"]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # see 'triple-double-quoted-string'
            (r"(?:[^\\']|'(?!''))+", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # entered right after a backslash: take one character and return
        'string-escape': [
            (r'.', String, '#pop'),
        ],
        # entered right after a closing quote; every rule pops both this
        # state and the string state underneath it
        'end-of-string': [

            # language tag ('(?:' keeps the subtag group non-capturing, so
            # the groups line up with the two bygroups() actions)
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            # datatype, given either as an IRI or as a prefixed name
            (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
            (r'(\^\^)%(PrefixedName)s' % patterns,
             bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),

            default('#pop:2'),

        ],
    }