1 # -*- coding: iso-8859-1 -*-
3 MoinMoin - convert content in 1.5.8 wiki markup to 1.6.0 style
4 by using a modified 1.5.8 parser as translator.
6 Assuming we have this "renames" map:
7 -------------------------------------------------------
8 'PAGE', 'some_page' -> 'some page'
9 'FILE', 'with%20blank.txt' -> 'with blank.txt'
11 Markup transformations needed:
12 -------------------------------------------------------
13 ["some_page"] -> [[some page]] # renamed
14 [:some_page:some text] -> [[some page|some text]]
15 [:page:text] -> [[page|text]]
16 (with a page not being renamed)
18 attachment:with%20blank.txt -> [[attachment:with blank.txt]]
19 attachment:some_page/with%20blank.txt -> [[attachment:some page/with blank.txt]]
20 The attachment processing should also urllib.unquote the filename (or at
21 least replace %20 by space) and put it into "quotes" if it contains spaces.
25 @license: GNU GPL, see COPYING for details.
30 from MoinMoin import i18n
31 i18n.wikiLanguages = lambda: {}
33 from MoinMoin import config, wikiutil, macro
34 from MoinMoin.action import AttachFile
35 from MoinMoin.Page import Page
36 from MoinMoin.support.python_compatibility import rsplit
38 from text_moin158_wiki import Parser
40 def convert_wiki(request, pagename, intext, renames):
41 """ Convert content written in wiki markup """
# NOTE(review): the embedded numbering jumps (41 -> 43 -> 46, 48 -> 53), so
# statements are missing from this listing; `noeol`, tested further down, is
# assigned on a line that is not shown, and neither is the return statement.
# Input that does not already end in CRLF is special-cased (branch body not shown).
43 if not intext.endswith('\r\n'):
# Build the converter and hand its convert method to request.redirectedOutput.
# presumably redirectedOutput captures what convert() writes to the request
# and returns it as a string -- confirm against MoinMoin's request API.
46 c = Converter(request, pagename, intext, renames)
47 result = request.redirectedOutput(c.convert, request)
# Taken when `noeol` is set and the converted text ends in CRLF (body not shown).
48 if noeol and result.endswith('\r\n'):
# Argument order assumed for ImageLink macros in the input data. When this is
# True and at least two positional parameters were found, explore_args swaps
# the first two of them.
53 STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)
55 # copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
56 # ... and slightly modified/refactored for our needs here.
57 # hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
58 # quoted urls when they contain a '=' char in the 1.5 data input.
59 def explore_args(args):
60 """ explore args for positional and keyword parameters """
# NOTE(review): source lines are missing from this listing (embedded numbers
# skip 61, 64-67, 70, 72-74, 79, 83-86 and 89-91). The statement that binds
# `arg`, the assignment of `kw_count` and the return statement are not
# visible. The caller in _macro_repl unpacks the result as `fixed, kw`.
# Split the raw argument string on commas and trim every piece.
62 args = args.split(',')
63 args = [arg.strip() for arg in args]
68 kw = {} # keyword args
69 pp = [] # positional parameters
# Only these (lower-cased) names are accepted as keyword arguments.
71 kwAllowed = ('width', 'height', 'alt')
# Split a candidate "key=value" argument at the first '=' only, so that any
# further '=' characters stay in the value.
75 key, value = arg.split('=', 1)
76 key_lowerstr = str(key.lower())
77 # avoid that urls with "=" are interpreted as keyword
78 if key_lowerstr in kwAllowed:
80 kw[key_lowerstr] = value
81 elif not kw_count and '://' in arg:
82 # assuming that this is the image
# Old-style ImageLink(target,image) input: exchange the first two positional
# parameters.
87 if STONEAGE_IMAGELINK and len(pp) >= 2:
88 pp[0], pp[1] = pp[1], pp[0]
# Markup translator derived from the Parser class imported from
# text_moin158_wiki; the methods below override the parser's replacement
# hooks so that they return (converted) wiki markup strings.
93 class Converter(Parser):
94 def __init__(self, request, pagename, raw, renames):
# NOTE(review): embedded lines 96 and 99-101 are missing from this listing;
# `raw` is not used by any visible statement (convert() later reads a
# `rawtext` name whose assignment is also not shown).
95 self.pagename = pagename
97 self.renames = renames
98 self.request = request
# Fill the %(macronames)s placeholder of the formatting rules with a
# '|'-separated alternation of 'ImageLink' plus the names returned by
# macro.getNames for this wiki configuration.
102 self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(['ImageLink', ] + macro.getNames(self.request.cfg))}
105 def return_word(self, word):
# NOTE(review): the body (embedded line 106) is missing from this listing;
# judging by the name it presumably returns `word` unchanged -- confirm.
# All of the replacement hooks below are bound to return_word, i.e. these
# markup elements all share that one handler.
107 _emph_repl = return_word
108 _emph_ibb_repl = return_word
109 _emph_ibi_repl = return_word
110 _emph_ib_or_bi_repl = return_word
111 _u_repl = return_word
112 _strike_repl = return_word
113 _sup_repl = return_word
114 _sub_repl = return_word
115 _small_repl = return_word
116 _big_repl = return_word
117 _tt_repl = return_word
118 _tt_bt_repl = return_word
119 _remark_repl = return_word
120 _table_repl = return_word
121 _tableZ_repl = return_word
122 _rule_repl = return_word
123 _smiley_repl = return_word
124 _smileyA_repl = return_word
125 _ent_repl = return_word
126 _ent_numeric_repl = return_word
127 _ent_symbolic_repl = return_word
128 _heading_repl = return_word
129 _email_repl = return_word
130 _notword_repl = return_word
131 _indent_repl = return_word
132 _li_none_repl = return_word
133 _li_repl = return_word
134 _ol_repl = return_word
135 _dl_repl = return_word
136 _comment_repl = return_word
138 # translate pagenames using pagename translation map
140 def _replace(self, key):
141 """ replace a item_name if it is in the renames dict
142 key is either a 2-tuple ('PAGE', pagename)
143 or a 3-tuple ('FILE', pagename, filename)
145 current_page = self.pagename
# NOTE(review): embedded lines 144, 151, 157, 160, 167, 173 and 176-178 are
# missing from this listing; the conditions that guard the fallback lookups
# and the return statement are not visible.
# Pad the key with None so that both the 2-tuple and the 3-tuple form unpack
# into three names (file_name is None for a 'PAGE' key).
146 item_type, page_name, file_name = (key + (None, ))[:3]
# The renames map is queried with the page name made absolute relative to the
# page being converted.
147 abs_page_name = wikiutil.AbsPageName(current_page, page_name)
148 if item_type == 'PAGE':
149 key = (item_type, abs_page_name)
150 new_name = self.renames.get(key)
152 # we don't have an entry in rename map - apply the same magic
153 # to the page name as 1.5 did (" " -> "_") and try again:
154 abs_magic_name = abs_page_name.replace(u' ', u'_')
155 key = (item_type, abs_magic_name)
156 new_name = self.renames.get(key)
158 # we didn't find it under the magic name either -
159 # that means we do not rename it!
# Only when the name changed and the original reference was not already
# absolute is the new name converted back to a relative one.
161 if new_name != page_name and abs_page_name != page_name:
162 # we have to fix the (absolute) new_name to be a relative name (as it was before)
163 new_name = wikiutil.RelPageName(current_page, new_name)
164 elif item_type == 'FILE':
# Same two-step lookup as for pages, with the file name as third key element.
165 key = (item_type, abs_page_name, file_name)
166 new_name = self.renames.get(key)
168 # we don't have an entry in rename map - apply the same magic
169 # to the page name as 1.5 did (" " -> "_") and try again:
170 abs_magic_name = abs_page_name.replace(u' ', u'_')
171 key = (item_type, abs_magic_name, file_name)
172 new_name = self.renames.get(key)
174 # we didn't find it under the magic name either -
175 # that means we do not rename it!
179 def _replace_target(self, target):
# Apply the page rename map to a link target while keeping a "#anchor" suffix.
# NOTE(review): embedded lines 185 and 187-190 are missing from this listing;
# the header of the no-anchor branch and its return are not visible.
# Split at the last '#' only, so any earlier '#' stays in the page name.
180 target_and_anchor = rsplit(target, '#', 1)
181 if len(target_and_anchor) > 1:
# Target with anchor: rename the page part and re-attach the anchor.
182 target, anchor = target_and_anchor
183 target = self._replace(('PAGE', target))
184 return '%s#%s' % (target, anchor)
# Target without anchor: the whole string is looked up as a page name.
186 target = self._replace(('PAGE', target))
191 def _macro_repl(self, word):
# Rewrite a macro call: ImageLink calls are turned into a link that wraps an
# embedded image ([[target|{{image...}}]]), any other macro is re-emitted in
# <<name(args)>> form.
# NOTE(review): many source lines are missing from this listing (embedded
# numbers skip 193-195, 197-198, 208-211, 215-216, 221, 230, 233, 235-236,
# 238-239 and 242-243). Only one line of the `macro_rule` pattern is visible,
# and the initialisation of `image_attrs`, several branch headers and the
# return statement are not shown.
192 # we use [[...]] for links now, macros will be <<...>>
196 (\((?P<macro_args>.*?)\))?
199 word = unicode(word) # XXX why is word not unicode before???
# Match with re.X (verbose pattern) and re.U, then pull out the two named
# groups. Nothing visible guards against `m` being None.
200 m = re.match(macro_rule, word, re.X|re.U)
201 macro_name = m.group('macro_name')
202 macro_args = m.group('macro_args')
203 if macro_name == 'ImageLink':
204 fixed, kw = explore_args(macro_args)
205 #print "macro_args=%r" % macro_args
206 #print "fixed=%r, kw=%r" % (fixed, kw)
# Pad with empty strings so that fewer than two positional args still unpack.
207 image, target = (fixed + ['', ''])[:2]
212 if '://' not in image:
213 # if it is not a URL, it is meant as attachment
214 image = u'attachment:%s' % image
217 elif target.startswith('inline:'):
218 target = 'attachment:' + target[7:] # we don't support inline:
219 elif target.startswith('wiki:'):
220 target = target[5:] # drop wiki:
# A missing or empty alt keyword is normalised to ''.
222 alt = kw.get('alt') or ''
# Collect the optional width/height keywords as "name=value" items.
223 width = kw.get('width')
224 if width is not None:
225 image_attrs.append(u"width=%s" % width)
226 height = kw.get('height')
227 if height is not None:
228 image_attrs.append(u"height=%s" % height)
229 image_attrs = u", ".join(image_attrs)
# The joined attributes get a leading '|' (the guarding condition is not shown).
231 image_attrs = u'|' + image_attrs
232 if alt or image_attrs:
234 result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
# Other macros: arguments are wrapped in parentheses again
# (the guarding condition is not shown).
237 macro_args = u"(%s)" % macro_args
240 result = u"<<%s%s>>" % (macro_name, macro_args)
241 # XXX later check whether some to be renamed pagename is used as macro param
244 def _word_repl(self, word, text=None):
245 """Handle WikiNames."""
# NOTE(review): embedded lines 246-247 (the branch in front of this `else`)
# are missing from this listing. The visible branch builds a 1.6 link with an
# explicit label from `word` and `text`.
248 else: # internal use:
249 return '[[%s|%s]]' % (word, text)
251 def _wikiname_bracket_repl(self, word):
252 """Handle special-char wikinames."""
# NOTE(review): embedded lines 254 and 257-259 are missing from this listing.
# Drop the first two and the last two characters of the match (the [" and "]
# delimiters of the old ["page name"] form shown in the file header).
253 pagename = word[2:-2]
# Apply the rename map and emit the name in the new [[...]] link form.
255 pagename = self._replace(('PAGE', pagename))
256 return '[[%s]]' % pagename
260 def _interwiki_repl(self, word):
261 """Handle InterWiki links."""
# NOTE(review): embedded lines 263-265, 271, 273, 275 and 278 are missing
# from this listing; the conditions selecting between the return statements
# below are not visible, and neither is any use of the resolve_wiki results
# or of `camelcase`.
262 wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
# Split "WikiName:PageName" at the first colon.
266 wikiname, pagename = word.split(':', 1)
267 pagename = wikiutil.url_unquote(pagename) # maybe someone has used %20 for blanks in pagename
268 camelcase = wikiutil.isStrictWikiname(pagename)
# Links to this wiki itself ('Self' or the configured interwiki name) go
# through the rename map and lose the interwiki prefix.
269 if wikiname in ('Self', self.request.cfg.interwikiname):
270 pagename = self._replace(('PAGE', pagename))
272 return '%s' % pagename # optimize special case
274 return '[[%s]]' % pagename # optimize special case
# Other wikis: the bracketed form is used when the unquoted page name
# contains a blank.
276 if ' ' in pagename: # we could get a ' ' by urlunquoting
277 return '[[%s:%s]]' % (wikiname, pagename)
279 return '%s:%s' % (wikiname, pagename)
281 def interwiki(self, url_and_text):
# Convert an interwiki-style link, given as [url] or [url, text], into 1.6
# markup.
# NOTE(review): embedded lines 284-285, 287-288, 302, 307-308, 310-311, 314
# and 317 are missing from this listing. How `text` is prepared is not shown;
# it is appended directly after the target in the format strings below, so
# presumably it is either empty or already carries its '|' separator --
# confirm.
282 if len(url_and_text) == 1:
283 url = url_and_text[0]
286 url, text = url_and_text
289 # keep track of whether this is a self-reference, so links
290 # are always shown even if the page doesn't exist.
# Cut off the scheme, then split the remainder into wiki name and page name.
291 scheme, url = url.split(':', 1)
292 wikiname, pagename = wikiutil.split_wiki(url)
# Case 1: subpage reference or a page that exists locally -> local link.
293 if (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
294 Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text]
295 pagename = wikiutil.url_unquote(url)
296 pagename = self._replace_target(pagename)
297 return '[[%s%s]]' % (pagename, text)
# Case 2: explicit self-reference or empty wiki name -> local link.
298 if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
299 pagename = wikiutil.url_unquote(pagename)
300 pagename = self._replace_target(pagename)
301 return '[[%s%s]]' % (pagename, text)
# Case 3: resolve the interwiki name.
303 wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url)
304 if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
305 pagename = wikiutil.url_unquote(pagename)
306 pagename = self._replace_target(pagename)
309 wikitail = wikiutil.url_unquote(wikitail)
# Picture targets are emitted as embedded objects ({{...}}).
312 if wikiutil.isPicture(wikitail):
313 return '{{%s:%s%s}}' % (wikitag, wikitail, text)
# Without blanks in the tail and without link text the bare Wiki:Page form
# is used, otherwise the bracketed link form.
315 if ' ' not in wikitail and not text:
316 return '%s:%s' % (wikitag, wikitail)
318 return '[[%s:%s%s]]' % (wikitag, wikitail, text)
320 def attachment(self, url_and_text):
321 """ This gets called on attachment URLs. """
# NOTE(review): embedded lines 324-325, 327-328, 331, 338-340, 342, 345, 350
# and 354 are missing from this listing. How `text` is prepared, how
# `from_this_page` is used and how `name` is set when the "%s/%s" form is not
# chosen are not visible.
322 if len(url_and_text) == 1:
323 url = url_and_text[0]
326 url, text = url_and_text
# Separate the scheme from the file reference at the first colon.
329 scheme, fname = url.split(":", 1)
330 #scheme, fname, text = wikiutil.split_wiki(target_and_text)
# presumably resolves a "page/file" reference relative to the current page
# into (pagename, filename) -- confirm against AttachFile.absoluteName.
332 pagename, fname = AttachFile.absoluteName(fname, self.pagename)
333 from_this_page = pagename == self.pagename
# The file rename lookup runs twice: once with the name as written and once
# more after URL-unquoting it.
334 fname = self._replace(('FILE', pagename, fname))
335 fname = wikiutil.url_unquote(fname)
336 fname = self._replace(('FILE', pagename, fname))
# The page is renamed only after the file lookups, which use the old page name.
337 pagename = self._replace(('PAGE', pagename))
341 name = "%s/%s" % (pagename, fname)
# Drawings keep their own scheme inside the embed markup.
343 if scheme == 'drawing':
344 return "{{drawing:%s%s}}" % (name, text)
346 # check for image URL, and possibly return IMG tag
347 # (images are always inlined, just like for other URLs)
348 if wikiutil.isPicture(name):
349 return "{{attachment:%s%s}}" % (name, text)
351 # inline the attachment
352 if scheme == 'inline':
353 return '{{attachment:%s%s}}' % (name, text)
# Everything else becomes a plain link to the attachment.
355 return '[[attachment:%s%s]]' % (name, text)
357 def _url_repl(self, word):
358 """Handle literal URLs including inline images."""
# NOTE(review): embedded lines 360-361, 365 and 368-370 are missing from this
# listing; the condition in front of the interwiki call and the final
# fall-through return are not visible.
# The scheme is everything before the first colon.
359 scheme = word.split(":", 1)[0]
362 return self.interwiki([word])
# Attachment-like schemes are delegated to attachment() with no link text.
363 if scheme in self.attachment_schemas:
364 return '%s' % self.attachment([word])
366 if wikiutil.isPicture(word): # magic will go away in 1.6!
367 return '{{%s}}' % word # new markup for inline images
371 def _url_bracket_repl(self, word):
372 """Handle bracketed URLs."""
# NOTE(review): embedded lines 374, 376, 380, 382-383, 385, 389, 392,
# 394-395, 397, 399, 404-405, 407-409, 416-417 and 422-424 are missing from
# this listing. Several branch headers and the statements that adjust `text`
# and `desc` before they are formatted are not visible.
373 word = word[1:-1] # strip brackets
375 # Local extended link?
# Skip the first character and split "link:text" at the first colon; padding
# with empty strings keeps the unpacking safe when there is no text part.
377 words = word[1:].split(':', 1)
378 link, text = (words + ['', ''])[:2]
379 if link.strip() == text.strip():
381 link = self._replace_target(link)
384 return '[[%s%s]]' % (link, text)
386 # Traditional split on space
387 words = word.split(None, 1)
# A target starting with '#' gets its own branch; note that the rename lookup
# is commented out in it.
388 if words[0][0] == '#':
390 link, text = (words + ['', ''])[:2]
391 if link.strip() == text.strip():
393 #link = self._replace_target(link)
396 return '[[%s%s]]' % (link, text)
398 scheme = words[0].split(":", 1)[0]
400 return self.interwiki(words)
401 #scheme, wikiname, pagename, text = self.interwiki(word)
402 #print "%r %r %r %r" % (scheme, wikiname, pagename, text)
403 #if wikiname in ('Self', self.request.cfg.interwikiname, ''):
406 # return '[[%s%s]]' % (pagename, text)
410 # return "[[%s:%s%s]]" % (wikiname, pagename, text)
411 if scheme in self.attachment_schemas:
412 m = self.attachment(words)
# attachment() may answer with embed markup; swap the outer {{ }} for [[ ]].
413 if m.startswith('{{') and m.endswith('}}'):
414 # with url_bracket markup, 1.5.8 parser does not embed, but link!
415 m = '[[%s]]' % m[2:-2]
# Remaining URLs: when the description is itself a picture URL, emit a link
# that wraps the embedded image.
418 target, desc = (words + ['', ''])[:2]
419 if wikiutil.isPicture(desc) and re.match(self.url_rule, desc):
420 #return '[[%s|{{%s|%s}}]]' % (words[0], words[1], words[0])
421 return '[[%s|{{%s}}]]' % (target, desc)
425 return '[[%s%s]]' % (target, desc)
427 def _pre_repl(self, word):
# NOTE(review): embedded lines 428, 430 and 432-434 are missing from this
# listing; the assignment of `w`, both branch bodies and the return are not
# visible. The visible conditions distinguish an opening '{{{' seen outside a
# preformatted section from a closing '}}}' seen inside one.
429 if w == '{{{' and not self.in_pre:
431 elif w == '}}}' and self.in_pre:
# NOTE(review): only the header of this hook is visible; its body (embedded
# lines 436-438) is missing from this listing.
435 def _processor_repl(self, word):
439 def scan(self, scan_re, line):
440 """ Scans one line - append text before match, invoke replace() with match, and add text after match. """
# NOTE(review): embedded lines 441-443 and 451 are missing from this listing;
# the initialisation of `result` and `lastpos` is not visible.
# Walk over all non-overlapping matches from left to right.
444 for match in scan_re.finditer(line):
445 # Add text before the match
446 if lastpos < match.start():
447 result.append(line[lastpos:match.start()])
448 # Replace match with markup
449 result.append(self.replace(match))
450 lastpos = match.end()
452 # Add remainder of the line
453 result.append(line[lastpos:])
# The collected pieces are joined into one unicode string.
454 return u''.join(result)
457 def replace(self, match):
458 """ Replace match using type name """
# NOTE(review): embedded lines 459, 464, 467, 469, 471 and 474-477 are
# missing from this listing; the initialisation of `result`, the block
# structure around the return and the import of `pprint` are not visible.
# Each named group that matched (except the helper group "hmarker") is
# dispatched to the method called _<groupname>_repl.
460 for _type, hit in match.groupdict().items():
461 if hit is not None and not _type in ["hmarker", ]:
462 # Get replace method and replace hit
463 replace = getattr(self, '_' + _type + '_repl')
465 result.append(replace(hit))
466 return ''.join(result)
468 # We should never get here
# Include the match's named and positional groups in the error message.
470 raise Exception("Can't handle match %r\n%s\n%s" % (
472 pprint.pformat(match.groupdict()),
473 pprint.pformat(match.groups()),
478 def convert(self, request):
479 """ For each line, scan through looking for magic
480 strings, outputting verbatim any intervening text.
482 self.request = request
# NOTE(review): embedded lines 481, 488-489, 494-496, 500-501, 505, 510-512,
# 514, 516 and 518 are missing from this listing, and the method may continue
# past the last visible line. The assignment of `rawtext`, the rest of the
# bang_meta format dict and the conditions around the first two write calls
# in the loop are not visible.
483 # prepare regex patterns
# The rule list holds one pattern per line; joining the lines with '|' yields
# a single alternation.
484 rules = self.formatting_rules.replace('\n', '|')
# With cfg.bang_meta set, a "notword" rule ('!' followed by word_rule) is put
# in front of the other rules. The ur'' literal is Python 2 only syntax.
485 if self.request.cfg.bang_meta:
486 rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
487 'word_rule': self.word_rule,
# Inside a preformatted section only the closing '}}}' is searched for.
490 pre_rules = r'''(?P<pre>\}\}\})'''
491 pre_scan_re = re.compile(pre_rules, re.UNICODE)
492 scan_re = re.compile(rules, re.UNICODE)
493 eol_re = re.compile(r'\r?\n', re.UNICODE)
497 # remove last item because it's guaranteed to be empty
498 self.lines = eol_re.split(rawtext)[:-1]
499 self.in_processing_instructions = True
502 for line in self.lines:
503 # ignore processing instructions
504 if self.in_processing_instructions:
# While still in the header, a line that starts (case-insensitively) with one
# of these prefixes is written out unchanged.
506 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
507 "#pragma", "#form", "#acl", "#language"):
508 if line.lower().startswith(pi):
509 self.request.write(line + '\r\n')
513 self.in_processing_instructions = False
515 continue # do not parse this line
517 self.request.write(line + '\r\n')
519 # Scan line, format and write
# and/or idiom: use pre_scan_re while self.in_pre is true, otherwise scan_re.
520 scanning_re = self.in_pre and pre_scan_re or scan_re
521 formatted_line = self.scan(scanning_re, line)
# Every converted line is written with a CRLF line ending.
522 self.request.write(formatted_line + '\r\n')