changeset 151:226b379bde22

added missing HTML entities to the text_html_text_x_moin converter imported from: moin--main--1.5--patch-153
author Florian Festi <Florian.Festi@trick.informatik.uni-stuttgart.de>
date Fri, 28 Oct 2005 12:21:11 +0000
parents 3f6de4cba30b
children faf3bc1c91a3
files MoinMoin/converter/text_html_text_x_moin.py
diffstat 1 files changed, 282 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/converter/text_html_text_x_moin.py	Fri Oct 28 11:04:05 2005 +0000
+++ b/MoinMoin/converter/text_html_text_x_moin.py	Fri Oct 28 12:21:11 2005 +0000
@@ -113,6 +113,282 @@
 <!ENTITY yacute "&#253;"> <!-- latin small letter y with acute, U+00FD ISOlat1 -->
 <!ENTITY thorn  "&#254;"> <!-- latin small letter thorn, U+00FE ISOlat1 -->
 <!ENTITY yuml   "&#255;"> <!-- latin small letter y with diaeresis, U+00FF ISOlat1 -->
+
+<!-- Latin Extended-B -->
+<!ENTITY fnof     "&#402;"> <!-- latin small f with hook = function                                    = florin, U+0192 ISOtech -->
+
+<!-- Greek -->
+<!ENTITY Alpha    "&#913;"> <!-- greek capital letter alpha, U+0391 -->
+<!ENTITY Beta     "&#914;"> <!-- greek capital letter beta, U+0392 -->
+<!ENTITY Gamma    "&#915;"> <!-- greek capital letter gamma,
+                                    U+0393 ISOgrk3 -->
+<!ENTITY Delta    "&#916;"> <!-- greek capital letter delta,
+                                    U+0394 ISOgrk3 -->
+<!ENTITY Epsilon  "&#917;"> <!-- greek capital letter epsilon, U+0395 -->
+<!ENTITY Zeta     "&#918;"> <!-- greek capital letter zeta, U+0396 -->
+<!ENTITY Eta      "&#919;"> <!-- greek capital letter eta, U+0397 -->
+<!ENTITY Theta    "&#920;"> <!-- greek capital letter theta,
+                                    U+0398 ISOgrk3 -->
+<!ENTITY Iota     "&#921;"> <!-- greek capital letter iota, U+0399 -->
+<!ENTITY Kappa    "&#922;"> <!-- greek capital letter kappa, U+039A -->
+<!ENTITY Lambda   "&#923;"> <!-- greek capital letter lambda,
+                                    U+039B ISOgrk3 -->
+<!ENTITY Mu       "&#924;"> <!-- greek capital letter mu, U+039C -->
+<!ENTITY Nu       "&#925;"> <!-- greek capital letter nu, U+039D -->
+<!ENTITY Xi       "&#926;"> <!-- greek capital letter xi, U+039E ISOgrk3 -->
+<!ENTITY Omicron  "&#927;"> <!-- greek capital letter omicron, U+039F -->
+<!ENTITY Pi       "&#928;"> <!-- greek capital letter pi, U+03A0 ISOgrk3 -->
+<!ENTITY Rho      "&#929;"> <!-- greek capital letter rho, U+03A1 -->
+<!-- there is no Sigmaf, and no U+03A2 character either -->
+<!ENTITY Sigma    "&#931;"> <!-- greek capital letter sigma,
+                                    U+03A3 ISOgrk3 -->
+<!ENTITY Tau      "&#932;"> <!-- greek capital letter tau, U+03A4 -->
+<!ENTITY Upsilon  "&#933;"> <!-- greek capital letter upsilon,
+                                    U+03A5 ISOgrk3 -->
+<!ENTITY Phi      "&#934;"> <!-- greek capital letter phi,
+                                    U+03A6 ISOgrk3 -->
+<!ENTITY Chi      "&#935;"> <!-- greek capital letter chi, U+03A7 -->
+<!ENTITY Psi      "&#936;"> <!-- greek capital letter psi,
+                                    U+03A8 ISOgrk3 -->
+<!ENTITY Omega    "&#937;"> <!-- greek capital letter omega,
+                                    U+03A9 ISOgrk3 -->
+
+<!ENTITY alpha    "&#945;"> <!-- greek small letter alpha,
+                                    U+03B1 ISOgrk3 -->
+<!ENTITY beta     "&#946;"> <!-- greek small letter beta, U+03B2 ISOgrk3 -->
+<!ENTITY gamma    "&#947;"> <!-- greek small letter gamma,
+                                    U+03B3 ISOgrk3 -->
+<!ENTITY delta    "&#948;"> <!-- greek small letter delta,
+                                    U+03B4 ISOgrk3 -->
+<!ENTITY epsilon  "&#949;"> <!-- greek small letter epsilon,
+                                    U+03B5 ISOgrk3 -->
+<!ENTITY zeta     "&#950;"> <!-- greek small letter zeta, U+03B6 ISOgrk3 -->
+<!ENTITY eta      "&#951;"> <!-- greek small letter eta, U+03B7 ISOgrk3 -->
+<!ENTITY theta    "&#952;"> <!-- greek small letter theta,
+                                    U+03B8 ISOgrk3 -->
+<!ENTITY iota     "&#953;"> <!-- greek small letter iota, U+03B9 ISOgrk3 -->
+<!ENTITY kappa    "&#954;"> <!-- greek small letter kappa,
+                                    U+03BA ISOgrk3 -->
+<!ENTITY lambda   "&#955;"> <!-- greek small letter lambda,
+                                    U+03BB ISOgrk3 -->
+<!ENTITY mu       "&#956;"> <!-- greek small letter mu, U+03BC ISOgrk3 -->
+<!ENTITY nu       "&#957;"> <!-- greek small letter nu, U+03BD ISOgrk3 -->
+<!ENTITY xi       "&#958;"> <!-- greek small letter xi, U+03BE ISOgrk3 -->
+<!ENTITY omicron  "&#959;"> <!-- greek small letter omicron, U+03BF NEW -->
+<!ENTITY pi       "&#960;"> <!-- greek small letter pi, U+03C0 ISOgrk3 -->
+<!ENTITY rho      "&#961;"> <!-- greek small letter rho, U+03C1 ISOgrk3 -->
+<!ENTITY sigmaf   "&#962;"> <!-- greek small letter final sigma,
+                                    U+03C2 ISOgrk3 -->
+<!ENTITY sigma    "&#963;"> <!-- greek small letter sigma,
+                                    U+03C3 ISOgrk3 -->
+<!ENTITY tau      "&#964;"> <!-- greek small letter tau, U+03C4 ISOgrk3 -->
+<!ENTITY upsilon  "&#965;"> <!-- greek small letter upsilon,
+                                    U+03C5 ISOgrk3 -->
+<!ENTITY phi      "&#966;"> <!-- greek small letter phi, U+03C6 ISOgrk3 -->
+<!ENTITY chi      "&#967;"> <!-- greek small letter chi, U+03C7 ISOgrk3 -->
+<!ENTITY psi      "&#968;"> <!-- greek small letter psi, U+03C8 ISOgrk3 -->
+<!ENTITY omega    "&#969;"> <!-- greek small letter omega,
+                                    U+03C9 ISOgrk3 -->
+<!ENTITY thetasym "&#977;"> <!-- greek small letter theta symbol,
+                                    U+03D1 NEW -->
+<!ENTITY upsih    "&#978;"> <!-- greek upsilon with hook symbol,
+                                    U+03D2 NEW -->
+<!ENTITY piv      "&#982;"> <!-- greek pi symbol, U+03D6 ISOgrk3 -->
+
+<!-- General Punctuation -->
+<!ENTITY bull     "&#8226;"> <!-- bullet = black small circle,
+                                     U+2022 ISOpub  -->
+<!-- bullet is NOT the same as bullet operator, U+2219 -->
+<!ENTITY hellip   "&#8230;"> <!-- horizontal ellipsis = three dot leader,
+                                     U+2026 ISOpub  -->
+<!ENTITY prime    "&#8242;"> <!-- prime = minutes = feet, U+2032 ISOtech -->
+<!ENTITY Prime    "&#8243;"> <!-- double prime = seconds = inches,
+                                     U+2033 ISOtech -->
+<!ENTITY oline    "&#8254;"> <!-- overline = spacing overscore,
+                                     U+203E NEW -->
+<!ENTITY frasl    "&#8260;"> <!-- fraction slash, U+2044 NEW -->
+
+<!-- Letterlike Symbols -->
+<!ENTITY weierp   "&#8472;"> <!-- script capital P = power set
+                                     = Weierstrass p, U+2118 ISOamso -->
+<!ENTITY image    "&#8465;"> <!-- blackletter capital I = imaginary part,
+                                     U+2111 ISOamso -->
+<!ENTITY real     "&#8476;"> <!-- blackletter capital R = real part symbol,
+                                     U+211C ISOamso -->
+<!ENTITY trade    "&#8482;"> <!-- trade mark sign, U+2122 ISOnum -->
+<!ENTITY alefsym  "&#8501;"> <!-- alef symbol = first transfinite cardinal,
+                                     U+2135 NEW -->
+<!-- alef symbol is NOT the same as hebrew letter alef,
+     U+05D0 although the same glyph could be used to depict both characters -->
+
+<!-- Arrows -->
+<!ENTITY larr     "&#8592;"> <!-- leftwards arrow, U+2190 ISOnum -->
+<!ENTITY uarr     "&#8593;"> <!-- upwards arrow, U+2191 ISOnum-->
+<!ENTITY rarr     "&#8594;"> <!-- rightwards arrow, U+2192 ISOnum -->
+<!ENTITY darr     "&#8595;"> <!-- downwards arrow, U+2193 ISOnum -->
+<!ENTITY harr     "&#8596;"> <!-- left right arrow, U+2194 ISOamsa -->
+<!ENTITY crarr    "&#8629;"> <!-- downwards arrow with corner leftwards
+                                     = carriage return, U+21B5 NEW -->
+<!ENTITY lArr     "&#8656;"> <!-- leftwards double arrow, U+21D0 ISOtech -->
+<!-- ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
+    but also does not have any other character for that function. So ? lArr can
+    be used for 'is implied by' as ISOtech suggests -->
+<!ENTITY uArr     "&#8657;"> <!-- upwards double arrow, U+21D1 ISOamsa -->
+<!ENTITY rArr     "&#8658;"> <!-- rightwards double arrow,
+                                     U+21D2 ISOtech -->
+<!-- ISO 10646 does not say this is the 'implies' character but does not have
+     another character with this function so ?
+     rArr can be used for 'implies' as ISOtech suggests -->
+<!ENTITY dArr     "&#8659;"> <!-- downwards double arrow, U+21D3 ISOamsa -->
+<!ENTITY hArr     "&#8660;"> <!-- left right double arrow,
+                                     U+21D4 ISOamsa -->
+
+<!-- Mathematical Operators -->
+<!ENTITY forall   "&#8704;"> <!-- for all, U+2200 ISOtech -->
+<!ENTITY part     "&#8706;"> <!-- partial differential, U+2202 ISOtech  -->
+<!ENTITY exist    "&#8707;"> <!-- there exists, U+2203 ISOtech -->
+<!ENTITY empty    "&#8709;"> <!-- empty set = null set = diameter,
+                                     U+2205 ISOamso -->
+<!ENTITY nabla    "&#8711;"> <!-- nabla = backward difference,
+                                     U+2207 ISOtech -->
+<!ENTITY isin     "&#8712;"> <!-- element of, U+2208 ISOtech -->
+<!ENTITY notin    "&#8713;"> <!-- not an element of, U+2209 ISOtech -->
+<!ENTITY ni       "&#8715;"> <!-- contains as member, U+220B ISOtech -->
+<!-- should there be a more memorable name than 'ni'? -->
+<!ENTITY prod     "&#8719;"> <!-- n-ary product = product sign,
+                                     U+220F ISOamsb -->
+<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
+     the same glyph might be used for both -->
+<!ENTITY sum      "&#8721;"> <!-- n-ary sumation, U+2211 ISOamsb -->
+<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
+     though the same glyph might be used for both -->
+<!ENTITY minus    "&#8722;"> <!-- minus sign, U+2212 ISOtech -->
+<!ENTITY lowast   "&#8727;"> <!-- asterisk operator, U+2217 ISOtech -->
+<!ENTITY radic    "&#8730;"> <!-- square root = radical sign,
+                                     U+221A ISOtech -->
+<!ENTITY prop     "&#8733;"> <!-- proportional to, U+221D ISOtech -->
+<!ENTITY infin    "&#8734;"> <!-- infinity, U+221E ISOtech -->
+<!ENTITY ang      "&#8736;"> <!-- angle, U+2220 ISOamso -->
+<!ENTITY and      "&#8743;"> <!-- logical and = wedge, U+2227 ISOtech -->
+<!ENTITY or       "&#8744;"> <!-- logical or = vee, U+2228 ISOtech -->
+<!ENTITY cap      "&#8745;"> <!-- intersection = cap, U+2229 ISOtech -->
+<!ENTITY cup      "&#8746;"> <!-- union = cup, U+222A ISOtech -->
+<!ENTITY int      "&#8747;"> <!-- integral, U+222B ISOtech -->
+<!ENTITY there4   "&#8756;"> <!-- therefore, U+2234 ISOtech -->
+<!ENTITY sim      "&#8764;"> <!-- tilde operator = varies with = similar to,
+                                     U+223C ISOtech -->
+<!-- tilde operator is NOT the same character as the tilde, U+007E,
+     although the same glyph might be used to represent both  -->
+<!ENTITY cong     "&#8773;"> <!-- approximately equal to, U+2245 ISOtech -->
+<!ENTITY asymp    "&#8776;"> <!-- almost equal to = asymptotic to,
+                                     U+2248 ISOamsr -->
+<!ENTITY ne       "&#8800;"> <!-- not equal to, U+2260 ISOtech -->
+<!ENTITY equiv    "&#8801;"> <!-- identical to, U+2261 ISOtech -->
+<!ENTITY le       "&#8804;"> <!-- less-than or equal to, U+2264 ISOtech -->
+<!ENTITY ge       "&#8805;"> <!-- greater-than or equal to,
+                                     U+2265 ISOtech -->
+<!ENTITY sub      "&#8834;"> <!-- subset of, U+2282 ISOtech -->
+<!ENTITY sup      "&#8835;"> <!-- superset of, U+2283 ISOtech -->
+<!-- note that nsup, 'not a superset of, U+2283' is not covered by the Symbol
+     font encoding and is not included. Should it be, for symmetry?
+     It is in ISOamsn  -->
+<!ENTITY nsub     "&#8836;"> <!-- not a subset of, U+2284 ISOamsn -->
+<!ENTITY sube     "&#8838;"> <!-- subset of or equal to, U+2286 ISOtech -->
+<!ENTITY supe     "&#8839;"> <!-- superset of or equal to,
+                                     U+2287 ISOtech -->
+<!ENTITY oplus    "&#8853;"> <!-- circled plus = direct sum,
+                                     U+2295 ISOamsb -->
+<!ENTITY otimes   "&#8855;"> <!-- circled times = vector product,
+                                     U+2297 ISOamsb -->
+<!ENTITY perp     "&#8869;"> <!-- up tack = orthogonal to = perpendicular,
+                                     U+22A5 ISOtech -->
+<!ENTITY sdot     "&#8901;"> <!-- dot operator, U+22C5 ISOamsb -->
+<!-- dot operator is NOT the same character as U+00B7 middle dot -->
+
+<!-- Miscellaneous Technical -->
+<!ENTITY lceil    "&#8968;"> <!-- left ceiling = apl upstile,
+                                     U+2308 ISOamsc  -->
+<!ENTITY rceil    "&#8969;"> <!-- right ceiling, U+2309 ISOamsc  -->
+<!ENTITY lfloor   "&#8970;"> <!-- left floor = apl downstile,
+                                     U+230A ISOamsc  -->
+<!ENTITY rfloor   "&#8971;"> <!-- right floor, U+230B ISOamsc  -->
+<!ENTITY lang     "&#9001;"> <!-- left-pointing angle bracket = bra,
+                                     U+2329 ISOtech -->
+<!-- lang is NOT the same character as U+003C 'less than'
+     or U+2039 'single left-pointing angle quotation mark' -->
+<!ENTITY rang     "&#9002;"> <!-- right-pointing angle bracket = ket,
+                                     U+232A ISOtech -->
+<!-- rang is NOT the same character as U+003E 'greater than'
+     or U+203A 'single right-pointing angle quotation mark' -->
+
+<!-- Geometric Shapes -->
+<!ENTITY loz      "&#9674;"> <!-- lozenge, U+25CA ISOpub -->
+
+<!-- Miscellaneous Symbols -->
+<!ENTITY spades   "&#9824;"> <!-- black spade suit, U+2660 ISOpub -->
+<!-- black here seems to mean filled as opposed to hollow -->
+<!ENTITY clubs    "&#9827;"> <!-- black club suit = shamrock,
+                                     U+2663 ISOpub -->
+<!ENTITY hearts   "&#9829;"> <!-- black heart suit = valentine,
+                                     U+2665 ISOpub -->
+<!ENTITY diams    "&#9830;"> <!-- black diamond suit, U+2666 ISOpub -->
+
+<!-- C0 Controls and Basic Latin -->
+<!ENTITY quot    "&#34;"> <!-- quotation mark = APL quote,
+                                    U+0022 ISOnum -->
+<!ENTITY amp     "&#38;"> <!-- ampersand, U+0026 ISOnum -->
+<!ENTITY lt      "&#60;"> <!-- less-than sign, U+003C ISOnum -->
+<!ENTITY gt      "&#62;"> <!-- greater-than sign, U+003E ISOnum -->
+
+<!-- Latin Extended-A -->
+<!ENTITY OElig   "&#338;"> <!-- latin capital ligature OE,
+                                    U+0152 ISOlat2 -->
+<!ENTITY oelig   "&#339;"> <!-- latin small ligature oe, U+0153 ISOlat2 -->
+<!-- ligature is a misnomer, this is a separate character in some languages -->
+<!ENTITY Scaron  "&#352;"> <!-- latin capital letter S with caron,
+                                    U+0160 ISOlat2 -->
+<!ENTITY scaron  "&#353;"> <!-- latin small letter s with caron,
+                                    U+0161 ISOlat2 -->
+<!ENTITY Yuml    "&#376;"> <!-- latin capital letter Y with diaeresis,
+                                    U+0178 ISOlat2 -->
+
+<!-- Spacing Modifier Letters -->
+<!ENTITY circ    "&#710;"> <!-- modifier letter circumflex accent,
+                                    U+02C6 ISOpub -->
+<!ENTITY tilde   "&#732;"> <!-- small tilde, U+02DC ISOdia -->
+
+<!-- General Punctuation -->
+<!ENTITY ensp    "&#8194;"> <!-- en space, U+2002 ISOpub -->
+<!ENTITY emsp    "&#8195;"> <!-- em space, U+2003 ISOpub -->
+<!ENTITY thinsp  "&#8201;"> <!-- thin space, U+2009 ISOpub -->
+<!ENTITY zwnj    "&#8204;"> <!-- zero width non-joiner,
+                                    U+200C NEW RFC 2070 -->
+<!ENTITY zwj     "&#8205;"> <!-- zero width joiner, U+200D NEW RFC 2070 -->
+<!ENTITY lrm     "&#8206;"> <!-- left-to-right mark, U+200E NEW RFC 2070 -->
+<!ENTITY rlm     "&#8207;"> <!-- right-to-left mark, U+200F NEW RFC 2070 -->
+<!ENTITY ndash   "&#8211;"> <!-- en dash, U+2013 ISOpub -->
+<!ENTITY mdash   "&#8212;"> <!-- em dash, U+2014 ISOpub -->
+<!ENTITY lsquo   "&#8216;"> <!-- left single quotation mark,
+                                    U+2018 ISOnum -->
+<!ENTITY rsquo   "&#8217;"> <!-- right single quotation mark,
+                                    U+2019 ISOnum -->
+<!ENTITY sbquo   "&#8218;"> <!-- single low-9 quotation mark, U+201A NEW -->
+<!ENTITY ldquo   "&#8220;"> <!-- left double quotation mark,
+                                    U+201C ISOnum -->
+<!ENTITY rdquo   "&#8221;"> <!-- right double quotation mark,
+                                    U+201D ISOnum -->
+<!ENTITY bdquo   "&#8222;"> <!-- double low-9 quotation mark, U+201E NEW -->
+<!ENTITY dagger  "&#8224;"> <!-- dagger, U+2020 ISOpub -->
+<!ENTITY Dagger  "&#8225;"> <!-- double dagger, U+2021 ISOpub -->
+<!ENTITY permil  "&#8240;"> <!-- per mille sign, U+2030 ISOtech -->
+<!ENTITY lsaquo  "&#8249;"> <!-- single left-pointing angle quotation mark,
+                                    U+2039 ISO proposed -->
+<!-- lsaquo is proposed but not yet ISO standardized -->
+<!ENTITY rsaquo  "&#8250;"> <!-- single right-pointing angle quotation mark,
+                                    U+203A ISO proposed -->
+<!-- rsaquo is proposed but not yet ISO standardized -->
+<!ENTITY euro   "&#8364;"> <!-- euro sign, U+20AC NEW -->
+
 ]>
 '''
 
@@ -199,10 +475,13 @@
                 if i == 0 or i == len(text)-1:
                     text[0:0] = [" "] # del?            XXX this is either a bug or a missing comment - what does that mean? why 0:0?
                     i += 1
-                elif (text[i-1][-1] in (" ", "\n",) or  # last char of previous element is whitespace
-                      text[i+1][0] in (" ", "\n",)):    # or first char of next element is whitespace
+                elif (text[i-1][-1] in (" ", "\n",) or
+                      # last char of previous element is whitespace
+                      text[i+1][0] in (" ", "\n",)):
+                      # or first char of next element is whitespace
                     del text[i]
-                elif (text[i+1] is self.white_space or  # next element is white_space
+                elif (text[i+1] is self.white_space or
+                      # next element is white_space
                       text[i+1] is self.new_line):      # or new_line
                     del text[i]
                 else: