MoinMoin/Page.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Sun, 28 Feb 2010 23:49:03 +0100
changeset 5597 ced05deb11ae
parent 5591 1dff6cfdcf90
child 5609 4c6a4adf0540
permissions -rw-r--r--
cfg.history_paging: fix grammar/remove performance warning (we don't do that at other places either)
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - Page class
     4 
     5     Page is used for read-only access to a wiki page. For r/w access see PageEditor.
     6     A Page object is used to access a wiki page (in general) as well as to access
     7     some specific revision of a wiki page.
     8 
     9     The RootPage is some virtual page located at / and is mainly used to do namespace
    10     operations like getting the page list.
    11 
    12     Currently, this is all a big mixture of high-level page code, intermediate
    13     data/underlay layering code, caching code and low-level filesystem storage code.
    14     To see the filesystem storage layout we use, the best way is to look into data/pages/
    15     (the underlay dir uses the same format).
    16 
    17     TODO:
    18     * Cleanly separate the code into packages for:
    19       * Page (or rather: Item)
    20       * Layering
    21       * Cache
    22       * Storage
    23     * ACLs should be handled on a low layer, raising an Exception when access
    24       is denied, so we won't have security issues just because someone forgot to check
    25       user.may.read(secretpage).
    26     * The distinction between an item and an item revision should be clearer.
    27     * Items can be anything, not just wiki pages, but also files of any mimetype.
    28       The mimetype hierarchy should be modelled by a MimeTypeItem class hierarchy.
    29 
    30     @copyright: 2000-2004 by Juergen Hermann <jh@web.de>,
    31                 2005-2008 by MoinMoin:ThomasWaldmann,
    32                 2006 by MoinMoin:FlorianFesti,
    33                 2007 by MoinMoin:ReimarBauer
    34     @license: GNU GPL, see COPYING for details.
    35 """
    36 
    37 import os, re, codecs
    38 
    39 from MoinMoin import log
    40 logging = log.getLogger(__name__)
    41 
    42 from MoinMoin import config, caching, user, util, wikiutil
    43 from MoinMoin.logfile import eventlog
    44 from MoinMoin.util import filesys
    45 
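       # Example usage (for illustration only; 'request' stands for a live
       # request object and the page name is made up):
       #
       #   page = Page(request, u"FrontPage")        # current revision
       #   old = Page(request, u"FrontPage", rev=2)  # a specific older revision
       #   if page.exists():
       #       text = page.body                      # full page text (unicode)
       #       content = page.data                   # body without the PI header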
    46 def is_cache_exception(e):
    47     args = e.args
    48     return len(args) == 1 and args[0] == 'CacheNeedsUpdate'
    49 
    50 
    51 class ItemCache:
    52     """ Cache some page item related data, such as metadata or page lists.
    53 
    54         We only cache this in RAM in request.cfg (the only kind of
    55         server object we have), because it might be too big to pickle
    56         in and out.
    57     """
    58     def __init__(self, name):
    59         """ Initialize ItemCache object.
    60             @param name: name of the object; used for display in logging,
    61                          and it influences the behaviour of refresh().
    62         """
    63         self.name = name
    64         self.cache = {}
    65         self.log_pos = None # TODO: initialize this to EOF pos of log
    66                             # to avoid reading in the whole log on first request
    67         self.requests = 0
    68         self.hits = 0
    69         self.loglevel = logging.NOTSET
    70 
    71     def putItem(self, request, name, key, data):
    72         """ Remembers some data for item name under a key.
    73             @param request: currently unused
    74             @param name: name of the item (page), unicode
    75             @param key: used as secondary access key after name
    76             @param data: the data item that should be remembered
    77         """
    78         d = self.cache.setdefault(name, {})
    79         d[key] = data
    80 
    81     def getItem(self, request, name, key):
    82         """ Returns some item stored for item name under key.
    83             @param request: the request object
    84             @param name: name of the item (page), unicode
    85             @param key: used as secondary access key after name
    86             @return: the data or None, if there is no such name or key.
    87         """
    88         self.refresh(request)
    89         try:
    90             data = self.cache[name][key]
    91             self.hits += 1
    92             hit_str = 'hit'
    93         except KeyError:
    94             data = None
    95             hit_str = 'miss'
    96         self.requests += 1
    97         logging.log(self.loglevel, "%s cache %s (h/r %2.1f%%) for %r %r" % (
    98             self.name,
    99             hit_str,
   100             float(self.hits * 100) / self.requests,
   101             name,
   102             key,
   103         ))
   104         return data
   105 
   106     def refresh(self, request):
   107         """ Refresh the cache - if anything has changed in the wiki, we see it
   108             in the edit-log and either delete the cached data for the changed
   109             items (for 'meta') or clear the complete cache ('pagelists').
   110             @param request: the request object
   111         """
   112         from MoinMoin.logfile import editlog
   113         elog = editlog.EditLog(request)
   114         old_pos = self.log_pos
   115         new_pos, items = elog.news(old_pos)
   116         if items:
   117             if self.name == 'meta':
   118                 for item in items:
   119                     logging.log(self.loglevel, "cache: removing %r" % item)
   120                     try:
   121                         del self.cache[item]
   122                     except KeyError:
   123                         pass # item was not cached
   124             elif self.name == 'pagelists':
   125                 logging.log(self.loglevel, "cache: clearing pagelist cache")
   126                 self.cache = {}
   127         self.log_pos = new_pos # important to do this at the end -
   128                                # avoids threading race conditions
   129 
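       # Example of how these caches are used further down (for illustration;
       # 'request' stands for a live request object, the page name is made up):
       #
       #   cache = request.cfg.cache.meta                       # ItemCache('meta')
       #   cache.putItem(request, u"SomePage", 'layer_auto', data)
       #   data = cache.getItem(request, u"SomePage", 'layer_auto')  # None on miss
       #
       # getItem() calls refresh() first, so entries for pages that changed in the
       # edit-log since the last lookup are dropped before the lookup.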
   130 
   131 class Page(object):
   132     """ Page - Manage an (immutable) page associated with a WikiName.
   133         To change a page's content, use the PageEditor class.
   134     """
   135     def __init__(self, request, page_name, **kw):
   136         """ Create page object.
   137 
   138         Note that this is a 'lean' operation, since the text for the page
   139         is loaded on demand. Thus, things like `Page(name).link_to()` are
   140         efficient.
   141 
   142         @param page_name: WikiName of the page
   143         @keyword rev: number of older revision
   144         @keyword formatter: formatter instance or mimetype str,
   145                             None or no kw arg will use default formatter
   146         @keyword include_self: if 1, include current user (default: 0)
   147         """
   148         self.request = request
   149         self.cfg = request.cfg
   150         self.page_name = page_name
   151         self.rev = kw.get('rev', 0) # revision of this page
   152         self.include_self = kw.get('include_self', 0)
   153 
   154         formatter = kw.get('formatter', None)
   155         if isinstance(formatter, (str, unicode)): # mimetype given
   156             mimetype = str(formatter)
   157             self.formatter = None
   158             self.output_mimetype = mimetype
   159             self.default_formatter = mimetype == "text/html"
   160         elif formatter is not None: # formatter instance given
   161             self.formatter = formatter
   162             self.default_formatter = 0
   163             self.output_mimetype = "text/todo" # TODO where do we get this value from?
   164         else:
   165             self.formatter = None
   166             self.default_formatter = 1
   167             self.output_mimetype = "text/html"
   168 
   169         self.output_charset = config.charset # correct for wiki pages
   170 
   171         self._text_filename_force = None
   172         self.hilite_re = None
   173 
   174         self.__body = None # unicode page body == metadata + data
   175         self.__body_modified = 0 # was __body modified in RAM so it differs from disk?
   176         self.__meta = None # list of raw tuples of page metadata (currently: the # stuff at top of the page)
   177         self.__pi = None # dict of preprocessed page metadata (processing instructions)
   178         self.__data = None # unicode page data = body - metadata
   179 
   180         self.reset()
   181 
   182     def reset(self):
   183         """ Reset page state """
   184         page_name = self.page_name
   185         # page_name quoted for file system usage, needs to be reset to
   186         # None when pagename changes
   187 
   188         qpagename = wikiutil.quoteWikinameFS(page_name)
   189         self.page_name_fs = qpagename
   190 
   191         # the normal and the underlay path used for this page
   192         normalpath = os.path.join(self.cfg.data_dir, "pages", qpagename)
   193         if self.cfg.data_underlay_dir is not None:
   194             underlaypath = os.path.join(self.cfg.data_underlay_dir, "pages", qpagename)
   195         else:
   196             underlaypath = None
   197 
   198         # TUNING - remember some essential values
   199 
   200         # does the page come from normal page storage (0) or from
   201         # underlay dir (1) (can be used as index into following list)
   202         self._underlay = None
   203 
   204         # path to normal / underlay page dir
   205         self._pagepath = [normalpath, underlaypath]
   206 
   207     # now we define some properties to lazy load some attributes on first access:
   208 
   209     def get_body(self):
   210         if self.__body is None:
   211             # try to open file
   212             try:
   213                 f = codecs.open(self._text_filename(), 'rb', config.charset)
   214             except IOError, er:
   215                 import errno
   216                 if er.errno == errno.ENOENT:
   217                     # just doesn't exist, return empty text (note that we
   218                     # never store empty pages, so this is detectable and also
   219                     # safe when passed to a function expecting a string)
   220                     return ""
   221                 else:
   222                     raise
   223 
   224             # read file content and make sure it is closed properly
   225             try:
   226                 text = f.read()
   227                 text = self.decodeTextMimeType(text)
   228                 self.__body = text
   229             finally:
   230                 f.close()
   231         return self.__body
   232 
   233     def set_body(self, newbody):
   234         self.__body = newbody
   235         self.__meta = None
   236         self.__data = None
   237     body = property(fget=get_body, fset=set_body) # complete page text
   238 
   239     def get_meta(self):
   240         if self.__meta is None:
   241             self.__meta, self.__data = wikiutil.get_processing_instructions(self.body)
   242         return self.__meta
   243     meta = property(fget=get_meta) # processing instructions, ACLs (upper part of page text)
   244 
   245     def get_data(self):
   246         if self.__data is None:
   247             self.__meta, self.__data = wikiutil.get_processing_instructions(self.body)
   248         return self.__data
   249     data = property(fget=get_data) # content (lower part of page text)
   250 
   251     def get_pi(self):
   252         if self.__pi is None:
   253             self.__pi = self.parse_processing_instructions()
   254         return self.__pi
   255     pi = property(fget=get_pi) # processed meta stuff
   256 
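           # Example of the lazy loading above (for illustration; 'request' and
           # the page name are placeholders):
           #
           #   p = Page(request, u"SomePage")  # cheap, no disk access yet
           #   p.body  # reads and decodes the page file on first access
           #   p.meta  # raw (verb, args) tuples from the leading '#...' lines
           #   p.data  # page text without those leading lines
           #   p.pi    # parsed processing instructions, e.g. p.pi['format']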
   257     def getlines(self):
   258         """ Return a list of all lines in body.
   259 
   260         @rtype: list
   261         @return: list of unicode strings (the lines of the page body)
   262         """
   263         return self.body.split('\n')
   264 
   265     def get_raw_body(self):
   266         """ Load the raw markup from the page file.
   267 
   268         @rtype: unicode
   269         @return: raw page contents of this page, unicode
   270         """
   271         return self.body
   272 
   273     def get_raw_body_str(self):
   274         """ Returns the raw markup from the page file, as a string.
   275 
   276         @rtype: str
   277         @return: raw page contents of this page, utf-8-encoded
   278         """
   279         return self.body.encode("utf-8")
   280 
   281     def set_raw_body(self, body, modified=0):
   282         """ Set the raw body text (prevents loading from disk).
   283 
   284         TODO: this should not be a public function, as Page is immutable.
   285 
   286         @param body: raw body text
   287         @param modified: 1 means that we internally modified the raw text and
   288             that it is not in sync with the page file on disk.  This is
   289             used e.g. by PageEditor when previewing the page.
   290         """
   291         self.body = body
   292         self.__body_modified = modified
   293 
   294     def get_current_from_pagedir(self, pagedir):
   295         """ Get the current revision number from an arbitrary pagedir.
   296             Does not modify page object's state, uncached, direct disk access.
   297             @param pagedir: the pagedir with the 'current' file to read
   298             @return: int currentrev
   299         """
   300         revfilename = os.path.join(pagedir, 'current')
   301         try:
   302             revfile = file(revfilename)
   303             revstr = revfile.read().strip()
   304             revfile.close()
   305             rev = int(revstr)
   306         except (IOError, OSError, ValueError):
   307             rev = 99999999 # XXX do some better error handling
   308         return rev
   309 
   310     def get_rev_dir(self, pagedir, rev=0):
   311         """ Get a revision of a page from an arbitrary pagedir.
   312 
   313         Does not modify page object's state, uncached, direct disk access.
   314 
   315         @param pagedir: the path to the page storage area
   316         @param rev: int revision to get (default is 0 and means the current
   317                     revision; in this case, the real revint is returned)
   318         @return: (str path to file of the revision,
   319                   int realrevint,
   320                   bool exists)
   321         """
   322         if rev == 0:
   323             rev = self.get_current_from_pagedir(pagedir)
   324 
   325         revstr = '%08d' % rev
   326         pagefile = os.path.join(pagedir, 'revisions', revstr)
   327         if rev != 99999999:
   328             exists = os.path.exists(pagefile)
   329             if exists:
   330                 self._setRealPageName(pagedir)
   331         else:
   332             exists = False
   333         return pagefile, rev, exists
   334 
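           # Sketch of the on-disk layout these methods read (for illustration;
           # the page name and revision numbers are made up):
           #
           #   data/pages/SomePage/current               e.g. contains "00000003"
           #   data/pages/SomePage/revisions/00000001
           #   data/pages/SomePage/revisions/00000002
           #   data/pages/SomePage/revisions/00000003    returned for rev=0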
   335     def _setRealPageName(self, pagedir):
   336         """ Set page_name to the real case of page name
   337 
   338         On a case-insensitive file system, "pagename" exists even if the
   339         real page name is "PageName" or "PAGENAME". This leads to
   340         confusion in urls, links and logs.
   341         See MoinMoinBugs/MacHfsPlusCaseInsensitive
   342 
   343         Correct the case of the page name. Elements created from the
   344         page name in reset() are not updated because it's too messy, and
   345         this fix seems to be enough for now.
   346 
   347         Problems to fix later:
   348 
   349          - ["helponnavigation"] links to HelpOnNavigation but is not
   350            considered a backlink.
   351 
   352         @param pagedir: the storage path to the page directory
   353         """
   354         if self._text_filename_force is None:
   355             # we only do this for normal pages, but not for the MissingPage,
   356             # because the code below is wrong in that case
   357             realPath = util.filesys.realPathCase(pagedir)
   358             if realPath is not None:
   359                 realPath = wikiutil.unquoteWikiname(realPath)
   360                 self.page_name = realPath[-len(self.page_name):]
   361 
   362     def get_rev(self, use_underlay=-1, rev=0):
   363         """ Get information about a revision.
   364 
   365         Returns filename, revision number, and an existence check for this page and revision.
   366 
   367         @param use_underlay: -1 == auto, 0 == normal, 1 == underlay
   368         @param rev: int revision to get (default is 0 and means the current
   369                     revision; in this case, the real revint is returned)
   370         @return: (str path to current revision of page,
   371                   int realrevint,
   372                   bool exists)
   373         """
   374         def layername(underlay):
   375             if underlay == -1:
   376                 return 'layer_auto'
   377             elif underlay == 0:
   378                 return 'layer_normal'
   379             else: # 1
   380                 return 'layer_underlay'
   381 
   382         request = self.request
   383         cache_name = self.page_name
   384         cache_key = layername(use_underlay)
   385         if self._text_filename_force is None:
   386             cache_data = request.cfg.cache.meta.getItem(request, cache_name, cache_key)
   387             if cache_data and (rev == 0 or rev == cache_data[1]):
   388                 # we got the correct rev data from the cache
   389                 #logging.debug("got data from cache: %r %r %r" % cache_data)
   390                 return cache_data
   391 
   392         # Figure out if we should use underlay or not, if needed.
   393         if use_underlay == -1:
   394             underlay, pagedir = self.getPageStatus(check_create=0)
   395         else:
   396             underlay, pagedir = use_underlay, self._pagepath[use_underlay]
   397 
   398         # Find current revision, if automatic selection is requested.
   399         if rev == 0:
   400             realrev = self.get_current_from_pagedir(pagedir)
   401         else:
   402             realrev = rev
   403 
   404         data = self.get_rev_dir(pagedir, realrev)
   405         if rev == 0 and self._text_filename_force is None:
   406             # we only save the current rev to the cache
   407             request.cfg.cache.meta.putItem(request, cache_name, cache_key, data)
   408 
   409         return data
   410 
   411     def current_rev(self):
   412         """ Return number of current revision.
   413 
   414         This is the same as get_rev()[1].
   415 
   416         @return: int revision
   417         """
   418         pagefile, rev, exists = self.get_rev()
   419         return rev
   420 
   421     def get_real_rev(self):
   422         """ Returns the real revision number of this page.
   423             A rev==0 is translated to the current revision.
   424 
   425         @return: revision number > 0
   426         @rtype: int
   427         """
   428         if self.rev == 0:
   429             return self.current_rev()
   430         return self.rev
   431 
   432     def getPageBasePath(self, use_underlay=-1):
   433         """ Get the base path to the page-specific storage area,
   434             choosing between the standard page dir and the underlay page dir.
   435 
   436         @param use_underlay: force using a specific pagedir, default -1
   437                                 -1 = automatically choose page dir
   438                                 1 = use underlay page dir
   439                                 0 = use standard page dir
   440         @rtype: tuple
   441         @return: (int underlay (1 if using underlay, 0 otherwise),
   442                   str the full path to the storage area)
   443         """
   444         standardpath, underlaypath = self._pagepath
   445         if underlaypath is None:
   446             use_underlay = 0
   447 
   448         if use_underlay == -1: # automatic
   449             if self._underlay is None:
   450                 underlay, path = 0, standardpath
   451                 pagefile, rev, exists = self.get_rev(use_underlay=0)
   452                 if not exists:
   453                     pagefile, rev, exists = self.get_rev(use_underlay=1)
   454                     if exists:
   455                         underlay, path = 1, underlaypath
   456                 self._underlay = underlay
   457             else:
   458                 underlay = self._underlay
   459                 path = self._pagepath[underlay]
   460         else: # normal or underlay
   461             underlay, path = use_underlay, self._pagepath[use_underlay]
   462 
   463         return underlay, path
   464 
   465     def getPageStatus(self, *args, **kw):
   466         """ Get full path to a page-specific storage area. `args` can
   467             contain additional path components that are added to the base path.
   468 
   469         @param args: additional path components
   470         @keyword use_underlay: force using a specific pagedir, default -1
   471                                 -1 = automatically choose page dir
   472                                 1 = use underlay page dir
   473                                 0 = use standard page dir
   474         @keyword check_create: if true, ensures that the path requested really exists
   475                                (if it doesn't, create all directories automatically).
   476                                (default true)
   477         @keyword isfile: is the last component in args a filename? (default is false)
   478         @rtype: tuple
   479         @return: (int underlay (1 if using underlay, 0 otherwise),
   480                   str the full path to the storage area )
   481         """
   482         check_create = kw.get('check_create', 1)
   483         isfile = kw.get('isfile', 0)
   484         use_underlay = kw.get('use_underlay', -1)
   485         underlay, path = self.getPageBasePath(use_underlay)
   486         fullpath = os.path.join(*((path, ) + args))
   487         if check_create:
   488             if isfile:
   489                 dirname, filename = os.path.split(fullpath)
   490             else:
   491                 dirname = fullpath
   492             try:
   493                 os.makedirs(dirname)
   494             except OSError, err:
   495                 if not os.path.exists(dirname):
   496                     raise
   497         return underlay, fullpath
   498 
   499     def getPagePath(self, *args, **kw):
   500         """ Return path to the page storage area. """
   501         return self.getPageStatus(*args, **kw)[1]
   502 
   503     def _text_filename(self, **kw):
   504         """ The name of the page file, possibly of an older revision.
   505 
   506         @keyword rev: page revision, overriding self.rev
   507         @rtype: string
   508         @return: complete filename (including path) to this page
   509         """
   510         if self._text_filename_force is not None:
   511             return self._text_filename_force
   512         rev = kw.get('rev', 0)
   513         if not rev and self.rev:
   514             rev = self.rev
   515         fname, rev, exists = self.get_rev(-1, rev)
   516         return fname
   517 
   518     def editlog_entry(self):
   519         """ Return the edit-log entry for this Page object (can be an old revision).
   520         """
   521         request = self.request
   522         use_cache = self.rev == 0 # use the cache for current rev
   523         if use_cache:
   524             cache_name, cache_key = self.page_name, 'lastlog'
   525             entry = request.cfg.cache.meta.getItem(request, cache_name, cache_key)
   526         else:
   527             entry = None
   528         if entry is None:
   529             from MoinMoin.logfile import editlog
   530             wanted_rev = "%08d" % self.get_real_rev()
   531             edit_log = editlog.EditLog(request, rootpagename=self.page_name)
   532             for entry in edit_log.reverse():
   533                 if entry.rev == wanted_rev:
   534                     break
   535             else:
   536                 entry = () # don't use None
   537             if use_cache:
   538                 request.cfg.cache.meta.putItem(request, cache_name, cache_key, entry)
   539         return entry
   540 
   541     def edit_info(self):
   542         """ Return timestamp/editor info for this Page object (can be an old revision).
   543 
   544             Note: if you ask about a deleted revision, it will report timestamp and editor
   545                   for the delete action (in the edit-log, this is just a SAVE).
   546 
   547         This is used by MoinMoin/xmlrpc/__init__.py.
   548 
   549         @rtype: dict
   550         @return: timestamp and editor information
   551         """
   552         line = self.editlog_entry()
   553         if line:
   554             editordata = line.getInterwikiEditorData(self.request)
   555             if editordata[0] == 'interwiki':
   556                 editor = "%s:%s" % editordata[1]
   557             else:
   558                 editor = editordata[1] # ip or email or anon
   559             result = {
   560                 'timestamp': line.ed_time_usecs,
   561                 'editor': editor,
   562             }
   563             del line
   564         else:
   565             result = {}
   566         return result
   567 
   568     def last_edit(self, request):
   569         # XXX usage of last_edit is DEPRECATED - use edit_info()
   570         if not self.exists(): # XXX doesn't make much sense, but still kept
   571             return None       # XXX here until we remove last_edit()
   572         return self.edit_info()
   573 
   574     def lastEditInfo(self, request=None):
   575         """ Return the last edit info.
   576 
   577             Note: if you ask about a deleted revision, it will report timestamp and editor
   578                   for the delete action (in the edit-log, this is just a SAVE).
   579 
   580         @param request: the request object (DEPRECATED, unused)
   581         @rtype: dict
   582         @return: timestamp and editor information
   583         """
   584         log = self.editlog_entry()
   585         if log:
   586             request = self.request
   587             editor = log.getEditor(request)
   588             time = wikiutil.version2timestamp(log.ed_time_usecs)
   589             time = request.user.getFormattedDateTime(time) # Use user time format
   590             result = {'editor': editor, 'time': time}
   591             del log
   592         else:
   593             result = {}
   594         return result
   595 
   596     def isWritable(self):
   597         """ Can this page be changed?
   598 
   599         @rtype: bool
   600         @return: true, if this page is writable or does not exist
   601         """
   602         return os.access(self._text_filename(), os.W_OK) or not self.exists()
   603 
   604     def isUnderlayPage(self, includeDeleted=True):
   605         """ Does this page live in the underlay dir?
   606 
   607         Return true even if the data dir has a copy of this page. To
   608         check for an underlay-only page, use isUnderlayPage() and not
   609         isStandardPage().
   610 
   611         @param includeDeleted: include deleted pages
   612         @rtype: bool
   613         @return: true if page lives in the underlay dir
   614         """
   615         return self.exists(domain='underlay', includeDeleted=includeDeleted)
   616 
   617     def isStandardPage(self, includeDeleted=True):
   618         """ Does this page live in the data dir?
   619 
   620         Return true even if this is a copy of an underlay page. To check
   621         for a data-only page, use isStandardPage() and not isUnderlayPage().
   622 
   623         @param includeDeleted: include deleted pages
   624         @rtype: bool
   625         @return: true if page lives in the data dir
   626         """
   627         return self.exists(domain='standard', includeDeleted=includeDeleted)
   628 
   629     def exists(self, rev=0, domain=None, includeDeleted=False):
   630         """ Does this page exist?
   631 
   632         This is the lower level method for checking page existence. Use
   633         the higher level methods isUnderlayPage and isStandardPage for
   634         cleaner code.
   635 
   636         @param rev: revision to look for. Default: check current
   637         @param domain: where to look for the page. Default: look in both;
   638                        available values: 'underlay', 'standard'
   639         @param includeDeleted: ignore page state, just check its pagedir
   640         @rtype: bool
   641         @return: true, if page exists
   642         """
   643         # Edge cases
   644         if domain == 'underlay' and not self.request.cfg.data_underlay_dir:
   645             return False
   646 
   647         if includeDeleted:
   648             # Look for page directory, ignore page state
   649             if domain is None:
   650                 checklist = [0, 1]
   651             else:
   652                 checklist = [domain == 'underlay']
   653             for use_underlay in checklist:
   654                 pagedir = self.getPagePath(use_underlay=use_underlay, check_create=0)
   655                 if os.path.exists(pagedir):
   656                     return True
   657             return False
   658         else:
   659             # Look for non-deleted pages only, using get_rev
   660             if not rev and self.rev:
   661                 rev = self.rev
   662 
   663             if domain is None:
   664                 use_underlay = -1
   665             else:
   666                 use_underlay = domain == 'underlay'
   667             d, d, exists = self.get_rev(use_underlay, rev)
   668             return exists
   669 
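           # Example calls (for illustration):
           #
           #   page.exists()                    # current revision, any domain
           #   page.exists(domain='underlay')   # non-deleted copy in the underlay dir
           #   page.isUnderlayPage()            # exists(domain='underlay', includeDeleted=True)
           #   page.isStandardPage()            # exists(domain='standard', includeDeleted=True)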
   670     def size(self, rev=0):
   671         """ Get Page size.
   672 
   673         @rtype: int
   674         @return: page size, 0 for non-existent pages.
   675         """
   676         if rev == self.rev: # same revision as self
   677             if self.__body is not None:
   678                 return len(self.__body)
   679 
   680         try:
   681             return os.path.getsize(self._text_filename(rev=rev))
   682         except EnvironmentError, e:
   683             import errno
   684             if e.errno == errno.ENOENT:
   685                 return 0
   686             raise
   687 
   688     def mtime_usecs(self):
   689         """ Get modification timestamp of this page (from edit-log, can be for an old revision).
   690 
   691         @rtype: int
   692         @return: mtime of page (or 0 if page / edit-log entry does not exist)
   693         """
   694         entry = self.editlog_entry()
   695         return entry and entry.ed_time_usecs or 0
   696 
   697     def mtime_printable(self, request):
   698         """ Get printable (as per user's preferences) modification timestamp of this page.
   699 
   700         @rtype: string
   701         @return: formatted string with mtime of page
   702         """
   703         t = self.mtime_usecs()
   704         if not t:
   705             result = "0" # TODO: i18n, "Ever", "Beginning of time"...?
   706         else:
   707             result = request.user.getFormattedDateTime(
   708                 wikiutil.version2timestamp(t))
   709         return result
   710 
   711     def split_title(self, force=0):
   712         """ Return a string with the page name split by spaces, if the user wants that.
   713 
   714         @param force: if != 0, then force splitting the page_name
   715         @rtype: unicode
   716         @return: pagename of this page, split into space-separated words
   717         """
   718         request = self.request
   719         if not force and not request.user.wikiname_add_spaces:
   720             return self.page_name
   721 
   722         # look for the end of words and the start of a new word,
   723         # and insert a space there
   724         splitted = config.split_regex.sub(r'\1 \2', self.page_name)
   725         return splitted
   726 
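           # Example (for illustration): with wikiname_add_spaces enabled (or
           # force=1), u"HelpOnNavigation" would be returned as
           # u"Help On Navigation".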
   727     def url(self, request, querystr=None, anchor=None, relative=False, **kw):
   728         """ Return complete URL for this page, including scriptname.
   729             The URL is NOT escaped; if you write it to HTML, use wikiutil.escape
   730             (at least if you have a querystr, to escape the & chars).
   731 
   732         @param request: the request object
   733         @param querystr: the query string to add after a "?" after the url
   734             (str or dict, see wikiutil.makeQueryString)
   735         @param anchor: if specified, make a link to this anchor
   736         @param relative: create a relative link (default: False), note that this
   737                          changed in 1.7; in 1.6, the default was True.
   738         @rtype: str
   739         @return: complete url of this page, including scriptname
   740         """
   741         assert(isinstance(anchor, (type(None), str, unicode)))
   742         # Create url, excluding scriptname
   743         url = wikiutil.quoteWikinameURL(self.page_name)
   744         if querystr:
   745             if isinstance(querystr, dict):
   746                 action = querystr.get('action', None)
   747             else:
   748                 action = None # we don't support getting the action out of a str
   749 
   750             querystr = wikiutil.makeQueryString(querystr)
   751 
   752             # make action URLs denyable by robots.txt:
   753             if action is not None and request.cfg.url_prefix_action is not None:
   754                 url = "%s/%s/%s" % (request.cfg.url_prefix_action, action, url)
   755             url = '%s?%s' % (url, querystr)
   756 
   757         if not relative:
   758             url = '%s/%s' % (request.script_root, url)
   759 
   760         # Add anchor
   761         if anchor:
   762             fmt = getattr(self, 'formatter', request.html_formatter)
   763             if fmt:
   764                 anchor = fmt.sanitize_to_id(anchor)
   765             url = "%s#%s" % (url, anchor)
   766 
   767         return url
   768 
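           # Example calls (for illustration; the resulting paths depend on
           # script_root and url_prefix_action):
           #
           #   page.url(request)                              # e.g. '/SomePage'
           #   page.url(request, querystr={'action': 'raw'})  # action URL (robots-excludable)
           #   page.url(request, anchor='heading-1', relative=True)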
   769     def link_to_raw(self, request, text, querystr=None, anchor=None, **kw):
   770         """ core functionality of link_to, without the magic """
   771         url = self.url(request, querystr, anchor=anchor, relative=True) # scriptName is added by link_tag
   772         # escaping is done by link_tag -> formatter.url -> ._open()
   773         link = wikiutil.link_tag(request, url, text,
   774                                  formatter=getattr(self, 'formatter', None), **kw)
   775         return link
   776 
   777     def link_to(self, request, text=None, querystr=None, anchor=None, **kw):
   778         """ Return HTML markup that links to this page.
   779 
   780         See wikiutil.link_tag() for possible keyword parameters.
   781 
   782         @param request: the request object
   783         @param text: inner text of the link - it gets automatically escaped
   784         @param querystr: the query string to add after a "?" after the url
   785         @param anchor: if specified, make a link to this anchor
   786         @keyword on: opening/closing tag only
   787         @keyword attachment_indicator: if 1, add attachment indicator after link tag
   788         @keyword css_class: css class to use
   789         @rtype: string
   790         @return: formatted link
   791         """
   792         if not text:
   793             text = self.split_title()
   794         text = wikiutil.escape(text)
   795 
   796         # Add css class for non existing page
   797         if not self.exists():
   798             kw['css_class'] = 'nonexistent'
   799 
   800         attachment_indicator = kw.get('attachment_indicator')
   801         if attachment_indicator is None:
   802             attachment_indicator = 0 # default is off
   803         else:
   804             del kw['attachment_indicator'] # avoid having this as <a> tag attribute
   805 
   806         link = self.link_to_raw(request, text, querystr, anchor, **kw)
   807 
   808         # Create a link to attachments if any exist
   809         if attachment_indicator:
   810             from MoinMoin.action import AttachFile
   811             link += AttachFile.getIndicator(request, self.page_name)
   812 
   813         return link
   814 
   815     def getSubscribers(self, request, **kw):
   816         """ Get all subscribers of this page.
   817 
   818         @param request: the request object
   819         @keyword include_self: if 1, include current user (default: 0)
   820         @keyword return_users: if 1, return user instances (default: 0)
   821         @rtype: dict
   822         @return: lists of subscribed email addresses in a dict by language key
   823         """
   824         include_self = kw.get('include_self', self.include_self)
   825         return_users = kw.get('return_users', 0)
   826 
   827         # extract categories of this page
   828         pageList = self.getCategories(request)
   829 
   830         # add current page name for list matching
   831         pageList.append(self.page_name)
   832 
   833         if self.cfg.SecurityPolicy:
   834             UserPerms = self.cfg.SecurityPolicy
   835         else:
   836             from MoinMoin.security import Default as UserPerms
   837 
   838         # get email addresses of all wiki users who have a profile stored;
   839         # add the address only if the user has subscribed to the page and
   840         # the user is not the current editor
   841         userlist = user.getUserList(request)
   842         subscriber_list = {}
   843         for uid in userlist:
   844             if uid == request.user.id and not include_self:
   845                 continue # no self notification
   846             subscriber = user.User(request, uid)
   847 
   848             # The following tests should be ordered in order of
   849             # decreasing computation complexity, in particular
   850             # the permissions check may be expensive; see the bug
   851             # MoinMoinBugs/GetSubscribersPerformanceProblem
   852 
   853             # This is a bit wrong if return_users=1 (which implies that the caller will process
   854             # user attributes and may, for example choose to send an SMS)
   855             # So it _should_ be "not (subscriber.email and return_users)" but that breaks at the moment.
   856             if not subscriber.email:
   857                 continue # skip empty email addresses
   858 
   859             # skip people not subscribed
   860             if not subscriber.isSubscribedTo(pageList):
   861                 continue
   862 
   863             # skip people who can't read the page
   864             if not UserPerms(subscriber).read(self.page_name):
   865                 continue
   866 
   867             # add the user to the list
   868             lang = subscriber.language or request.cfg.language_default
   869             if lang not in subscriber_list:
   870                 subscriber_list[lang] = []
   871             if return_users:
   872                 subscriber_list[lang].append(subscriber)
   873             else:
   874                 subscriber_list[lang].append(subscriber.email)
   875 
   876         return subscriber_list
   877 
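           # Example (for illustration): the result maps language codes to lists
           # of subscribers.
           #
           #   subscribers = page.getSubscribers(request, return_users=1)
           #   for lang, users in subscribers.items():
           #       for subscriber in users:
           #           pass  # e.g. notify subscriber in language 'lang'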
   878     def parse_processing_instructions(self):
   879         """ Parse page text and extract processing instructions,
   880             return a dict of the processing instructions (PIs) found.
   881         """
   882         from MoinMoin import i18n
   883         from MoinMoin import security
   884         request = self.request
   885         pi = {} # we collect the processing instructions here
   886 
   887         # default language from cfg
   888         pi['language'] = self.cfg.language_default or "en"
   889 
   890         body = self.body
   891         # TODO: remove this hack once we have separate metadata and can use mimetype there
   892         if body.startswith('<?xml'): # check for XML content
   893             pi['lines'] = 0
   894             pi['format'] = "xslt"
   895             pi['formatargs'] = ''
   896             pi['acl'] = security.AccessControlList(request.cfg, []) # avoid KeyError on acl check
   897             return pi
   898 
   899         meta = self.meta
   900 
   901         # default is wiki markup
   902         pi['format'] = self.cfg.default_markup or "wiki"
   903         pi['formatargs'] = ''
   904         pi['lines'] = len(meta)
   905         acl = []
   906 
   907         for verb, args in meta:
   908             if verb == "format": # markup format
   909                 format, formatargs = (args + ' ').split(' ', 1)
   910                 pi['format'] = format.lower()
   911                 pi['formatargs'] = formatargs.strip()
   912 
   913             elif verb == "acl":
   914                 acl.append(args)
   915 
   916             elif verb == "language":
   917                 # Page language. Check if args is a known moin language
   918                 if args in i18n.wikiLanguages():
   919                     pi['language'] = args
   920 
   921             elif verb == "refresh":
   922                 if self.cfg.refresh:
   923                     try:
   924                         mindelay, targetallowed = self.cfg.refresh
   925                         args = args.split()
   926                         if len(args) >= 1:
   927                             delay = max(int(args[0]), mindelay)
   928                         if len(args) >= 2:
   929                             target = args[1]
   930                         else:
   931                             target = self.page_name
   932                         if '://' in target:
   933                             if targetallowed == 'internal':
   934                                 raise ValueError
   935                             elif targetallowed == 'external':
   936                                 url = target
   937                         else:
   938                             url = Page(request, target).url(request)
   939                         pi['refresh'] = (delay, url)
   940                     except (ValueError, ):
   941                         pass
   942 
   943             elif verb == "redirect":
   944                 pi['redirect'] = args
   945 
   946             elif verb == "deprecated":
   947                 pi['deprecated'] = True
   948 
   949             elif verb == "openiduser":
   950                 if request.cfg.openid_server_enable_user:
   951                     pi['openid.user'] = args
   952 
   953             elif verb == "pragma":
   954                 try:
   955                     key, val = args.split(' ', 1)
   956                 except (ValueError, TypeError):
   957                     pass
   958                 else:
   959                     request.setPragma(key, val)
   960 
   961         pi['acl'] = security.AccessControlList(request.cfg, acl)
   962         return pi
   963 
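           # Example page header that this parser recognizes (for illustration;
           # the values are made up):
           #
           #   #format wiki
           #   #language en
           #   #acl SomeUser:read,write,delete,revert All:read
           #   #redirect SomeOtherPage
           #   #deprecated
           #   #pragma section-numbers off
           #
           # The header consists of the leading '#...' lines; everything below it
           # ends up in self.data, and the parsed values end up in self.pi.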
   964     def send_raw(self, content_disposition=None, mimetype=None):
   965         """ Output the raw page data (action=raw).
   966             With no content_disposition, the browser usually just displays the
   967             data on the screen; with content_disposition='attachment', it will
   968             offer a dialogue to save it to disk (used by Save action).
   969             Supplied mimetype overrides default text/plain.
   970         """
   971         request = self.request
   972         request.mimetype = mimetype or 'text/plain'
   973         if self.exists():
   974             # use the correct last-modified value from the on-disk file
   975             # to ensure cacheability where supported. Because we are sending
   976             # RAW (file) content, the file mtime is correct as Last-Modified header.
   977             request.status_code = 200
   978             request.last_modified = os.path.getmtime(self._text_filename())
   979             text = self.encodeTextMimeType(self.body)
   980             #request.headers['Content-Length'] = len(text)  # XXX WRONG! text is unicode obj, but we send utf-8!
   981             if content_disposition:
   982                 # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
   983                 # There is no solution that is compatible with IE except stripping non-ascii chars
   984                 filename_enc = "%s.txt" % self.page_name.encode(config.charset)
   985                 dispo_string = '%s; filename="%s"' % (content_disposition, filename_enc)
   986                 request.headers['Content-Disposition'] = dispo_string
   987         else:
   988             request.status_code = 404
   989             text = u"Page %s not found." % self.page_name
   990 
   991         request.write(text)
   992 
   993     def send_page(self, **keywords):
   994         """ Output the formatted page.
   995 
   996         TODO: "kill send_page(), quick" (since 2002 :)
   997 
   998         @keyword content_only: if 1, omit http headers, page header and footer
   999         @keyword content_id: set the id of the enclosing div
  1000         @keyword count_hit: if 1, add an event to the log
  1001         @keyword send_special: if True, this is a special page send
  1002         @keyword omit_footnotes: if True, do not send footnotes (used by include macro)
  1003         """
  1004         request = self.request
  1005         _ = request.getText
  1006         request.clock.start('send_page')
  1007         emit_headers = keywords.get('emit_headers', 1)
  1008         content_only = keywords.get('content_only', 0)
  1009         omit_footnotes = keywords.get('omit_footnotes', 0)
  1010         content_id = keywords.get('content_id', 'content')
  1011         do_cache = keywords.get('do_cache', 1)
  1012         send_special = keywords.get('send_special', False)
  1013         print_mode = keywords.get('print_mode', 0)
  1014         if print_mode:
  1015             media = request.values.get('media', 'print')
  1016         else:
  1017             media = 'screen'
  1018         self.hilite_re = (keywords.get('hilite_re') or
  1019                           request.values.get('highlight'))
  1020 
  1021         # count hit?
  1022         if keywords.get('count_hit', 0):
  1023             eventlog.EventLog(request).add(request, 'VIEWPAGE', {'pagename': self.page_name})
  1024 
  1025         # load the text
  1026         body = self.data
  1027         pi = self.pi
  1028 
  1029         if 'redirect' in pi and not (
  1030             'action' in request.values or 'redirect' in request.values or content_only):
  1031             # redirect to another page
  1032             # note that by including "action=show", we prevent endless looping
  1033             # (see code in "request") or any cascaded redirection
  1034             pagename, anchor = wikiutil.split_anchor(pi['redirect'])
  1035             redirect_url = Page(request, pagename).url(request,
  1036                                                        querystr={'action': 'show', 'redirect': self.page_name, },
  1037                                                        anchor=anchor)
  1038             request.http_redirect(redirect_url, code=301)
  1039             return
  1040 
  1041         # if necessary, load the formatter
  1042         if self.default_formatter:
  1043             from MoinMoin.formatter.text_html import Formatter
  1044             self.formatter = Formatter(request, store_pagelinks=1)
  1045         elif not self.formatter:
  1046             Formatter = wikiutil.searchAndImportPlugin(request.cfg, "formatter", self.output_mimetype)
  1047             self.formatter = Formatter(request)
  1048 
  1049         # save formatter
  1050         no_formatter = object()
  1051         old_formatter = getattr(request, "formatter", no_formatter)
  1052         request.formatter = self.formatter
  1053 
  1054         self.formatter.setPage(self)
  1055         if self.hilite_re:
  1056             try:
  1057                 self.formatter.set_highlight_re(self.hilite_re)
  1058             except re.error, err:
  1059                 request.theme.add_msg(_('Invalid highlighting regular expression "%(regex)s": %(error)s') % {
  1060                                           'regex': self.hilite_re,
  1061                                           'error': str(err),
  1062                                       }, "warning")
  1063                 self.hilite_re = None
  1064 
  1065         if 'deprecated' in pi:
  1066             # deprecated page, append last backup version to current contents
  1067             # (which should be a short reason why the page is deprecated)
  1068             request.theme.add_msg(_('The backed up content of this page is deprecated and will rank lower in search results!'), "warning")
  1069 
  1070             revisions = self.getRevList()
  1071             if len(revisions) >= 2: # XXX shouldn't that always be the case!? Looks like not.
  1072                 oldpage = Page(request, self.page_name, rev=revisions[1])
  1073                 body += oldpage.get_data()
  1074                 del oldpage
  1075 
  1076         lang = self.pi.get('language', request.cfg.language_default)
  1077         request.setContentLanguage(lang)
  1078 
  1079         # start document output
  1080         page_exists = self.exists()
  1081         if not content_only:
  1082             if emit_headers:
  1083                 request.content_type = "%s; charset=%s" % (self.output_mimetype, self.output_charset)
  1084                 if page_exists:
  1085                     if not request.user.may.read(self.page_name):
  1086                         request.status_code = 403
  1087                     else:
  1088                         request.status_code = 200
  1089                     if not request.cacheable:
  1090                         # use "nocache" headers if we're using a method that is not simply "display"
  1091                         request.disableHttpCaching(level=2)
  1092                     elif request.user.valid:
  1093                         # use nocache headers if a user is logged in (which triggers personalisation features)
  1094                         request.disableHttpCaching(level=1)
  1095                     else:
  1096                         # TODO: we need to know if a page generates dynamic content -
  1097                         # if it does, we must not use the page file mtime as last modified value
  1098                         # The following code is commented out because it is incorrect for dynamic pages:
  1099                         #lastmod = os.path.getmtime(self._text_filename())
  1100                         #request.headers['Last-Modified'] = util.timefuncs.formathttpdate(lastmod)
  1101                         pass
  1102                 else:
  1103                     request.status_code = 404
  1104 
  1105             if not page_exists and self.request.isSpiderAgent:
  1106                 # don't send any 404 content to bots
  1107                 return
  1108 
  1109             request.write(self.formatter.startDocument(self.page_name))
  1110 
  1111             # send the page header
  1112             if self.default_formatter:
  1113                 if self.rev:
  1114                     request.theme.add_msg("<strong>%s</strong><br>" % (
  1115                         _('Revision %(rev)d as of %(date)s') % {
  1116                             'rev': self.rev,
  1117                             'date': self.mtime_printable(request)
  1118                         }), "info")
  1119 
  1120                 # This redirect message is very annoying.
  1121                 # Less annoying now without the warning sign.
  1122                 if 'redirect' in request.values:
  1123                     redir = request.values['redirect']
  1124                     request.theme.add_msg('<strong>%s</strong><br>' % (
  1125                         _('Redirected from page "%(page)s"') % {'page':
  1126                             wikiutil.link_tag(request, wikiutil.quoteWikinameURL(redir) + "?action=show", self.formatter.text(redir))}), "info")
  1127                 if 'redirect' in pi:
  1128                     request.theme.add_msg('<strong>%s</strong><br>' % (
  1129                         _('This page redirects to page "%(page)s"') % {'page': wikiutil.escape(pi['redirect'])}), "info")
  1130 
  1131                 # Page trail
  1132                 trail = None
  1133                 if not print_mode:
  1134                     request.user.addTrail(self)
  1135                     trail = request.user.getTrail()
  1136 
  1137                 title = self.split_title()
  1138 
  1139                 html_head = ''
  1140                 if request.cfg.openid_server_enabled:
  1141                     openid_username = self.page_name
  1142                     userid = user.getUserId(request, openid_username)
  1143 
  1144                     if userid is None and 'openid.user' in self.pi:
  1145                         openid_username = self.pi['openid.user']
  1146                         userid = user.getUserId(request, openid_username)
  1147 
  1148                     openid_group_name = request.cfg.openid_server_restricted_users_group
  1149                     if userid is not None and (
  1150                         not openid_group_name or (
  1151                             openid_group_name in request.groups and
  1152                             openid_username in request.groups[openid_group_name])):
  1153                         html_head = '<link rel="openid2.provider" href="%s">' % \
  1154                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
  1155                                                                                 querystr={'action': 'serveopenid'})), True)
  1156                         html_head += '<link rel="openid.server" href="%s">' % \
  1157                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
  1158                                                                                 querystr={'action': 'serveopenid'})), True)
  1159                         html_head += '<meta http-equiv="x-xrds-location" content="%s">' % \
  1160                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
  1161                                                                                 querystr={'action': 'serveopenid', 'yadis': 'ep'})), True)
  1162                     elif self.page_name == request.cfg.page_front_page:
  1163                         html_head = '<meta http-equiv="x-xrds-location" content="%s">' % \
  1164                                         wikiutil.escape(request.getQualifiedURL(self.url(request,
  1165                                                                                 querystr={'action': 'serveopenid', 'yadis': 'idp'})), True)
  1166 
  1167                 request.theme.send_title(title, page=self,
  1168                                     print_mode=print_mode,
  1169                                     media=media, pi_refresh=pi.get('refresh'),
  1170                                     allow_doubleclick=1, trail=trail,
  1171                                     html_head=html_head,
  1172                                     )
  1173 
  1174         # special pages handling, including denying access
  1175         special = None
  1176 
  1177         if not send_special:
  1178             if not page_exists and not body:
  1179                 special = 'missing'
  1180             elif not request.user.may.read(self.page_name):
  1181                 special = 'denied'
  1182 
  1183             # if we have a special page, output it, unless
  1184             #  - we should only output content (this is for say the pagelinks formatter)
  1185             #  - we have a non-default formatter
  1186             if special and not content_only and self.default_formatter:
  1187                 self._specialPageText(request, special) # this recursively calls send_page
  1188 
  1189         # if we didn't short-cut to a special page, output this page
  1190         if not special:
  1191             # start wiki content div
  1192             request.write(self.formatter.startContent(content_id))
  1193 
  1194             # parse the text and send the page content
  1195             self.send_page_content(request, body,
  1196                                    format=pi['format'],
  1197                                    format_args=pi['formatargs'],
  1198                                    do_cache=do_cache,
  1199                                    start_line=pi['lines'])
  1200 
  1201             # check for pending footnotes
  1202             if getattr(request, 'footnotes', None) and not omit_footnotes:
  1203                 from MoinMoin.macro.FootNote import emit_footnotes
  1204                 request.write(emit_footnotes(request, self.formatter))
  1205 
  1206             # end wiki content div
  1207             request.write(self.formatter.endContent())
  1208 
  1209         # end document output
  1210         if not content_only:
  1211             # send the page footer
  1212             if self.default_formatter:
  1213                 request.theme.send_footer(self.page_name, print_mode=print_mode)
  1214 
  1215             request.write(self.formatter.endDocument())
  1216 
  1217         request.clock.stop('send_page')
  1218         if not content_only and self.default_formatter:
  1219             request.theme.send_closing_html()
  1220 
  1221         # cache the pagelinks
  1222         if do_cache and self.default_formatter and page_exists:
  1223             cache = caching.CacheEntry(request, self, 'pagelinks', scope='item', use_pickle=True)
  1224             if cache.needsUpdate(self._text_filename()):
  1225                 links = self.formatter.pagelinks
  1226                 cache.update(links)
  1227 
  1228         # restore old formatter (hopefully we don't throw any exception that is caught again)
  1229         if old_formatter is no_formatter:
  1230             del request.formatter
  1231         else:
  1232             request.formatter = old_formatter
  1233 
  1234 
  1235     def getFormatterName(self):
  1236         """ Return a formatter name as used in the caching system
  1237 
  1238         @rtype: string
  1239         @return: formatter name as used in caching
  1240         """
  1241         if not hasattr(self, 'formatter') or self.formatter is None:
  1242             return ''
  1243         module = self.formatter.__module__
  1244         return module[module.rfind('.') + 1:]
  1245 
  1246     def canUseCache(self, parser=None):
  1247         """ Is caching available for this request?
  1248 
  1249         This makes sure we can try to use the caching system for this
  1250         request, but it does not guarantee that it will
  1251         succeed. Themes can use this to decide if a Refresh action
  1252         should be displayed.
  1253 
  1254         @param parser: the parser used to render the page
  1255         @rtype: bool
  1256         @return: if this page can use caching
  1257         """
  1258         if (not self.rev and
  1259             not self.hilite_re and
  1260             not self.__body_modified and
  1261             self.getFormatterName() in self.cfg.caching_formats):
  1262             # Everything is fine, now check the parser:
  1263             if parser is None:
  1264                 parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", self.pi['format'])
  1265             return getattr(parser, 'caching', False)
  1266         return False
  1267 
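           # Usage sketch (annotation, not part of the original module; it assumes
           # a live `request` object and an existing page named u'FrontPage'):
           #
           #   page = Page(request, u'FrontPage')
           #   if page.canUseCache():
           #       pass # send_page_content() may replay the cached text_python code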
  1268     def send_page_content(self, request, body, format='wiki', format_args='', do_cache=1, **kw):
  1269         """ Output the formatted wiki page, using caching if possible
  1270 
  1271         @param request: the request object
  1272         @param body: text of the wiki page
  1273         @param format: format of content, default 'wiki'
  1274         @param format_args: #format arguments, used by some parsers
  1275         @param do_cache: if True, use cached content
  1276         """
  1277         request.clock.start('send_page_content')
  1278         # Load the parser
  1279         Parser = wikiutil.searchAndImportPlugin(request.cfg, "parser", format)
  1280         parser = Parser(body, request, format_args=format_args, **kw)
  1281 
  1282         if not (do_cache and self.canUseCache(Parser)):
  1283             self.format(parser)
  1284         else:
  1285             try:
  1286                 code = self.loadCache(request)
  1287                 self.execute(request, parser, code)
  1288             except Exception, e:
  1289                 if not is_cache_exception(e):
  1290                     raise
  1291                 try:
  1292                     code = self.makeCache(request, parser)
  1293                     self.execute(request, parser, code)
  1294                 except Exception, e:
  1295                     if not is_cache_exception(e):
  1296                         raise
  1297                     logging.error('page cache failed after creation')
  1298                     self.format(parser)
  1299 
  1300         request.clock.stop('send_page_content')
  1301 
  1302     def format(self, parser):
  1303         """ Format and write page content without caching """
  1304         parser.format(self.formatter)
  1305 
  1306     def execute(self, request, parser, code):
  1307         """ Write page content by executing cache code """
  1308         formatter = self.formatter
  1309         request.clock.start("Page.execute")
  1310         try:
  1311             from MoinMoin.macro import Macro
  1312             macro_obj = Macro(parser)
  1313             # Fix __file__ when running from a zip package
  1314             import MoinMoin
  1315             if hasattr(MoinMoin, '__loader__'):
  1316                 __file__ = os.path.join(MoinMoin.__loader__.archive, 'dummy')
  1317             try:
  1318                 exec code
  1319             except "CacheNeedsUpdate": # convert the exception
  1320                 raise Exception("CacheNeedsUpdate")
  1321         finally:
  1322             request.clock.stop("Page.execute")
  1323 
  1324     def loadCache(self, request):
  1325         """ Return page content cache or raise 'CacheNeedsUpdate' if it is stale """
  1326         cache = caching.CacheEntry(request, self, self.getFormatterName(), scope='item')
  1327         attachmentsPath = self.getPagePath('attachments', check_create=0)
  1328         if cache.needsUpdate(self._text_filename(), attachmentsPath):
  1329             raise Exception('CacheNeedsUpdate')
  1330 
  1331         import marshal
  1332         try:
  1333             return marshal.loads(cache.content())
  1334         except (EOFError, ValueError, TypeError):
  1335             # Bad marshal data, must update the cache.
  1336             # See http://docs.python.org/lib/module-marshal.html
  1337             raise Exception('CacheNeedsUpdate')
  1338         except Exception, err:
  1339             logging.info('failed to load "%s" cache: %s' %
  1340                         (self.page_name, str(err)))
  1341             raise Exception('CacheNeedsUpdate')
  1342 
  1343     def makeCache(self, request, parser):
  1344         """ Format content into code, update cache and return code """
  1345         import marshal
  1346         from MoinMoin.formatter.text_python import Formatter
  1347         formatter = Formatter(request, ["page"], self.formatter)
  1348 
  1349         # Save request state while formatting page
  1350         saved_current_lang = request.current_lang
  1351         try:
  1352             text = request.redirectedOutput(parser.format, formatter)
  1353         finally:
  1354             request.current_lang = saved_current_lang
  1355 
  1356         src = formatter.assemble_code(text)
  1357         code = compile(src.encode(config.charset),
  1358                        self.page_name.encode(config.charset), 'exec')
  1359         cache = caching.CacheEntry(request, self, self.getFormatterName(), scope='item')
  1360         cache.update(marshal.dumps(code))
  1361         return code
  1362 
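           # Minimal standalone sketch (annotation) of the cache round trip used by
           # makeCache()/loadCache()/execute(): compile source to a code object,
           # serialize it with marshal, and execute the deserialized result.
           # 'utf-8' stands in here for config.charset.
           #
           #   import marshal
           #   code = compile(u"x = 1 + 1".encode('utf-8'), 'ExamplePage', 'exec')
           #   blob = marshal.dumps(code)   # what cache.update() stores
           #   exec marshal.loads(blob)     # what loadCache() + execute() replay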
  1363     def _specialPageText(self, request, special_type):
  1364         """ Output the default page content for missing or access-denied pages.
  1365 
  1366         @param request: the request object
  1367         """
  1368         _ = request.getText
  1369 
  1370         if special_type == 'missing':
  1371             if request.user.valid and request.user.name == self.page_name and \
  1372                request.cfg.user_homewiki in ('Self', request.cfg.interwikiname):
  1373                 page = wikiutil.getLocalizedPage(request, 'MissingHomePage')
  1374             else:
  1375                 page = wikiutil.getLocalizedPage(request, 'MissingPage')
  1376 
  1377             alternative_text = u"'''<<Action(action=edit, text=\"%s\")>>'''" % _('Create New Page')
  1378         elif special_type == 'denied':
  1379             page = wikiutil.getLocalizedPage(request, 'PermissionDeniedPage')
  1380             alternative_text = u"'''%s'''" % _('You are not allowed to view this page.')
  1381         else:
  1382             assert False
  1383 
  1384         special_exists = page.exists()
  1385 
  1386         if special_exists:
  1387             page._text_filename_force = page._text_filename()
  1388         else:
  1389             page.body = alternative_text
  1390             logging.warn('The page "%s" could not be found. Check your'
  1391                          ' underlay directory setting.' % page.page_name)
  1392         page.page_name = self.page_name
  1393 
  1394         page.send_page(content_only=True, do_cache=not special_exists, send_special=True)
  1395 
  1396 
  1397     def getRevList(self):
  1398         """ Get a page revision list of this page, including the current version,
  1399         sorted by revision number in descending order (current page first).
  1400 
  1401         @rtype: list of ints
  1402         @return: page revisions
  1403         """
  1404         revisions = []
  1405         if self.page_name:
  1406             rev_dir = self.getPagePath('revisions', check_create=0)
  1407             if os.path.isdir(rev_dir):
  1408                 for rev in filesys.dclistdir(rev_dir):
  1409                     try:
  1410                         revint = int(rev)
  1411                         revisions.append(revint)
  1412                     except ValueError:
  1413                         pass
  1414                 revisions.sort()
  1415                 revisions.reverse()
  1416         return revisions
  1417 
  1418     def olderrevision(self, rev=0):
  1419         """ Get the next older page revision than rev.
  1420         rev == 0 means this page object's revision (which may already be
  1421         an old revision!). Returns None if there is no older revision.
  1422         """
  1423         if rev == 0:
  1424             rev = self.rev
  1425         older = None # stays None if there is no older revision
  1426         for r in self.getRevList():
  1427             if r < rev:
  1428                 older = r
  1429                 break
  1430         return older
  1431 
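           # Usage sketch (annotation; assumes a live `request`): revisions are
           # returned newest first, so the current revision is revs[0].
           #
           #   page = Page(request, u'FrontPage')
           #   revs = page.getRevList()              # e.g. [3, 2, 1]
           #   if len(revs) > 1:
           #       prev = page.olderrevision(revs[0])  # e.g. 2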
  1432     def getPageText(self, start=0, length=None):
  1433         """ Convenience function to get the page text, skipping the header
  1434 
  1435         @rtype: unicode
  1436         @return: page text, excluding the header
  1437         """
  1438         if length is None:
  1439             return self.data[start:]
  1440         else:
  1441             return self.data[start:start+length]
  1442 
  1443     def getPageHeader(self, start=0, length=None):
  1444         """ Convenience function to get the page header
  1445 
  1446         @rtype: unicode
  1447         @return: page header
  1448         """
  1449         header = ['#%s %s' % t for t in self.meta]
  1450         header = '\n'.join(header)
  1451         if header:
  1452             if length is None:
  1453                 return header[start:]
  1454             else:
  1455                 return header[start:start+length]
  1456         return ''
  1457 
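           # Usage sketch (annotation): header and body can be recombined into an
           # approximation of the raw page source (processing instructions first).
           #
           #   page = Page(request, u'FrontPage')
           #   header = page.getPageHeader()  # e.g. u'#format wiki\n#language en'
           #   body = page.getPageText()      # page text without the header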
  1458     def getPageLinks(self, request):
  1459         """ Get a list of the links on this page.
  1460 
  1461         @param request: the request object
  1462         @rtype: list
  1463         @return: page names this page links to
  1464         """
  1465         if self.exists():
  1466             cache = caching.CacheEntry(request, self, 'pagelinks', scope='item', do_locking=False, use_pickle=True)
  1467             if cache.needsUpdate(self._text_filename()):
  1468                 links = self.parsePageLinks(request)
  1469                 cache.update(links)
  1470             else:
  1471                 try:
  1472                     links = cache.content()
  1473                 except caching.CacheError:
  1474                     links = self.parsePageLinks(request)
  1475                     cache.update(links)
  1476         else:
  1477             links = []
  1478         return links
  1479 
  1480     def parsePageLinks(self, request):
  1481         """ Parse page links by formatting with a pagelinks formatter
  1482 
  1483         This is an old hack that gets the pagelinks by rendering the page
  1484         with send_page. We can remove this hack after factoring
  1485         send_page and send_page_content into small reusable methods.
  1486 
  1487         It is now more efficient because it uses the special pagelinks
  1488         formatter and redirects any output into a null file.
  1489         """
  1490         pagename = self.page_name
  1491         if request.parsePageLinks_running.get(pagename, False):
  1492             #logging.debug("avoid recursion for page %r" % pagename)
  1493             return [] # avoid recursion
  1494 
  1495         #logging.debug("running parsePageLinks for page %r" % pagename)
  1496         # remember we are already running this function for this page:
  1497         request.parsePageLinks_running[pagename] = True
  1498 
  1499         request.clock.start('parsePageLinks')
  1500 
  1501         class Null:
  1502             def write(self, data):
  1503                 pass
  1504 
  1505         request.redirect(Null())
  1506         request.mode_getpagelinks += 1
  1507         #logging.debug("mode_getpagelinks == %r" % request.mode_getpagelinks)
  1508         try:
  1509             try:
  1510                 from MoinMoin.formatter.pagelinks import Formatter
  1511                 formatter = Formatter(request, store_pagelinks=1)
  1512                 page = Page(request, pagename, formatter=formatter)
  1513                 page.send_page(content_only=1)
  1514             except:
  1515                 logging.exception("pagelinks formatter failed, traceback follows")
  1516         finally:
  1517             request.mode_getpagelinks -= 1
  1518             #logging.debug("mode_getpagelinks == %r" % request.mode_getpagelinks)
  1519             request.redirect()
  1520             if hasattr(request, '_fmt_hd_counters'):
  1521                 del request._fmt_hd_counters
  1522             request.clock.stop('parsePageLinks')
  1523         return formatter.pagelinks
  1524 
  1525     def getCategories(self, request):
  1526         """ Get categories this page belongs to.
  1527 
  1528         @param request: the request object
  1529         @rtype: list
  1530         @return: categories this page belongs to
  1531         """
  1532         return wikiutil.filterCategoryPages(request, self.getPageLinks(request))
  1533 
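           # Usage sketch (annotation; assumes a live `request`):
           #
           #   page = Page(request, u'FrontPage')
           #   links = page.getPageLinks(request)        # e.g. [u'RecentChanges', ...]
           #   categories = page.getCategories(request)  # the category pages among them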
  1534     def getParentPage(self):
  1535         """ Return parent page or None
  1536 
  1537         @rtype: Page
  1538         @return: parent page or None
  1539         """
  1540         if self.page_name:
  1541             pos = self.page_name.rfind('/')
  1542             if pos > 0:
  1543                 parent = Page(self.request, self.page_name[:pos])
  1544                 if parent.exists():
  1545                     return parent
  1546         return None
  1547 
  1548     def getACL(self, request):
  1549         """ Get cached ACLs of this page.
  1550 
  1551         Return cached ACL or invoke parseACL and update the cache.
  1552 
  1553         @param request: the request object
  1554         @rtype: MoinMoin.security.AccessControlList
  1555         @return: ACL of this page
  1556         """
  1557         try:
  1558             return self.__acl # for request.page, n-1 of n calls take this shortcut
  1559         except AttributeError:
  1560             # the caching here is still useful for pages != request.page,
  1561             # when we have multiple page objects for the same page name.
  1562             request.clock.start('getACL')
  1563             # Try the cache or parse acl and update the cache
  1564             currentRevision = self.current_rev()
  1565             cache_name = self.page_name
  1566             cache_key = 'acl'
  1567             cache_data = request.cfg.cache.meta.getItem(request, cache_name, cache_key)
  1568             if cache_data is None:
  1569                 aclRevision, acl = None, None
  1570             else:
  1571                 aclRevision, acl = cache_data
  1572             #logging.debug("current rev: %r, cached acl rev: %r" % (currentRevision, aclRevision))
  1573             if aclRevision != currentRevision:
  1574                 acl = self.parseACL()
  1575                 if currentRevision != 99999999:
  1576                     # Don't cache ACLs for non-existing pages. Otherwise, while
  1577                     # filesys.copytree copies a page (PageEditor.copyPage), the first
  1578                     # may-test would create a cache entry holding the default ACLs for
  1579                     # a not-yet-existing page. ACLs on that page would then be ignored
  1580                     # until the copy is completed by adding an edit-log entry.
  1581                     cache_data = (currentRevision, acl)
  1582                     request.cfg.cache.meta.putItem(request, cache_name, cache_key, cache_data)
  1583             self.__acl = acl
  1584             request.clock.stop('getACL')
  1585             return acl
  1586 
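           # Usage sketch (annotation; assumes the AccessControlList.may(request,
           # username, right) interface of MoinMoin.security):
           #
           #   acl = page.getACL(request)
           #   if acl.may(request, request.user.name, 'read'):
           #       pass # user may read this page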
  1587     def parseACL(self):
  1588         """ Return ACLs parsed from the last available revision
  1589 
  1590         The effective ACL is always from the last revision, even if
  1591         you access an older revision.
  1592         """
  1593         from MoinMoin import security
  1594         if self.exists() and self.rev == 0:
  1595             return self.pi['acl']
  1596         try:
  1597             lastRevision = self.getRevList()[0]
  1598         except IndexError:
  1599             return security.AccessControlList(self.request.cfg)
  1600         if self.rev == lastRevision:
  1601             return self.pi['acl']
  1602 
  1603         return Page(self.request, self.page_name, rev=lastRevision).parseACL()
  1604 
  1605     # Text format -------------------------------------------------------
  1606 
  1607     def encodeTextMimeType(self, text):
  1608         """ Encode text from moin internal representation to text/* mime type
  1609 
  1610         Make sure text uses CRLF line ends, keep trailing newline.
  1611 
  1612         @param text: text to encode (unicode)
  1613         @rtype: unicode
  1614         @return: encoded text
  1615         """
  1616         if text:
  1617             lines = text.splitlines()
  1618             # Keep trailing newline
  1619             if text.endswith(u'\n') and not lines[-1] == u'':
  1620                 lines.append(u'')
  1621             text = u'\r\n'.join(lines)
  1622         return text
  1623 
  1624     def decodeTextMimeType(self, text):
  1625         """ Decode text from text/* mime type to moin internal representation
  1626 
  1627         @param text: text to decode (unicode). Text must use CRLF!
  1628         @rtype: unicode
  1629         @return: text using internal representation
  1630         """
  1631         text = text.replace(u'\r', u'')
  1632         return text
  1633 
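           # Round-trip sketch (annotation): internal text uses LF line ends,
           # text/* mime payloads use CRLF.
           #
           #   crlf = page.encodeTextMimeType(u'line one\nline two\n')
           #   # crlf == u'line one\r\nline two\r\n'
           #   lf = page.decodeTextMimeType(crlf)
           #   # lf == u'line one\nline two\n'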
  1634     def isConflict(self):
  1635         """ Return True if there is a known editing conflict for this page.
  1636 
  1637         @return: True if there is a known conflict.
  1638         """
  1639 
  1640         cache = caching.CacheEntry(self.request, self, 'conflict', scope='item')
  1641         return cache.exists()
  1642 
  1643     def setConflict(self, state):
  1644         """ Sets the editing conflict flag.
  1645 
  1646         @param state: bool, true if there is a conflict.
  1647         """
  1648         cache = caching.CacheEntry(self.request, self, 'conflict', scope='item')
  1649         if state:
  1650             cache.update("") # touch it!
  1651         else:
  1652             cache.remove()
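           # Usage sketch (annotation): the conflict flag is just a cache entry
           # that gets touched or removed.
           #
           #   page.setConflict(True)
           #   assert page.isConflict()
           #   page.setConflict(False)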
  1653 
  1654 
  1655 class RootPage(Page):
  1656     """ These functions were moved out of the Page class to drop hierarchical
  1657         page storage support until we have a storage API (and really need it).
  1658         Currently, there is only one instance of this class: request.rootpage
  1659     """
  1660     def __init__(self, request):
  1661         page_name = u''
  1662         Page.__init__(self, request, page_name)
  1663 
  1664     def getPageBasePath(self, use_underlay=0):
  1665         """ Get the base path to the page storage area (standard or
  1666             underlay data directory, depending on use_underlay).
  1667 
  1668         @param use_underlay: force using a specific pagedir, default 0:
  1669                                 1 = use underlay page dir
  1670                                 0 = use standard page dir
  1671                                 Note: we do NOT have special support for -1
  1672                                       here, that will just behave as 0!
  1673         @rtype: tuple of (int, str)
  1674         @return: int underlay flag (1 = underlay, 0 = standard),
  1675                  str the full path to the storage area
  1676         """
  1677         if self.cfg.data_underlay_dir is None:
  1678             use_underlay = 0
  1679 
  1680         # 'auto' doesn't make sense here. maybe not even 'underlay':
  1681         if use_underlay == 1:
  1682             underlay, path = 1, self.cfg.data_underlay_dir
  1683         # no need to check 'standard' case, we just use path in that case!
  1684         else:
  1685             # this is the location of the virtual root page
  1686             underlay, path = 0, self.cfg.data_dir
  1687 
  1688         return underlay, path
  1689 
  1690     def getPageList(self, user=None, exists=1, filter=None, include_underlay=True, return_objects=False):
  1691         """ List user readable pages under current page
  1692 
  1693         Currently only request.rootpage is used to list pages, but if we
  1694         have true sub pages, any page can list its sub pages.
  1695 
  1696         The default behavior is listing all the pages readable by the
  1697         current user. If you want to get a page list for another user,
  1698         specify the user name.
  1699 
  1700         If you want to get the full page list, without user filtering,
  1701         call with user="". Use this only if really needed, and do not
  1702         display pages the user can not read.
  1703 
  1704         filter is usually a compiled regex match or search method, but can
  1705         be any callable that takes a unicode argument and returns bool. If
  1706         you want to filter the page list, do it with this filter function,
  1707         and NOT on the output of this function. page.exists() and
  1708         user.may.read are very expensive and should be applied to the
  1709         smallest possible data set.
  1710 
  1711         @param user: the user requesting the pages (MoinMoin.user.User)
  1712         @param filter: filter function
  1713         @param exists: filter existing pages
  1714         @param include_underlay: determines if underlay pages are returned as well
  1715         @param return_objects: lets it return a list of Page objects instead of
  1716             names
  1717         @rtype: list of unicode strings
  1718         @return: user readable wiki page names
  1719         """
  1720         request = self.request
  1721         request.clock.start('getPageList')
  1722         # Check input
  1723         if user is None:
  1724             user = request.user
  1725 
  1726         # Get pages cache or create it
  1727         cachedlist = request.cfg.cache.pagelists.getItem(request, 'all', None)
  1728         if cachedlist is None:
  1729             cachedlist = {}
  1730             for name in self._listPages():
  1731                 # Unquote file system names
  1732                 pagename = wikiutil.unquoteWikiname(name)
  1733 
  1734                 # Filter those annoying editor backups - current moin does not create
  1735                 # those pages any more, but users have them already in data/pages
  1736                 # until we remove them with a migration script...
  1737                 if pagename.endswith(u'/MoinEditorBackup'):
  1738                     continue
  1739 
  1740                 cachedlist[pagename] = None
  1741             request.cfg.cache.pagelists.putItem(request, 'all', None, cachedlist)
  1742 
  1743         if user or exists or filter or not include_underlay or return_objects:
  1744             # Filter names
  1745             pages = []
  1746             for name in cachedlist:
  1747                 # First, the custom filter - exists() and ACL checks are
  1748                 # very expensive!
  1749                 if filter and not filter(name):
  1750                     continue
  1751 
  1752                 page = Page(request, name)
  1753 
  1754                 # Filter underlay pages
  1755                 if not include_underlay and page.getPageStatus()[0]: # is an underlay page
  1756                     continue
  1757 
  1758                 # Filter deleted pages
  1759                 if exists and not page.exists():
  1760                     continue
  1761 
  1762                 # Filter out pages the user may not read.
  1763                 if user and not user.may.read(name):
  1764                     continue
  1765 
  1766                 if return_objects:
  1767                     pages.append(page)
  1768                 else:
  1769                     pages.append(name)
  1770         else:
  1771             pages = cachedlist.keys()
  1772 
  1773         request.clock.stop('getPageList')
  1774         return pages
  1775 
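           # Usage sketch (annotation; assumes a live `request`): the filter runs
           # before the expensive exists() and ACL checks.
           #
           #   import re
           #   is_category = re.compile(u'^Category').match
           #   names = request.rootpage.getPageList(filter=is_category)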
  1776     def getPageDict(self, user=None, exists=1, filter=None, include_underlay=True):
  1777         """ Return a dictionary of filtered page objects readable by user
  1778 
  1779         Invoke getPageList then create a dict from the page list. See
  1780         getPageList docstring for more details.
  1781 
  1782         @param user: the user requesting the pages
  1783         @param filter: filter function
  1784         @param exists: only existing pages
  1785         @rtype: dict {unicode: Page}
  1786         @return: user readable pages
  1787         """
  1788         pages = {}
  1789         for name in self.getPageList(user=user, exists=exists, filter=filter, include_underlay=include_underlay):
  1790             pages[name] = Page(self.request, name)
  1791         return pages
  1792 
  1793     def _listPages(self):
  1794         """ Return a dict of file system page names
  1795 
  1796         This is the lowest level disk access, don't use it unless you
  1797         really need it.
  1798 
  1799         NOTE: names are returned in file system encoding, not in unicode!
  1800 
  1801         @rtype: dict
  1802         @return: dict of page names using file system encoding
  1803         """
  1804         # Get pages in standard dir
  1805         path = self.getPagePath('pages')
  1806         pages = self._listPageInPath(path)
  1807 
  1808         if self.cfg.data_underlay_dir is not None:
  1809             # Merge with pages from underlay
  1810             path = self.getPagePath('pages', use_underlay=1)
  1811             underlay = self._listPageInPath(path)
  1812             pages.update(underlay)
  1813 
  1814         return pages
  1815 
  1816     def _listPageInPath(self, path):
  1817         """ List page names in the given directory path
  1818 
  1819         This is the lowest level disk access, don't use it unless you
  1820         really need it.
  1821 
  1822         NOTE: names are returned in file system encoding, not in unicode!
  1823 
  1824         @param path: directory to list (string)
  1825         @rtype: dict
  1826         @return: dict of page names using file system encoding
  1827         """
  1828         pages = {}
  1829         for name in filesys.dclistdir(path):
  1830             # Filter non-pages in quoted wiki names
  1831             # List all pages in pages directory - assume flat namespace.
  1832             # We exclude everything starting with '.' to get rid of . and ..
  1833             # directory entries. If we ever create pagedirs starting with '.'
  1834             # it will be with the intention to have them not show up in page
  1835             # list (like .name won't show up for ls command under UNIX).
  1836             # Note that a . within a wiki page name will be quoted to (2e).
  1837             if not name.startswith('.'):
  1838                 pages[name] = None
  1839 
  1840         if 'CVS' in pages:
  1841             del pages['CVS'] # XXX DEPRECATED: remove this directory name just in
  1842                              # case someone has the pages dir under CVS control.
  1843         return pages
  1844 
  1845     def getPageCount(self, exists=0):
  1846         """ Return page count
  1847 
  1848         The default does the fastest listing and returns the count of
  1849         all pages, including deleted pages, ignoring ACL rights.
  1850 
  1851         If you want to get a more accurate number, call with
  1852         exists=1. This will be about 100 times slower though.
  1853 
  1854         @param exists: filter existing pages
  1855         @rtype: int
  1856         @return: number of pages
  1857         """
  1858         self.request.clock.start('getPageCount')
  1859         if exists:
  1860             # WARNING: SLOW
  1861             pages = self.getPageList(user='')
  1862         else:
  1863             pages = self._listPages()
  1864         count = len(pages)
  1865         self.request.clock.stop('getPageCount')
  1866 
  1867         return count
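           # Usage sketch (annotation): the cheap variant counts all page dirs,
           # the expensive one only pages that currently exist.
           #
           #   total = request.rootpage.getPageCount()             # fast, includes deleted
           #   existing = request.rootpage.getPageCount(exists=1)  # slow, existing pages only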