view MoinMoin/action/ @ 1529:76b4d2324d25

MoinMoin should not record the homepage links as pagelinks, fixes MoinMoinBugs/PageLinksIncludeUnrelatedLinks.
author Alexander Schremmer <alex AT alexanderweb DOT de>
date Tue, 05 Sep 2006 00:29:59 +0200
parents 5e4d0d49dd60
children ed44b93599b5
line wrap: on
line source
# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - Spelling Action
    Word adding based on code by Christian Bird

    This action checks for spelling errors in a page using one or several
    word lists.

    MoinMoin looks for dictionary files in the directory "dict" within the
    MoinMoin package directory. To load the default UNIX word files, you
    have to manually create symbolic links to those files (usually
    '/usr/dict/words' or '/usr/share/dict/words').

    Additionally, all words on the page "LocalSpellingWords" are added to
    the list of valid words, if that page exists.

    @copyright: 2001 by Richard Jones
    @copyright: 2001-2004 by Jürgen Hermann
    @license: GNU GPL, see COPYING for details.
"""

import os, re, codecs
from MoinMoin import config, wikiutil
from MoinMoin.Page import Page

def _getWordsFiles(request):
    """Return the readable word-list files found in the "dict" directories.

    A "dict" subdirectory is looked up below request.cfg.moinmoin_dir and
    request.cfg.data_dir, in that order. Every entry of each existing
    directory is a candidate; only regular files the current process may
    read are kept.

    @param request: object whose cfg attribute provides moinmoin_dir and
                    data_dir
    @return: list of file paths (order follows os.listdir, i.e. unspecified)
    """
    candidates = []

    # load a list of possible word files
    for basedir in (request.cfg.moinmoin_dir, request.cfg.data_dir):
        localdict = os.path.join(basedir, 'dict')
        if os.path.isdir(localdict):
            candidates.extend([os.path.join(localdict, name)
                               for name in os.listdir(localdict)])

    # validate candidate list (leave out directories!)
    return [path for path in candidates
            if os.path.isfile(path) and os.access(path, os.F_OK | os.R_OK)]

def _loadWords(lines, dict):
    """Register every whitespace-separated word of lines as a key of dict.

    Each word is encoded with config.charset before being stored; the value
    stored for every key is the empty string, so dict acts as a set.

    @param lines: iterable of text lines
    @param dict: mapping that is updated in place
    """
    for text in lines:
        for token in text.split():
            encoded = token.encode(config.charset)
            dict[encoded] = ''

def _loadWordsFile(request, dict, filename):
    """Read the words file filename and add its words to dict.

    The file is decoded with config.charset first; if that raises a
    UnicodeError it is read again as iso-8859-1. Every file handle is
    closed before returning, including the one abandoned on the fallback
    path.

    @param request: not used by this function
    @param dict: mapping updated in place by _loadWords
    @param filename: path of the words file
    """
    try:
        file = codecs.open(filename, 'rt', config.charset)
        try:
            lines = file.readlines()
        finally:
            file.close()
    except UnicodeError:
        # not valid in the wiki charset, retry with the legacy encoding
        file = codecs.open(filename, 'rt', 'iso-8859-1')
        try:
            lines = file.readlines()
        finally:
            file.close()
    _loadWords(lines, dict)

def _loadWordsPage(request, dict, page):
    """Add all words found on a wiki page to dict.

    @param request: not used by this function
    @param dict: mapping updated in place by _loadWords
    @param page: object whose getlines() returns the page's text lines
    """
    _loadWords(page.getlines(), dict)

def _loadDict(request):
    """ Load words from words files or cached dict

    When the "dbhash" module is importable, the word list is kept in the
    file "spellchecker.dict" inside the "cache" directory below
    request.cfg.data_dir: an existing cache file is opened read-only,
    otherwise a new one is created and filled from the words files.
    Without "dbhash" a plain in-memory dict is filled on every call.

    @param request: object whose cfg attribute provides data_dir (and the
                    attributes _getWordsFiles needs)
    @return: mapping whose keys are the known words
    """
    # check for "dbhash" module
    try:
        import dbhash
    except ImportError:
        dbhash = None

    # load the words
    cachename = os.path.join(request.cfg.data_dir, 'cache', 'spellchecker.dict')
    if dbhash and os.path.exists(cachename):
        wordsdict = dbhash.open(cachename, "r")
    else:
        wordsfiles = _getWordsFiles(request)
        if dbhash:
            # 'n' always creates a new, empty database
            wordsdict = dbhash.open(cachename, 'n')
        else:
            wordsdict = {}

        for wordsfile in wordsfiles:
            _loadWordsFile(request, wordsdict, wordsfile)

        if dbhash:
            # NOTE(review): flushing the freshly built cache to disk is the
            # presumed intent of this branch -- confirm against upstream
            wordsdict.sync()

    return wordsdict

def _addLocalWords(request):
    """Append the words submitted in the form to the local spelling page.

    The words are read from request.form['newwords'], joined with single
    spaces and appended as a new line to the page named by
    request.cfg.page_local_spelling_words. Nothing happens when the form
    carries no 'newwords' entry.

    @param request: object providing form and cfg
    """
    # get the new words as a string (if any are marked at all)
    try:
        newwords = request.form['newwords']
    except KeyError:
        # no new words checked
        return
    newwords = u' '.join(newwords)

    # imported only once there is something to save
    from MoinMoin.PageEditor import PageEditor

    # get the page contents
    lsw_page = PageEditor(request, request.cfg.page_local_spelling_words)
    words = lsw_page.get_raw_body()

    # add the words to the page and save it
    if words and words[-1] != '\n':
        words = words + '\n'
    lsw_page.saveText(words + '\n' + newwords, 0)

def checkSpelling(page, request, own_form=1):
    """ Do spell checking, return a tuple with the result.

    Every word of the page's raw text is looked up, as written and in
    lower case, in the global dictionary and in the words of the local
    spelling words page. Empty lines and lines starting with '#' are not
    checked; single-character words and pure numbers are never reported.

    If the form contains 'button_newwords', the submitted words are added
    to the local spelling words page before checking.

    @param page: object whose get_raw_body() returns the text to check
    @param request: the request object (form, cfg, getText)
    @param own_form: if true, wrap the word checkboxes in their own <form>
    @return: tuple (badwords, badwords_re, msg); badwords is a list of the
             unknown words sorted case-insensitively (an empty dict when
             there are none), badwords_re a compiled regex matching them
             (None when there are none), msg an HTML status message
    """
    _ = request.getText

    # first check to see if we we're called with a "newwords" parameter
    if request.form.has_key('button_newwords'):
        _addLocalWords(request)

    # load words
    wordsdict = _loadDict(request)

    localwords = {}
    lsw_page = Page(request, request.cfg.page_local_spelling_words)
    if lsw_page.exists():
        _loadWordsPage(request, localwords, lsw_page)

    # init status vars & load page
    badwords = {}
    text = page.get_raw_body()

    # checker regex and matching substitute function
    word_re = re.compile(r'([%s]?[%s]+)' % (
        config.chars_upper, config.chars_lower), re.UNICODE)

    def checkword(match, wordsdict=wordsdict, badwords=badwords,
            localwords=localwords, num_re=re.compile(r'^\d+$', re.UNICODE)):
        # called by word_re.sub() for its side effect on badwords only;
        # the replacement text it returns is thrown away
        word = match.group(1)
        if len(word) == 1:
            return ""
        # dictionary keys are stored encoded, see _loadWords
        w_enc = word.encode(config.charset)
        wl_enc = word.lower().encode(config.charset)
        if not (wordsdict.has_key(w_enc) or
                wordsdict.has_key(wl_enc) or
                localwords.has_key(w_enc) or
                localwords.has_key(wl_enc) ):
            if not num_re.match(word):
                badwords[word] = 1
        return ""

    # do the checking
    for line in text.split('\n'):
        if line == '' or line[0] == '#': continue
        word_re.sub(checkword, line)

    if badwords:
        badwords = badwords.keys()
        badwords.sort(lambda x, y: cmp(x.lower(), y.lower()))

        # build regex recognizing the bad words
        badwords_re = r'(^|(?<!\w))(%s)(?!\w)'
        badwords_re = badwords_re % ("|".join(map(re.escape, badwords)),)
        badwords_re = re.compile(badwords_re, re.UNICODE)

        lsw_msg = ''
        if localwords:
            lsw_msg = ' ' + _('(including %(localwords)d %(pagelink)s)') % {
                'localwords': len(localwords), 'pagelink': lsw_page.link_to(request)}
        msg = _('The following %(badwords)d words could not be found in the dictionary of '
                '%(totalwords)d words%(localwords)s and are highlighted below:') % {
            'badwords': len(badwords),
            'totalwords': len(wordsdict)+len(localwords),
            'localwords': lsw_msg} + "<br>"

        # figure out what this action is called
        action_name = os.path.splitext(os.path.basename(__file__))[0]

        # add a form containing the bad words
        if own_form:
            msg = msg + ('<form method="post" action="">\n'
                         '<input type="hidden" name="action" value="%s">\n') % action_name

        checkbox = '<input type="checkbox" name="newwords" value="%(word)s">%(word)s&nbsp;&nbsp;'
        msg = msg + (
            " ".join(map(lambda w, cb=checkbox: cb % {'word': wikiutil.escape(w), }, badwords)) +
            '<p><input type="submit" name="button_newwords" value="%s"></p>' %
                _('Add checked words to dictionary')
        )
        if own_form:
            msg = msg + '</form>'
    else:
        badwords_re = None
        msg = _("No spelling errors found!")

    return badwords, badwords_re, msg

def execute(pagename, request):
    """Run the spell check action on a page and send the result.

    The page is sent with a status message; when misspelled words were
    found they are additionally highlighted through hilite_re.

    @param pagename: name of the page to check
    @param request: the request object
    """
    _ = request.getText
    page = Page(request, pagename)
    # NOTE(review): the read-permission test is inferred from the error
    # message below -- confirm the exact call against upstream
    if request.user.may.read(pagename):
        badwords, badwords_re, msg = checkSpelling(page, request)
    else:
        badwords = []
        msg = _("You can't check spelling on a page you can't read.")

    if badwords:
        page.send_page(request, msg=msg, hilite_re=badwords_re)
    else:
        page.send_page(request, msg=msg)