view contrib/googleimport/driver.py @ 5729:4507be75514a

googleimport: get rid of line anchors and lineXXX class attrs
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 20 Nov 2010 20:57:38 +0100
parents bbfe34d68e6c
children d70b5147ef0b
line wrap: on
line source
#!/usr/bin/env python
"""
MoinMoin wiki project -> Google Project Hosting converter

full of evil antipatterns, incl. Exception exceptions

@copyright: 2007,2010 MoinMoin:AlexanderSchremmer
@license: GNU GPL v2
"""

import sys
import re
import urllib2
from urllib import quote
import xmlrpclib
import csv

from MoinMoin.web.contexts import ScriptContext
from MoinMoin.Page import Page

# monkeypatch the formatter to avoid line_anchors:
# (this sets an attribute on the text_html module itself, once, at import
# time of this script - i.e. before any Task below renders a page)
from MoinMoin.formatter import text_html
text_html.line_anchors = False

# Single module-level request object; Task.__init__ uses it both to create
# the Page it renders and to capture that page's output.
request = ScriptContext(None, None)


class DataNotFoundException(Exception):
    """Raised when an expected entry cannot be found in a page's text."""


class Task(object):
    """One task to be imported, with its description already rendered.

    The constructor takes the description as wiki markup, renders it through
    a MoinMoin Page and stores the rendered text (newlines flattened, per-line
    CSS classes removed) in ``self.desc``. All other arguments are stored
    unchanged under attributes of the same name.
    """

    # Matches the per-line CSS class attributes found in the rendered page,
    # for any numeric suffix (e.g. line862, line867, line874, line891).
    _LINE_CLASS_RE = re.compile(r' class="line\d+"')

    def __init__(self, summary, desc, label, hours, mentors, difficulty, types):
        """Store the task fields and render ``desc``.

        @param summary: task title
        @param desc: task description as wiki markup
        @param label: task label / tags
        @param hours: duration; formatted with %i by __repr__, so it is
                      expected to be an integer
        @param mentors: mentors field, stored as given
        @param difficulty: difficulty field, stored as given
        @param types: type field, stored as given
        """
        self.summary = summary
        self.label = label
        self.hours = hours
        self.mentors = mentors
        self.difficulty = difficulty
        self.types = types

        # Render the markup with a throw-away, nameless page and capture what
        # send_page writes instead of letting it go to the real output.
        page = Page(request, "")
        page.set_raw_body(desc)
        rendered = request.redirectedOutput(page.send_page, content_only=1)

        # Flatten to a single line, then drop every lineNNN class attribute.
        rendered = rendered.replace('\n', ' ')
        rendered = self._LINE_CLASS_RE.sub('', rendered)
        self.desc = rendered

    def __repr__(self):
        """Return a UTF-8 encoded debug string; desc is cut to 100 characters."""
        return (u"<Task summary=%r label=%r hours=%i mentors=%r difficulty=%r types=%r desc='''%s'''>" % (
            self.summary, self.label, self.hours, self.mentors, self.difficulty,
            self.types, self.desc[:100])).encode("utf-8")


def find_dict_entry(name, text):
    """Return the value of the `` name:: value`` line for ``name`` in ``text``.

    The entry must start a line with exactly one space, followed by the name,
    ``::`` and a space; everything up to the end of that line is the value.
    If several lines match, the first one wins.

    @param name: literal entry name (not a regular expression)
    @param text: page text to search
    @return: the text after ``name:: `` on the matching line
    @raise DataNotFoundException: if no such line exists
    """
    # name is a literal key, so escape it: otherwise e.g. "A.B" would also
    # match a line starting with " AxB:: ".
    pattern = r"^ %s:: (.*)$" % (re.escape(name), )
    m = re.search(pattern, text, re.M | re.U)
    if m is None:
        raise DataNotFoundException("%s not found" % (name, ))
    return m.group(1)


# Captures (non-greedily) everything between the "= Description =" heading
# line and the following "= Discussion =" heading.
desc_pattern = r"""= Description =
([\s\S]*?)
= Discussion ="""

# Same idea for bug pages: captures everything between "= Description ="
# and the next line that starts with "=".
bugpage_pattern = r"""= Description =
([\s\S]*?)
="""

# Names to skip, one per line between the triple quotes. Every line is
# stripped; with nothing pasted in, each set holds just the empty string.
already_pushed_pages = set(name.strip() for name in """
""".split("\n"))

already_pushed_bugs = set(name.strip() for name in """
""".split("\n"))

# Gatherer functions, appended by Collector.is_gatherer while the Collector
# class body is executed.
gatherers = []


class Collector(object):
    def is_gatherer(function):
        gatherers.append(function)
        return function

    def __init__(self, url):
        self.url = url
        self.server = xmlrpclib.ServerProxy(url + "?action=xmlrpc2")

    def collect_tasks(self):
        tasks = []
        for gatherer in gatherers:
            new = list(gatherer(self))
            tasks.extend(new)

        return tasks

    @is_gatherer
    def easytodo_pages(self):
        pages = self.server.getAllPagesEx(dict(prefix="EasyToDo/"))
        for page in pages:
            if page in already_pushed_pages:
                continue
            page_contents = self.server.getPage(page)
            try:
                summary = find_dict_entry("Title", page_contents)
                count = int(find_dict_entry("Count", page_contents))
                label = find_dict_entry("Tags", page_contents)
                hours = int(find_dict_entry("Duration", page_contents))
                mentors = find_dict_entry("Mentors", page_contents)
                difficulty = find_dict_entry("Difficulty", page_contents)
                types = find_dict_entry("Type", page_contents)
            except (DataNotFoundException, ValueError), e:
                print >>sys.stderr, "Could not import %r because of %r" % (page, e)
                continue
            desc_m = re.search(desc_pattern, page_contents)
            if not desc_m:
                raise Exception("Desc not found")
            desc = desc_m.groups()[0]

            for i in range(1, count + 1):
                text = desc
                new_summary = summary
                text += "\n\nYou can discuss this issue at the MoinMoin wiki: %s" % (self.url + quote(page.encode("utf-8")), )
                if count > 1:
                    text += "\n\nThis issue is available multiple times. This one is %i of %i." % (i, count)
                    new_summary += " %i/%i" % (i, count)
                yield Task(new_summary, text, label, hours, mentors, difficulty, types)

    #@is_gatherer
    def moin_bugs(self):
        pages = [pagename for pagename, contents in self.server.searchPages(r"t:MoinMoinBugs/ r:CategoryEasy\b")]
        for page in pages:
            bug_name = page.replace("MoinMoinBugs/", "")
            if bug_name in already_pushed_bugs:
                continue
            page_contents = self.server.getPage(page)
            m = re.search(bugpage_pattern, page_contents)
            if not m:
                raise Exception("bug desc not found")
            desc = m.groups()[0]
            desc = "A user filed a bug report at the MoinMoin site. Here is a short description about the issue. A more detailed description is available at the MoinMoin wiki: %s\n\n" % (self.url + quote(page.encode("utf-8")), ) + desc
            yield Task(bug_name, desc, "Code")

    #@is_gatherer
    def translation_items(self):
        #languages = self.server.getPage(u"EasyToDoTranslation/Languages").strip().splitlines()
        #languages = ["Lithuanian (lt)"]
        languages = []
        for language in languages:
            page = u"EasyToDoTranslation"
            page_contents = self.server.getPage(page)
            page_contents = page_contents.replace("LANG", language)
            summary = find_dict_entry("Summary", page_contents)
            count = int(find_dict_entry("Count", page_contents))
            desc_m = re.search(desc_pattern, page_contents)
            if not desc_m:
                raise Exception("Desc not found")
            desc = desc_m.groups()[0]
            for i in range(1, count + 1):
                text = desc
                new_summary = summary
                text += "\n\nA more detailed description of this task is available at the MoinMoin wiki: %s" % (self.url + quote(page.encode("utf-8")), )
                if count > 1:
                    text += "\n\nThis task is available multiple times. This one is %i of %i." % (i, count)
                    new_summary += " %i/%i" % (i, count)
                yield Task(new_summary, text, "Translation")


def _csv_cell(value):
    """Return ``value`` with unicode encoded to UTF-8; anything else unchanged."""
    # Python 2's csv writer encodes unicode objects with the default (ASCII)
    # codec, which fails on the first non-ASCII character.
    if isinstance(value, unicode):
        return value.encode("utf-8")
    return value


def pull_and_gencsv():
    """Collect all tasks from http://moinmo.in/ and write them to stdout as CSV.

    Progress messages and the repr of every task go to stderr. Each CSV row
    holds: summary, description, hours, mentors, difficulty, types, label.
    Unicode values are written UTF-8 encoded.
    """
    print >> sys.stderr, "Collecting tasks ..."
    tasks = Collector("http://moinmo.in/").collect_tasks()
    print >> sys.stderr, "Importing %i tasks ..." % (len(tasks), )
    print >> sys.stderr, "\n".join(repr(task) for task in tasks)

    summary_prefix = '' # "[TEST] " # EMPTY FOR PRODUCTION IMPORT!
    # slice bounds for importing only part of the task list; (0, None) = all
    tmin, tmax = 0, None
    csvwriter = csv.writer(sys.stdout, delimiter=",", doublequote=True)
    for task in tasks[tmin:tmax]:
        row = [summary_prefix + task.summary, task.desc, task.hours, task.mentors, task.difficulty, task.types, task.label]
        csvwriter.writerow([_csv_cell(cell) for cell in row])


# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    pull_and_gencsv()