changeset 1334:7775b101e182

Implement EXTERNALLINKS metadata (modified/extended patch by Reimar Bauer). The link converter now extracts all external (non-wiki / non-wiki.local) links, and they are written into meta[EXTERNALLINKS]. Potential usage scenarios: link checkers, anti-spam, ...
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 21 Apr 2012 05:27:18 +0200
parents 27998494c507
children 792712fe85e8
files MoinMoin/constants/keys.py MoinMoin/converter/_tests/test_link.py MoinMoin/converter/link.py MoinMoin/storage/middleware/indexing.py
diffstat 4 files changed, 64 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/constants/keys.py	Sat Apr 21 02:43:10 2012 +0200
+++ b/MoinMoin/constants/keys.py	Sat Apr 21 05:27:18 2012 +0200
@@ -29,6 +29,7 @@
 CONTENTTYPE = "contenttype"
 SIZE = "size"
 LANGUAGE = "language"
+EXTERNALLINKS = "externallinks"
 ITEMLINKS = "itemlinks"
 ITEMTRANSCLUSIONS = "itemtransclusions"
 TAGS = "tags"
--- a/MoinMoin/converter/_tests/test_link.py	Sat Apr 21 02:43:10 2012 +0200
+++ b/MoinMoin/converter/_tests/test_link.py	Sat Apr 21 05:27:18 2012 +0200
@@ -65,6 +65,16 @@
         for i in pairs:
             yield (self._do_wikilocal, ) + i
 
+    def test_wikiexternal(self):
+        pairs = [
+            ('http://moinmo.in/',
+             'http://moinmo.in/'),
+            ('mailto:foo.bar@example.org',
+             'mailto:foo.bar@example.org'),
+        ]
+        for i in pairs:
+            yield (self._do_wikiexternal, ) + i
+
     def _do_wiki(self, input, output, skip=None):
         if skip:
             pytest.skip(skip)
@@ -79,6 +89,14 @@
         self.conv.handle_wikilocal_links(elem, Iri(input), Iri(page))
         assert elem.get(xlink.href) == output
 
+    def _do_wikiexternal(self, input, output, skip=None):
+        if skip:
+            pytest.skip(skip)
+        elem = ET.Element(None)
+        self.conv.handle_external_links(elem, Iri(input))
+        href = elem.get(xlink.href)
+        assert href == output
+
 
 class TestConverterRefs(object):
     def setup_class(self):
@@ -99,8 +117,9 @@
         """
         transclusions_expected = [u"moin_transcluded", u"moin2_transcluded"]
         links_expected = [u"moin_linked", u"moin2_linked"]
+        external_expected = []
 
-        self.runItemTest(tree_xml, links_expected, transclusions_expected)
+        self.runItemTest(tree_xml, links_expected, transclusions_expected, external_expected)
 
     def testRelativeItems(self):
         tree_xml = u"""
@@ -112,17 +131,33 @@
         """
         transclusions_expected = [u"Home/Subpage/moin2_transcluded", u"moin_transcluded"]
         links_expected = [u"moin_linked", u"Home/Subpage/moin2_linked"]
+        external_expected = []
 
-        self.runItemTest(tree_xml, links_expected, transclusions_expected)
+        self.runItemTest(tree_xml, links_expected, transclusions_expected, external_expected)
 
-    def runItemTest(self, tree_xml, links_expected, transclusions_expected):
+    def testExternal(self):
+        tree_xml = u"""
+        <ns0:page ns0:page-href="wiki:///Home/Subpage" xmlns:ns0="http://moinmo.in/namespaces/page" xmlns:ns1="http://www.w3.org/1999/xlink" xmlns:ns2="http://www.w3.org/2001/XInclude">
+        <ns0:body><ns0:p><ns0:a ns1:href="http://example.org/">test</ns0:a>
+        <ns0:a ns1:href="mailto:foo.bar@example.org">test</ns0:a>
+        </ns0:p></ns0:body></ns0:page>
+        """
+        transclusions_expected = []
+        links_expected = []
+        external_expected = [u"http://example.org/", u"mailto:foo.bar@example.org"]
+
+        self.runItemTest(tree_xml, links_expected, transclusions_expected, external_expected)
+
+    def runItemTest(self, tree_xml, links_expected, transclusions_expected, external_expected):
         tree = ET.XML(tree_xml)
         self.converter(tree)
         links_result = self.converter.get_links()
         transclusions_result = self.converter.get_transclusions()
+        external_result = self.converter.get_external_links()
 
         # sorting instead of sets
         # so that we avoid deduplicating duplicated items in the result
         assert sorted(links_result) == sorted(links_expected)
         assert sorted(transclusions_result) == sorted(transclusions_expected)
+        assert sorted(external_result) == sorted(external_expected)
 
--- a/MoinMoin/converter/link.py	Sat Apr 21 02:43:10 2012 +0200
+++ b/MoinMoin/converter/link.py	Sat Apr 21 05:27:18 2012 +0200
@@ -36,6 +36,9 @@
     def handle_wikilocal_transclusions(self, elem, link, page_name):
         pass
 
+    def handle_external_links(self, elem, link):
+        pass
+
     def __call__(self, *args, **kw):
         """
         Calls the self.traverse_tree method
@@ -63,7 +66,7 @@
             elif xlink_href.scheme == 'wiki':
                 self.handle_wiki_links(elem, xlink_href)
             elif xlink_href.scheme:
-                elem.set(moin_page.class_, 'moin-' + xlink_href.scheme)
+                self.handle_external_links(elem, xlink_href)
 
         elif xinclude_href:
             xinclude_href = Iri(xinclude_href)
@@ -175,6 +178,10 @@
         link = Iri(url, query=query, fragment=input.fragment)
         elem.set(self._tag_xlink_href, link)
 
+    def handle_external_links(self, elem, input):
+        elem.set(self._tag_xlink_href, input)
+        elem.set(moin_page.class_, 'moin-' + input.scheme)
+
 
 class ConverterItemRefs(ConverterBase):
     """
@@ -189,6 +196,7 @@
         super(ConverterItemRefs, self).__init__(**kw)
         self.links = set()
         self.transclusions = set()
+        self.external_links = set()
 
     def __call__(self, *args, **kw):
         """
@@ -198,6 +206,7 @@
         # in the handle methods
         self.links = set()
         self.transclusions = set()
+        self.external_links = set()
 
         super(ConverterItemRefs, self).__call__(*args, **kw)
 
@@ -229,6 +238,14 @@
         path = self.absolute_path(path, page.path)
         self.transclusions.add(unicode(path))
 
+    def handle_external_links(self, elem, input):
+        """
+        Adds the external link (the input iri, converted to unicode) to self.external_links
+        :param elem: the element of the link
+        :param input: the iri of the link
+        """
+        self.external_links.add(unicode(input))
+
     def get_links(self):
         """
         return a list of unicode link target item names
@@ -241,6 +258,11 @@
         """
         return list(self.transclusions)
 
+    def get_external_links(self):
+        """
+        return a list of unicode external link targets (the collected iris, not item names)
+        """
+        return list(self.external_links)
 
 from . import default_registry
 default_registry.register(ConverterExternOutput._factory, type_moin_document, type_moin_document)
--- a/MoinMoin/storage/middleware/indexing.py	Sat Apr 21 02:43:10 2012 +0200
+++ b/MoinMoin/storage/middleware/indexing.py	Sat Apr 21 05:27:18 2012 +0200
@@ -77,7 +77,7 @@
 
 from MoinMoin.config import WIKINAME, NAME, NAME_EXACT, MTIME, CONTENTTYPE, TAGS, \
                             LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, \
-                            CONTENT, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \
+                            CONTENT, EXTERNALLINKS, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \
                             ITEMID, REVID, CURRENT, PARENTID, \
                             LATEST_REVS, ALL_REVS, \
                             CONTENTTYPE_USER
@@ -187,6 +187,7 @@
                 # side effect: we update some metadata:
                 meta[ITEMLINKS] = refs_conv.get_links()
                 meta[ITEMTRANSCLUSIONS] = refs_conv.get_transclusions()
+                meta[EXTERNALLINKS] = refs_conv.get_external_links()
             doc = output_conv(doc)
             return doc
         # no way