Mercurial > moin > 1.9
comparison MoinMoin/user.py @ 6028:1893da1d5213
userid lookup caches: use 1 on-disk cache file, update cache rather than rebuild
Before this, we maintained one cache file per attribute (e.g. name2id, openid2id,
...) - the related code did multiple passes over all user profiles to rebuild these
cache files.
Now doing a one-pass rebuild, writing all attribute -> userid mappings into
one on-disk cache file called "lookup".
In addition to "name" and "openids", fast lookup is now also supported for "email" and "jid".
On profile save, we used to just kill the cache and let it rebuild. Now the cache
is read, updated and written back (which is much less expensive for wikis with more
than a few users).
Also did some refactoring, reducing duplication and breaking the code down into smaller
functions / methods.
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Wed, 12 Feb 2014 18:22:10 +0100 |
parents | 8618232296b5 |
children | e2da1c1183d8 |
comparison
equal
deleted
inserted
replaced
6027:8618232296b5 | 6028:1893da1d5213 |
---|---|
35 | 35 |
36 from MoinMoin import config, caching, wikiutil, i18n, events | 36 from MoinMoin import config, caching, wikiutil, i18n, events |
37 from werkzeug.security import safe_str_cmp as safe_str_equal | 37 from werkzeug.security import safe_str_cmp as safe_str_equal |
38 from MoinMoin.util import timefuncs, random_string | 38 from MoinMoin.util import timefuncs, random_string |
39 from MoinMoin.wikiutil import url_quote_plus | 39 from MoinMoin.wikiutil import url_quote_plus |
40 | |
41 # for efficient lookup <attr> -> userid, we keep an index of this in the cache. | |
42 # the attribute names in here should be uniquely identifying a user. | |
43 CACHED_USER_ATTRS = ['name', 'email', 'jid', 'openids', ] | |
40 | 44 |
41 | 45 |
42 def getUserList(request): | 46 def getUserList(request): |
43 """ Get a list of all (numerical) user IDs. | 47 """ Get a list of all (numerical) user IDs. |
44 | 48 |
67 def get_by_jabber_id(request, jabber_id): | 71 def get_by_jabber_id(request, jabber_id): |
68 """ Searches for an user with a perticular jabber id and returns it. """ | 72 """ Searches for an user with a perticular jabber id and returns it. """ |
69 filter_func = lambda user: user.valid and user.jid.lower() == jabber_id.lower() | 73 filter_func = lambda user: user.valid and user.jid.lower() == jabber_id.lower() |
70 return get_by_filter(request, filter_func) | 74 return get_by_filter(request, filter_func) |
71 | 75 |
76 | |
72 def _getUserIdByKey(request, key, search): | 77 def _getUserIdByKey(request, key, search): |
73 """ Get the user ID for a specified key/value pair. | 78 """ Get the user ID for a specified key/value pair. |
74 | 79 |
75 This method must only be called for keys that are | 80 This method must only be called for keys that are |
76 guaranteed to be unique. | 81 guaranteed to be unique. |
80 @return the corresponding user ID or None | 85 @return the corresponding user ID or None |
81 """ | 86 """ |
82 if not search or not key: | 87 if not search or not key: |
83 return None | 88 return None |
84 cfg = request.cfg | 89 cfg = request.cfg |
85 scope, arena, cachekey = 'userdir', 'users', '%s2id' % key | 90 cfg_cache_attr = key + "2id" |
86 try: | 91 try: |
87 _key2id = getattr(cfg.cache, cachekey) | 92 attr2id = getattr(cfg.cache, cfg_cache_attr) |
93 from_disk = False | |
88 except AttributeError: | 94 except AttributeError: |
89 cache = caching.CacheEntry(request, arena, cachekey, scope=scope, use_pickle=True) | 95 # no in-memory cache there - initialize it / load it from disk |
90 try: | 96 loadLookupCaches(request) |
91 _key2id = cache.content() | 97 attr2id = getattr(cfg.cache, cfg_cache_attr) |
92 except caching.CacheError: | 98 from_disk = True # we just loaded the stuff from disk |
93 _key2id = {} | 99 uid = attr2id.get(search, None) |
94 setattr(cfg.cache, cachekey, _key2id) | 100 if uid is None and not from_disk: |
95 uid = _key2id.get(search, None) | 101 # we do not have the entry we searched for. |
102 # we didn't find it in some in-memory cache, try refreshing these from disk | |
103 loadLookupCaches(request) | |
104 attr2id = getattr(cfg.cache, cfg_cache_attr) | |
105 from_disk = True # we just loaded the stuff from disk | |
106 uid = attr2id.get(search, None) | |
96 if uid is None: | 107 if uid is None: |
97 # complete cache rebuild on a cache miss! (expensive) | 108 # we do not have the entry we searched for. |
98 # note: we have this code block likely because we were not sure about | 109 # we don't have it in the on-disk cache, cache MISS. |
99 # cache consistency. if we can assure cache consistency, this | 110 # could be because: |
100 # block wouldn't be needed. | 111 # a) ok: we have no such search value in the profiles |
101 for userid in getUserList(request): | 112 # b) fault: the cache is incoherent with the profiles |
102 u = User(request, id=userid) | 113 # c) fault: reading the cache from disk failed, due to an error |
103 if hasattr(u, key): | 114 # d) ok: same as c), but just because no ondisk cache has been built yet |
104 value = getattr(u, key) | 115 rebuildLookupCaches(request) # XXX expensive |
116 attr2id = getattr(cfg.cache, cfg_cache_attr) | |
117 uid = attr2id.get(search, None) | |
118 return uid | |
119 | |
120 | |
121 def setMemoryLookupCaches(request, cache): | |
122 """set the in-memory cache from the given cache contents | |
123 | |
124 @param request: the request object | |
125 @param cache: either a dict of attrname -> attrcache to set the in-memory cache, | |
126 or None to delete the in-memory cache. | |
127 """ | |
128 for attrname in CACHED_USER_ATTRS: | |
129 cfg_cache_attr = attrname + "2id" | |
130 if cache is None: | |
131 try: | |
132 delattr(request.cfg.cache, cfg_cache_attr) | |
133 except: | |
134 pass | |
135 else: | |
136 setattr(request.cfg.cache, cfg_cache_attr, cache[attrname]) | |
137 | |
138 | |
139 def loadLookupCaches(request): | |
140 """load lookup cache contents into memory: cfg.cache.XXX2id""" | |
141 scope, arena, cachekey = 'userdir', 'users', 'lookup' | |
142 diskcache = caching.CacheEntry(request, arena, cachekey, scope=scope, use_pickle=True) | |
143 try: | |
144 cache = diskcache.content() | |
145 except caching.CacheError: | |
146 cache = {} | |
147 for attrname in CACHED_USER_ATTRS: | |
148 cache[attrname] = {} | |
149 setMemoryLookupCaches(request, cache) | |
150 | |
151 | |
152 def rebuildLookupCaches(request): | |
153 """complete attrs -> userid lookup cache rebuild""" | |
154 # as there may be thousands of users and reading all profiles is | |
155 # expensive, we just have 1 lookup cache for all interesting user attrs, | |
156 # so we only need to read all profiles ONCE to build the cache. | |
157 scope, arena, key = 'userdir', 'users', 'lookup' | |
158 diskcache = caching.CacheEntry(request, arena, key, scope=scope, use_pickle=True, do_locking=False) | |
159 diskcache.lock('w') | |
160 | |
161 cache = {} | |
162 for attrname in CACHED_USER_ATTRS: | |
163 cache[attrname] = {} | |
164 for userid in getUserList(request): | |
165 u = User(request, id=userid) | |
166 for attrname in CACHED_USER_ATTRS: | |
167 if hasattr(u, attrname): | |
168 attr2id = cache[attrname] | |
169 value = getattr(u, attrname) | |
105 if isinstance(value, list): | 170 if isinstance(value, list): |
106 for val in value: | 171 for val in value: |
107 _key2id[val] = userid | 172 attr2id[val] = userid |
108 else: | 173 else: |
109 _key2id[value] = userid | 174 attr2id[value] = userid |
110 cache = caching.CacheEntry(request, arena, cachekey, scope=scope, use_pickle=True) | 175 |
111 try: | 176 setMemoryLookupCaches(request, cache) |
112 cache.update(_key2id) | 177 diskcache.update(cache) |
113 except caching.CacheError: | 178 diskcache.unlock() |
114 pass | 179 return cache |
115 uid = _key2id.get(search, None) | 180 |
116 return uid | 181 |
117 | 182 def clearLookupCaches(request): |
118 | 183 """kill the userid lookup cache""" |
119 def clearUserIdLookupCaches(request): | |
120 """kill the userid lookup caches""" | |
121 # this triggers a rebuild of the cache. | 184 # this triggers a rebuild of the cache. |
122 # we maybe could rather update the caches, would be less expensive | 185 setMemoryLookupCaches(request, None) |
123 scope, arena = 'userdir', 'users' | 186 scope, arena, key = 'userdir', 'users', 'lookup' |
124 for key in ['name2id', 'openid2id', ]: | 187 caching.CacheEntry(request, arena, key, scope=scope).remove() |
125 caching.CacheEntry(request, arena, key, scope=scope).remove() | |
126 try: | |
127 delattr(request.cfg.cache, key) | |
128 except: | |
129 pass | |
130 | 188 |
131 | 189 |
132 def getUserId(request, searchName): | 190 def getUserId(request, searchName): |
133 """ Get the user ID for a specific user NAME. | 191 """ Get the user ID for a specific user NAME. |
134 | 192 |
709 line = u"%s=%s" % (key, unicode(value)) | 767 line = u"%s=%s" % (key, unicode(value)) |
710 line = line.replace('\n', ' ').replace('\r', ' ') # no lineseps | 768 line = line.replace('\n', ' ').replace('\r', ' ') # no lineseps |
711 data.write(line + '\n') | 769 data.write(line + '\n') |
712 data.close() | 770 data.close() |
713 | 771 |
714 clearUserIdLookupCaches(self._request) | 772 self.updateLookupCaches() |
715 | 773 |
716 if not self.disabled: | 774 if not self.disabled: |
717 self.valid = 1 | 775 self.valid = 1 |
718 | 776 |
719 if not self._stored: | 777 if not self._stored: |
906 | 964 |
907 if changed: | 965 if changed: |
908 self.save() | 966 self.save() |
909 return not self.isSubscribedTo([pagename]) | 967 return not self.isSubscribedTo([pagename]) |
910 | 968 |
911 # update page subscribers cache | |
912 def updatePageSubCache(self): | 969 def updatePageSubCache(self): |
913 """ When a user changes his preferences, we update the | 970 """ When a user profile is saved, we update the page subscriber's cache """ |
914 page subscriber's cache | |
915 """ | |
916 | 971 |
917 scope, arena, key = 'userdir', 'users', 'pagesubscriptions' | 972 scope, arena, key = 'userdir', 'users', 'pagesubscriptions' |
918 | 973 |
919 page_sub = {} | |
920 cache = caching.CacheEntry(self._request, arena=arena, key=key, scope=scope, use_pickle=True, do_locking=False) | 974 cache = caching.CacheEntry(self._request, arena=arena, key=key, scope=scope, use_pickle=True, do_locking=False) |
921 if not cache.exists(): | 975 if not cache.exists(): |
922 return # if no cache file exists, just don't do anything | 976 return # if no cache file exists, just don't do anything |
923 | 977 |
924 cache.lock('w') | 978 cache.lock('w') |
934 elif page_sub.get(self.id): | 988 elif page_sub.get(self.id): |
935 del page_sub[self.id] | 989 del page_sub[self.id] |
936 | 990 |
937 cache.update(page_sub) | 991 cache.update(page_sub) |
938 cache.unlock() | 992 cache.unlock() |
993 | |
994 def updateLookupCaches(self): | |
995 """ When a user profile is saved, we update the userid lookup caches """ | |
996 | |
997 scope, arena, key = 'userdir', 'users', 'lookup' | |
998 | |
999 diskcache = caching.CacheEntry(self._request, arena=arena, key=key, scope=scope, use_pickle=True, do_locking=False) | |
1000 if not diskcache.exists(): | |
1001 return # if no cache file exists, just don't do anything | |
1002 | |
1003 diskcache.lock('w') | |
1004 cache = diskcache.content() | |
1005 userid = self.id | |
1006 | |
1007 # first remove all old entries mapping to this userid: | |
1008 for attrname in CACHED_USER_ATTRS: | |
1009 attr2id = cache[attrname] | |
1010 for key, value in attr2id.items(): | |
1011 if value == userid: | |
1012 print "deleting old cached attr %s -> %s" % (key, value) | |
1013 del attr2id[key] | |
1014 | |
1015 # then update with the current attr values: | |
1016 for attrname in CACHED_USER_ATTRS: | |
1017 if hasattr(self, attrname): | |
1018 value = getattr(self, attrname) | |
1019 if value: | |
1020 # we do not store empty values, likely not unique | |
1021 print "setting new cached attr %s -> %r" % (attrname, value) | |
1022 attr2id = cache[attrname] | |
1023 if isinstance(value, list): | |
1024 for val in value: | |
1025 attr2id[val] = userid | |
1026 else: | |
1027 attr2id[value] = userid | |
1028 | |
1029 setMemoryLookupCaches(self._request, cache) | |
1030 diskcache.update(cache) | |
1031 diskcache.unlock() | |
939 | 1032 |
940 # ----------------------------------------------------------------- | 1033 # ----------------------------------------------------------------- |
941 # Quicklinks | 1034 # Quicklinks |
942 | 1035 |
943 def getQuickLinks(self): | 1036 def getQuickLinks(self): |