comparison MoinMoin/user.py @ 6028:1893da1d5213

userid lookup caches: use 1 on-disk cache file, update cache rather than rebuild. Before this, we maintained one cache file per attribute (e.g. name2id, openid2id, ...) - the related code did multiple passes over all user profiles to rebuild these cache files. Now we do a one-pass rebuild, writing all attribute -> userid mappings into one on-disk cache file called "lookup". In addition to "name" and "openids", we now also support fast lookup for "email" and "jid". On profile save, we used to just kill the cache and let it rebuild. Now the cache is read, updated and written back (which is much less expensive for wikis with more than a few users). Also did some refactoring, reducing duplication and breaking down the code into smaller functions / methods.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Wed, 12 Feb 2014 18:22:10 +0100
parents 8618232296b5
children e2da1c1183d8
comparison
equal deleted inserted replaced
6027:8618232296b5 6028:1893da1d5213
35 35
36 from MoinMoin import config, caching, wikiutil, i18n, events 36 from MoinMoin import config, caching, wikiutil, i18n, events
37 from werkzeug.security import safe_str_cmp as safe_str_equal 37 from werkzeug.security import safe_str_cmp as safe_str_equal
38 from MoinMoin.util import timefuncs, random_string 38 from MoinMoin.util import timefuncs, random_string
39 from MoinMoin.wikiutil import url_quote_plus 39 from MoinMoin.wikiutil import url_quote_plus
40
41 # for efficient lookup <attr> -> userid, we keep an index of this in the cache.
42 # the attribute names in here should be uniquely identifying a user.
43 CACHED_USER_ATTRS = ['name', 'email', 'jid', 'openids', ]
40 44
41 45
42 def getUserList(request): 46 def getUserList(request):
43 """ Get a list of all (numerical) user IDs. 47 """ Get a list of all (numerical) user IDs.
44 48
67 def get_by_jabber_id(request, jabber_id): 71 def get_by_jabber_id(request, jabber_id):
68 """ Searches for an user with a perticular jabber id and returns it. """ 72 """ Searches for an user with a perticular jabber id and returns it. """
69 filter_func = lambda user: user.valid and user.jid.lower() == jabber_id.lower() 73 filter_func = lambda user: user.valid and user.jid.lower() == jabber_id.lower()
70 return get_by_filter(request, filter_func) 74 return get_by_filter(request, filter_func)
71 75
76
72 def _getUserIdByKey(request, key, search): 77 def _getUserIdByKey(request, key, search):
73 """ Get the user ID for a specified key/value pair. 78 """ Get the user ID for a specified key/value pair.
74 79
75 This method must only be called for keys that are 80 This method must only be called for keys that are
76 guaranteed to be unique. 81 guaranteed to be unique.
80 @return the corresponding user ID or None 85 @return the corresponding user ID or None
81 """ 86 """
82 if not search or not key: 87 if not search or not key:
83 return None 88 return None
84 cfg = request.cfg 89 cfg = request.cfg
85 scope, arena, cachekey = 'userdir', 'users', '%s2id' % key 90 cfg_cache_attr = key + "2id"
86 try: 91 try:
87 _key2id = getattr(cfg.cache, cachekey) 92 attr2id = getattr(cfg.cache, cfg_cache_attr)
93 from_disk = False
88 except AttributeError: 94 except AttributeError:
89 cache = caching.CacheEntry(request, arena, cachekey, scope=scope, use_pickle=True) 95 # no in-memory cache there - initialize it / load it from disk
90 try: 96 loadLookupCaches(request)
91 _key2id = cache.content() 97 attr2id = getattr(cfg.cache, cfg_cache_attr)
92 except caching.CacheError: 98 from_disk = True # we just loaded the stuff from disk
93 _key2id = {} 99 uid = attr2id.get(search, None)
94 setattr(cfg.cache, cachekey, _key2id) 100 if uid is None and not from_disk:
95 uid = _key2id.get(search, None) 101 # we do not have the entry we searched for.
102 # we didn't find it in some in-memory cache, try refreshing these from disk
103 loadLookupCaches(request)
104 attr2id = getattr(cfg.cache, cfg_cache_attr)
105 from_disk = True # we just loaded the stuff from disk
106 uid = attr2id.get(search, None)
96 if uid is None: 107 if uid is None:
97 # complete cache rebuild on a cache miss! (expensive) 108 # we do not have the entry we searched for.
98 # note: we have this code block likely because we were not sure about 109 # we don't have it in the on-disk cache, cache MISS.
99 # cache consistency. if we can assure cache consistency, this 110 # could be because:
100 # block wouldn't be needed. 111 # a) ok: we have no such search value in the profiles
101 for userid in getUserList(request): 112 # b) fault: the cache is incoherent with the profiles
102 u = User(request, id=userid) 113 # c) fault: reading the cache from disk failed, due to an error
103 if hasattr(u, key): 114 # d) ok: same as c), but just because no ondisk cache has been built yet
104 value = getattr(u, key) 115 rebuildLookupCaches(request) # XXX expensive
116 attr2id = getattr(cfg.cache, cfg_cache_attr)
117 uid = attr2id.get(search, None)
118 return uid
119
120
121 def setMemoryLookupCaches(request, cache):
122 """set the in-memory cache from the given cache contents
123
124 @param request: the request object
125 @param cache: either a dict of attrname -> attrcache to set the in-memory cache,
126 or None to delete the in-memory cache.
127 """
128 for attrname in CACHED_USER_ATTRS:
129 cfg_cache_attr = attrname + "2id"
130 if cache is None:
131 try:
132 delattr(request.cfg.cache, cfg_cache_attr)
133 except:
134 pass
135 else:
136 setattr(request.cfg.cache, cfg_cache_attr, cache[attrname])
137
138
139 def loadLookupCaches(request):
140 """load lookup cache contents into memory: cfg.cache.XXX2id"""
141 scope, arena, cachekey = 'userdir', 'users', 'lookup'
142 diskcache = caching.CacheEntry(request, arena, cachekey, scope=scope, use_pickle=True)
143 try:
144 cache = diskcache.content()
145 except caching.CacheError:
146 cache = {}
147 for attrname in CACHED_USER_ATTRS:
148 cache[attrname] = {}
149 setMemoryLookupCaches(request, cache)
150
151
152 def rebuildLookupCaches(request):
153 """complete attrs -> userid lookup cache rebuild"""
154 # as there may be thousands of users and reading all profiles is
155 # expensive, we just have 1 lookup cache for all interesting user attrs,
156 # so we only need to read all profiles ONCE to build the cache.
157 scope, arena, key = 'userdir', 'users', 'lookup'
158 diskcache = caching.CacheEntry(request, arena, key, scope=scope, use_pickle=True, do_locking=False)
159 diskcache.lock('w')
160
161 cache = {}
162 for attrname in CACHED_USER_ATTRS:
163 cache[attrname] = {}
164 for userid in getUserList(request):
165 u = User(request, id=userid)
166 for attrname in CACHED_USER_ATTRS:
167 if hasattr(u, attrname):
168 attr2id = cache[attrname]
169 value = getattr(u, attrname)
105 if isinstance(value, list): 170 if isinstance(value, list):
106 for val in value: 171 for val in value:
107 _key2id[val] = userid 172 attr2id[val] = userid
108 else: 173 else:
109 _key2id[value] = userid 174 attr2id[value] = userid
110 cache = caching.CacheEntry(request, arena, cachekey, scope=scope, use_pickle=True) 175
111 try: 176 setMemoryLookupCaches(request, cache)
112 cache.update(_key2id) 177 diskcache.update(cache)
113 except caching.CacheError: 178 diskcache.unlock()
114 pass 179 return cache
115 uid = _key2id.get(search, None) 180
116 return uid 181
117 182 def clearLookupCaches(request):
118 183 """kill the userid lookup cache"""
119 def clearUserIdLookupCaches(request):
120 """kill the userid lookup caches"""
121 # this triggers a rebuild of the cache. 184 # this triggers a rebuild of the cache.
122 # we maybe could rather update the caches, would be less expensive 185 setMemoryLookupCaches(request, None)
123 scope, arena = 'userdir', 'users' 186 scope, arena, key = 'userdir', 'users', 'lookup'
124 for key in ['name2id', 'openid2id', ]: 187 caching.CacheEntry(request, arena, key, scope=scope).remove()
125 caching.CacheEntry(request, arena, key, scope=scope).remove()
126 try:
127 delattr(request.cfg.cache, key)
128 except:
129 pass
130 188
131 189
132 def getUserId(request, searchName): 190 def getUserId(request, searchName):
133 """ Get the user ID for a specific user NAME. 191 """ Get the user ID for a specific user NAME.
134 192
709 line = u"%s=%s" % (key, unicode(value)) 767 line = u"%s=%s" % (key, unicode(value))
710 line = line.replace('\n', ' ').replace('\r', ' ') # no lineseps 768 line = line.replace('\n', ' ').replace('\r', ' ') # no lineseps
711 data.write(line + '\n') 769 data.write(line + '\n')
712 data.close() 770 data.close()
713 771
714 clearUserIdLookupCaches(self._request) 772 self.updateLookupCaches()
715 773
716 if not self.disabled: 774 if not self.disabled:
717 self.valid = 1 775 self.valid = 1
718 776
719 if not self._stored: 777 if not self._stored:
906 964
907 if changed: 965 if changed:
908 self.save() 966 self.save()
909 return not self.isSubscribedTo([pagename]) 967 return not self.isSubscribedTo([pagename])
910 968
911 # update page subscribers cache
912 def updatePageSubCache(self): 969 def updatePageSubCache(self):
913 """ When a user changes his preferences, we update the 970 """ When a user profile is saved, we update the page subscriber's cache """
914 page subscriber's cache
915 """
916 971
917 scope, arena, key = 'userdir', 'users', 'pagesubscriptions' 972 scope, arena, key = 'userdir', 'users', 'pagesubscriptions'
918 973
919 page_sub = {}
920 cache = caching.CacheEntry(self._request, arena=arena, key=key, scope=scope, use_pickle=True, do_locking=False) 974 cache = caching.CacheEntry(self._request, arena=arena, key=key, scope=scope, use_pickle=True, do_locking=False)
921 if not cache.exists(): 975 if not cache.exists():
922 return # if no cache file exists, just don't do anything 976 return # if no cache file exists, just don't do anything
923 977
924 cache.lock('w') 978 cache.lock('w')
934 elif page_sub.get(self.id): 988 elif page_sub.get(self.id):
935 del page_sub[self.id] 989 del page_sub[self.id]
936 990
937 cache.update(page_sub) 991 cache.update(page_sub)
938 cache.unlock() 992 cache.unlock()
993
994 def updateLookupCaches(self):
995 """ When a user profile is saved, we update the userid lookup caches """
996
997 scope, arena, key = 'userdir', 'users', 'lookup'
998
999 diskcache = caching.CacheEntry(self._request, arena=arena, key=key, scope=scope, use_pickle=True, do_locking=False)
1000 if not diskcache.exists():
1001 return # if no cache file exists, just don't do anything
1002
1003 diskcache.lock('w')
1004 cache = diskcache.content()
1005 userid = self.id
1006
1007 # first remove all old entries mapping to this userid:
1008 for attrname in CACHED_USER_ATTRS:
1009 attr2id = cache[attrname]
1010 for key, value in attr2id.items():
1011 if value == userid:
1012 print "deleting old cached attr %s -> %s" % (key, value)
1013 del attr2id[key]
1014
1015 # then update with the current attr values:
1016 for attrname in CACHED_USER_ATTRS:
1017 if hasattr(self, attrname):
1018 value = getattr(self, attrname)
1019 if value:
1020 # we do not store empty values, likely not unique
1021 print "setting new cached attr %s -> %r" % (attrname, value)
1022 attr2id = cache[attrname]
1023 if isinstance(value, list):
1024 for val in value:
1025 attr2id[val] = userid
1026 else:
1027 attr2id[value] = userid
1028
1029 setMemoryLookupCaches(self._request, cache)
1030 diskcache.update(cache)
1031 diskcache.unlock()
939 1032
940 # ----------------------------------------------------------------- 1033 # -----------------------------------------------------------------
941 # Quicklinks 1034 # Quicklinks
942 1035
943 def getQuickLinks(self): 1036 def getQuickLinks(self):