comparison MoinMoin/support/lupy/index/segment.py @ 0:77665d8e2254

tag of nonpublic@localhost--archive/moin--enterprise--1.5--base-0 (automatically generated log message) imported from: moin--main--1.5--base-0
author Thomas Waldmann <tw-public@gmx.de>
date Thu, 22 Sep 2005 15:09:50 +0000
parents
children f0ecf4656a58
comparison
equal deleted inserted replaced
-1:000000000000 0:77665d8e2254
1 # -*- test-case-name: lupy.test -*-
2 # This module is part of the Lupy project and is Copyright 2003 Amir
3 # Bakhtiar (amir@divmod.org). This is free software; you can redistribute
4 # it and/or modify it under the terms of version 2.1 of the GNU Lesser
5 # General Public License as published by the Free Software Foundation.
6
7 from MoinMoin.support.lupy.index import term
8 import copy
9
class SegmentTermEnum:
    """Forward-only reader over the term entries stored in an input stream.

    The stream starts with an integer entry count.  Each entry then holds a
    prefix-compressed term (see readTerm), a document frequency, and the
    deltas to add to the running frequency and proximity pointers.  When the
    enumerator is created with a true ``isi`` flag, every entry additionally
    carries a delta for ``indexPointer``.

    Instances follow the Python 2 iterator protocol (``__iter__``/``next``)
    and compare by their current term (``__cmp__``).
    """

    def __init__(self, i, fis, isi):
        """Attach to stream ``i`` and read the entry count from it.

        i   -- input stream; this class calls readInt, readVInt, readVLong,
               readString, seek, clone and close on it
        fis -- field infos object; only fieldName(number) is used
        isi -- true if each entry also stores an index-pointer delta
        """
        self.input = i
        self.fieldInfos = fis
        # The first value in the stream is the number of entries.
        self.size = self.input.readInt()
        self.isIndex = isi

        # Nothing has been read yet: position -1 sits before the first entry.
        self.position = -1
        self.indexPointer = 0
        self.prev = None
        self.prevTxt = ''
        self.term = term.Term('', '')
        self.trmInfo = term.TermInfo()

    def clone(self):
        """Return a copy of self with its own input stream and TermInfo."""
        # TODO: implement as __copy__
        duplicate = copy.copy(self)
        # The shallow copy shares every attribute (including prevTxt, term
        # and position); the stream and the mutable TermInfo are replaced
        # with private copies.
        duplicate.input = self.input.clone()
        duplicate.trmInfo = term.TermInfo()
        duplicate.trmInfo.setTo(self.trmInfo)
        return duplicate

    def close(self):
        """Close the underlying input stream."""
        self.input.close()

    def docFreq(self):
        """Return the docFreq of the current TermInfo."""
        return self.trmInfo.docFreq

    def freqPointer(self):
        """Return the freqPointer of the current TermInfo."""
        return self.trmInfo.freqPointer

    def next(self):
        """Advance to the next entry and return ``(term, indexPointer)``.

        Raises StopIteration once all ``size`` entries have been consumed;
        ``term`` is then set to None.
        """
        self.position += 1

        # size comes from readInt(), so this is the same test as
        # "position > size - 1".
        if self.position >= self.size:
            # position is bumped a second time on exhaustion, so it reads
            # size + 1 after the first exhausted call.
            # NOTE(review): kept as is -- confirm whether callers rely on it.
            self.position += 1
            self.term = None
            raise StopIteration

        self.prev = self.term
        self.term = self.readTerm()

        stream = self.input
        info = self.trmInfo
        # docFreq is stored absolutely; the two pointers are stored as
        # deltas against the previous entry.
        info.docFreq = stream.readVInt()
        info.freqPointer += stream.readVLong()
        info.proxPointer += stream.readVLong()

        if self.isIndex:
            self.indexPointer += stream.readVLong()

        return self.term, self.indexPointer

    def __iter__(self):
        """Return self; the enumerator is its own iterator."""
        return self

    def proxPointer(self):
        """Return the proxPointer of the current TermInfo."""
        return self.trmInfo.proxPointer

    def readTerm(self):
        """Read one prefix-compressed term from the stream and return it."""
        # This bit is a mite tricky.  The Java version reads into a buffer
        # and uses the stored offset as the position at which the new
        # characters are written, so a prefix shared with the previous term
        # survives in the buffer.  Here we remember the previous text and
        # slice the shared prefix off it instead.
        shared = self.input.readVInt()
        prefix = self.prevTxt[:shared]
        text = prefix + self.input.readString()
        self.prevTxt = text

        field_number = self.input.readVInt()
        field_name = self.fieldInfos.fieldName(field_number)
        return term.Term(field_name, text, False)

    def seek(self, pointer, p, t, ti):
        """Reposition the enumerator.

        pointer -- position passed to the input stream's seek()
        p       -- value for ``position``
        t       -- value for ``term``; its text() becomes the prefix source
                   for the next readTerm()
        ti      -- TermInfo whose values are copied into ``trmInfo``
        """
        self.input.seek(pointer)
        self.position, self.term, self.prev = p, t, None
        self.trmInfo.setTo(ti)
        self.prevTxt = t.text()

    def termInfo(self, ti=None):
        """Expose the current TermInfo.

        Without an argument a fresh TermInfo copy is returned.  With ``ti``
        given, the current values are copied into it and None is returned.
        """
        if ti is not None:
            ti.setTo(self.trmInfo)
            return None

        snapshot = term.TermInfo()
        snapshot.setTo(self.trmInfo)
        return snapshot

    def __cmp__(a, b):
        """Order enumerators by their current term (Python 2 comparison)."""
        left, right = a.term, b.term
        return cmp(left, right)
112
113
class SegmentInfo(object):
    """Plain record describing one segment.

    Attributes:
        name     -- the segment's name
        docCount -- the document count given for the segment
        dir      -- the directory object passed in as ``d``
    """

    def __init__(self, name, docCount, d):
        """Store the three values unchanged on the instance."""
        self.name, self.docCount, self.dir = name, docCount, d
120
121
class SegmentInfos(list):
    """A list of segment records plus a ``counter`` value.

    ``counter`` is the integer stored as the first value of the 'segments'
    file (see read/write).  Slices of a SegmentInfos are SegmentInfos too
    and carry the same ``counter``.
    """

    def __init__(self, lst=None):
        """Create the list, optionally pre-filled from the iterable ``lst``."""
        self.counter = 0
        if lst is not None:
            self.extend(lst)

    def _wrap(self, items):
        """Return ``items`` as a SegmentInfos sharing this object's counter."""
        res = SegmentInfos(items)
        res.counter = self.counter
        return res

    def __getslice__(self, lo, hi):
        """Handle simple ``infos[lo:hi]`` slices on Python 2."""
        return self._wrap(list.__getslice__(self, lo, hi))

    def __getitem__(self, index):
        """Return one item, or a SegmentInfos for a slice.

        Only __getslice__ used to be overridden, so extended slices such as
        ``infos[::2]`` (and every slice on interpreters that do not call
        __getslice__) came back as a plain list without ``counter``.
        """
        result = list.__getitem__(self, index)
        if isinstance(index, slice):
            return self._wrap(result)
        return result

    def read(self, directory):
        """Append the segment records stored in ``directory``'s 'segments' file.

        The file holds the counter, the number of records, and then a
        (name, docCount) pair per record.  ``counter`` is overwritten;
        records already in the list are kept.  The file is always closed.
        """
        stream = directory.openFile('segments')
        try:
            self.counter = stream.readInt()    # read counter
            count = stream.readInt()
            for _ in range(count):             # read segment infos
                name = stream.readString()
                doc_count = stream.readInt()
                self.append(SegmentInfo(name, doc_count, directory))
        finally:
            stream.close()

    def write(self, directory):
        """Write the list to ``directory`` as the new 'segments' file.

        The data goes to 'segments.new' first and is renamed to 'segments'
        only after it has been written and closed without error.
        """
        output = directory.createFile('segments.new')
        try:
            output.writeInt(self.counter)
            output.writeInt(len(self))
            for si in self:
                output.writeString(si.name)
                output.writeInt(si.docCount)
        finally:
            output.close()

        # Install new segment info
        directory.renameFile('segments.new', 'segments')

    def __repr__(self):
        """Return the list repr prefixed with 'SegInfo'."""
        return 'SegInfo' + list.__repr__(self)
164
165