comparison MoinMoin/support/lupy/index/field.py @ 0:77665d8e2254

tag of nonpublic@localhost--archive/moin--enterprise--1.5--base-0 (automatically generated log message) imported from: moin--main--1.5--base-0
author Thomas Waldmann <tw-public@gmx.de>
date Thu, 22 Sep 2005 15:09:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:77665d8e2254
1 # This module is part of the Lupy project and is Copyright 2003 Amir
2 # Bakhtiar (amir@divmod.org). This is free software; you can redistribute
3 # it and/or modify it under the terms of version 2.1 of the GNU Lesser
4 # General Public License as published by the Free Software Foundation.
5
6 from MoinMoin.support.lupy import document
7
class FieldInfo(object):
    """Plain record describing a single field.

    Attributes:
        name      -- the field's name
        isIndexed -- flag taken from the ``tk`` constructor argument
        number    -- the number assigned to the field
    """

    def __init__(self, na, tk, nu):
        """Store name ``na``, indexed flag ``tk`` and number ``nu``."""
        self.name, self.isIndexed, self.number = na, tk, nu
14
15
class FieldInfos(object):
    """Collection of ``FieldInfo`` records, addressable by name or number.

    ``byNumber`` is a list whose index is the field number; ``byName`` maps
    a field name to the same ``FieldInfo`` object.
    """

    def __init__(self, d=None, name=None):
        """Create a fresh collection, or load one from a file.

        When both ``d`` and ``name`` are None the collection starts with a
        single entry: the empty field name, not indexed.  Otherwise
        ``d.openFile(name)`` is opened, parsed with ``read()`` and closed
        again, even if parsing fails.
        """
        self.byNumber = []
        self.byName = {}
        if d is None and name is None:
            self.addString('', False)
            return

        stream = d.openFile(name)
        try:
            self.read(stream)
        finally:
            stream.close()

    def add(self, doc):
        """Adds field info for a Document"""
        for field in doc.fields():
            self.addString(field.name(), field.isIndexed)

    def addString(self, name, isIndxd):
        """Register ``name``, or upgrade an existing entry to indexed.

        An unknown name gets a new entry carrying ``isIndxd``.  A known
        name is only ever promoted: it becomes indexed when ``isIndxd`` is
        true, and is never demoted back to unindexed.
        """
        fi = self.fieldInfo(name)
        if fi is None:
            self.addInternal(name, isIndxd)
        elif isIndxd and not fi.isIndexed:
            # Compare by truth value, not identity: with an identity test a
            # falsy non-bool flag (e.g. 0 against a stored False) would
            # wrongly mark an unindexed field as indexed.
            fi.isIndexed = True

    def addFieldInfos(self, other):
        """Merges in information from another FieldInfos"""
        for number in range(len(other)):
            info = other.fieldInfoInt(number)
            self.addString(info.name, info.isIndexed)

    def addInternal(self, name, isIndexed):
        """Append a new entry unconditionally.

        The new entry's number is the current length of ``byNumber``.  No
        check for an existing entry is made here; ``byName[name]`` is
        simply (re)bound to the new entry.
        """
        fi = FieldInfo(name, isIndexed, len(self.byNumber))
        self.byNumber.append(fi)
        self.byName[name] = fi

    def fieldNumber(self, fieldName):
        """Return the number of ``fieldName``, or -1 if it is unknown."""
        fi = self.fieldInfo(fieldName)
        if fi is None:
            return -1
        return fi.number

    def fieldInfo(self, fieldName):
        """Return the ``FieldInfo`` for ``fieldName``, or None if unknown."""
        return self.byName.get(fieldName)

    def fieldName(self, fieldNumber):
        """Return the name of field ``fieldNumber``.

        Raises whatever indexing ``byNumber`` raises for a bad number.
        """
        return self.byNumber[fieldNumber].name

    def fieldInfoInt(self, fieldNumber):
        """Return the ``FieldInfo`` stored at position ``fieldNumber``."""
        return self.byNumber[fieldNumber]

    def __len__(self):
        """Number of registered fields."""
        return len(self.byNumber)

    def writeDir(self, d, name):
        """Write this collection to ``d.createFile(name)``.

        The created stream is closed even if writing fails.
        """
        output = d.createFile(name)
        try:
            self.write(output)
        finally:
            output.close()

    def write(self, output):
        """Serialise to ``output``.

        Layout: a VInt entry count, then for each entry in field-number
        order its name as a string followed by one byte, 1 if the field is
        indexed and 0 otherwise.
        """
        output.writeVInt(len(self))
        for fi in self.byNumber:
            output.writeString(fi.name)
            if fi.isIndexed:
                output.writeByte(1)
            else:
                output.writeByte(0)

    def read(self, input):
        """Load entries from ``input`` in the layout produced by ``write``.

        Entries are appended with ``addInternal`` in the order they are
        read; a non-zero flag byte means the field is indexed.
        """
        size = input.readVInt()
        for _ in range(size):
            # The name must be read before the flag byte.
            fieldName = input.readString()
            indexed = input.readByte() != 0
            self.addInternal(fieldName, indexed)

    def fieldNames(self):
        """Return a list of all registered field names."""
        # Experimental for auto-queries
        # list(...) gives callers a real list on every Python version
        # rather than a live dictionary view.
        return list(self.byName)
99
class FieldsWriter(object):
    """Writes the stored fields of documents for one segment.

    Field data is written to ``<segment>.fdt``.  For every document added,
    ``<segment>.fdx`` receives the ``.fdt`` file position at which that
    document's data begins.
    """

    def __init__(self, d, segment, fn):
        """Create both output files through ``d.createFile``.

        ``fn`` is kept as ``self.fieldInfos`` and is used to translate
        field names into field numbers.
        """
        self.fieldInfos = fn
        self.fieldsStream = d.createFile(segment + '.fdt')
        self.indexStream = d.createFile(segment + '.fdx')

    def addDocument(self, doc):
        """Append the stored fields of ``doc``.

        Written to the fields stream: a VInt count of stored fields, then
        for each stored field its field number (VInt), a flag byte whose
        lowest bit is set when the field is tokenized, and the field's
        string value.  Fields that are not stored are skipped.
        """
        # Record where this document starts before writing any of it.
        self.indexStream.writeLong(self.fieldsStream.getFilePointer())

        # Collect the stored fields once so the count written below and the
        # entries that follow are guaranteed to agree.
        stored = [field for field in doc.fields() if field.isStored]
        self.fieldsStream.writeVInt(len(stored))

        for field in stored:
            number = self.fieldInfos.fieldNumber(field.name())
            self.fieldsStream.writeVInt(number)

            bits = 0
            if field.isTokenized:
                bits |= 1
            self.fieldsStream.writeByte(bits)

            self.fieldsStream.writeString(field.stringValue())

    def close(self):
        """Close both streams.

        The index stream is closed even when closing the fields stream
        raises, so a failure on the first does not leak the second.
        """
        try:
            self.fieldsStream.close()
        finally:
            self.indexStream.close()
132
133
class FieldsReader(object):
    """Reads back the stored fields of documents for one segment.

    ``<segment>.fdx`` is treated as an array of fixed-width entries, one
    per document, each holding the position of that document's data in
    ``<segment>.fdt``.
    """

    # Width in bytes of one entry in the index stream.
    _INDEX_ENTRY_SIZE = 8

    def __init__(self, d, segment, fn):
        """Open both files through ``d.openFile``.

        ``fn`` is kept as ``self.fieldInfos`` and is used to translate
        field numbers back into field information.
        """
        self.fieldInfos = fn

        self.fieldsStream = d.openFile(segment + '.fdt')
        self.indexStream = d.openFile(segment + '.fdx')

        # Floor division keeps the count an integer on every Python
        # version; plain "/" yields a float under true division.
        self.sze = self.indexStream.length // self._INDEX_ENTRY_SIZE

    def close(self):
        """Close both streams.

        The index stream is closed even when closing the fields stream
        raises, so a failure on the first does not leak the second.
        """
        try:
            self.fieldsStream.close()
        finally:
            self.indexStream.close()

    def size(self):
        """Return the number of entries in the index stream."""
        return self.sze

    def doc(self, n):
        """Rebuild and return document number ``n`` from its stored fields.

        No range check is made on ``n``; an out-of-range value is passed
        straight to the index stream's ``seek``.
        """
        # Look up where document n starts in the fields stream.
        self.indexStream.seek(n * self._INDEX_ENTRY_SIZE)
        position = self.indexStream.readLong()
        self.fieldsStream.seek(position)

        doc = document.Document()
        numFields = self.fieldsStream.readVInt()
        for _ in range(numFields):
            # Each field is read as: number, flag byte, string value.
            fieldNumber = self.fieldsStream.readVInt()
            fi = self.fieldInfos.fieldInfoInt(fieldNumber)

            bits = self.fieldsStream.readByte()
            tokenized = (bits & 1) != 0

            value = self.fieldsStream.readString()
            # NOTE(review): the third argument is presumably the "stored"
            # flag, which would always be True here -- confirm against
            # document.Field.
            doc.add(document.Field(fi.name, value,
                                   True, fi.isIndexed, tokenized))

        return doc
172
173