Lucene++ - a full-featured C++ search engine
API Documentation


CheckIndex.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef CHECKINDEX_H
8 #define CHECKINDEX_H
9 
10 #include "SegmentTermDocs.h"
11 
12 namespace Lucene {
13 
20 class LPPAPI CheckIndex : public LuceneObject {
21 public:
23  CheckIndex(const DirectoryPtr& dir);
24  virtual ~CheckIndex();
25 
27 
28 protected:
29  InfoStreamPtr infoStream;
31 
32  static bool _assertsOn;
33 
34 public:
36  void setInfoStream(const InfoStreamPtr& out);
37 
43  IndexStatusPtr checkIndex();
44 
53  IndexStatusPtr checkIndex(Collection<String> onlySegments);
54 
63  void fixIndex(const IndexStatusPtr& result);
64 
65  static bool testAsserts();
66  static bool assertsOn();
67 
89  static int main(Collection<String> args);
90 
91 protected:
92  void msg(const String& msg);
93 
95  FieldNormStatusPtr testFieldNorms(Collection<String> fieldNames, const SegmentReaderPtr& reader);
96 
98  TermIndexStatusPtr testTermIndex(const SegmentInfoPtr& info, const SegmentReaderPtr& reader);
99 
101  StoredFieldStatusPtr testStoredFields(const SegmentInfoPtr& info, const SegmentReaderPtr& reader);
102 
104  TermVectorStatusPtr testTermVectors(const SegmentInfoPtr& info, const SegmentReaderPtr& reader);
105 };
106 
108 class LPPAPI IndexStatus : public LuceneObject {
109 public:
110  IndexStatus();
111  virtual ~IndexStatus();
112 
114 
115 public:
117  bool clean;
118 
121 
124 
127 
130 
132  int32_t numSegments;
133 
136 
140 
143 
146 
149 
153 
156 
158  int32_t numBadSegments;
159 
161  bool partial;
162 
164  MapStringString userData;
165 };
166 
168 class LPPAPI SegmentInfoStatus : public LuceneObject {
169 public:
171  virtual ~SegmentInfoStatus();
172 
174 
175 public:
177  String name;
178 
180  int32_t docCount;
181 
183  bool compound;
184 
186  int32_t numFiles;
187 
189  double sizeMB;
190 
193  int32_t docStoreOffset;
194 
197 
200 
203 
206 
208  int32_t numDeleted;
209 
212 
214  int32_t numFields;
215 
218  bool hasProx;
219 
221  MapStringString diagnostics;
222 
225 
228 
231 
234 };
235 
237 class LPPAPI FieldNormStatus : public LuceneObject {
238 public:
239  FieldNormStatus();
240  virtual ~FieldNormStatus();
241 
243 
244 public:
246  int64_t totFields;
247 
250 };
251 
253 class LPPAPI TermIndexStatus : public LuceneObject {
254 public:
255  TermIndexStatus();
256  virtual ~TermIndexStatus();
257 
259 
260 public:
262  int64_t termCount;
263 
265  int64_t totFreq;
266 
268  int64_t totPos;
269 
272 };
273 
275 class LPPAPI StoredFieldStatus : public LuceneObject {
276 public:
278  virtual ~StoredFieldStatus();
279 
281 
282 public:
284  int32_t docCount;
285 
287  int64_t totFields;
288 
291 };
292 
294 class LPPAPI TermVectorStatus : public LuceneObject {
295 public:
297  virtual ~TermVectorStatus();
298 
300 
301 public:
303  int32_t docCount;
304 
306  int64_t totVectors;
307 
310 };
311 
312 }
313 
314 #endif
Lucene::SegmentInfoStatus::openReaderPassed
bool openReaderPassed
True if we were able to open a SegmentReader on this segment.
Definition: CheckIndex.h:211
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::StoredFieldStatusPtr
boost::shared_ptr< StoredFieldStatus > StoredFieldStatusPtr
Definition: LuceneTypes.h:229
Lucene::IndexStatus::newSegments
SegmentInfosPtr newSegments
SegmentInfos instance containing only segments that had no problems (this is used with the CheckIndex...
Definition: CheckIndex.h:152
Lucene::SegmentInfoStatus::fieldNormStatus
FieldNormStatusPtr fieldNormStatus
Status for testing of field norms (null if field norms could not be tested).
Definition: CheckIndex.h:224
Lucene::IndexStatus::totLoseDocCount
int32_t totLoseDocCount
How many documents will be lost to bad segments.
Definition: CheckIndex.h:155
Lucene::Collection< String >
Lucene::IndexStatusPtr
boost::shared_ptr< IndexStatus > IndexStatusPtr
Definition: LuceneTypes.h:159
Lucene::FieldNormStatusPtr
boost::shared_ptr< FieldNormStatus > FieldNormStatusPtr
Definition: LuceneTypes.h:129
Lucene::SegmentInfoPtr
boost::shared_ptr< SegmentInfo > SegmentInfoPtr
Definition: LuceneTypes.h:208
Lucene::IndexStatus::segmentFormat
String segmentFormat
String description of the version of the index.
Definition: CheckIndex.h:135
Lucene::TermVectorStatus::totVectors
int64_t totVectors
Total number of term vectors tested.
Definition: CheckIndex.h:306
Lucene::SegmentInfoStatus::docStoreOffset
int32_t docStoreOffset
Doc store offset, if this segment shares the doc store files (stored fields and term vectors) with ot...
Definition: CheckIndex.h:193
Lucene::StoredFieldStatus
Status from testing stored fields.
Definition: CheckIndex.h:275
Lucene::SegmentInfoStatus::sizeMB
double sizeMB
Net size (MB) of the files referenced by this segment.
Definition: CheckIndex.h:189
Lucene::SegmentInfoStatus::docStoreSegment
String docStoreSegment
String of the shared doc store segment, or null if this segment does not share the doc store files.
Definition: CheckIndex.h:196
Lucene::StoredFieldStatus::totFields
int64_t totFields
Total number of stored fields tested.
Definition: CheckIndex.h:287
Lucene::CheckIndex::dir
DirectoryPtr dir
Definition: CheckIndex.h:30
Lucene::TermVectorStatus
Status from testing term vectors.
Definition: CheckIndex.h:294
Lucene::IndexStatus::numSegments
int32_t numSegments
Number of segments in the index.
Definition: CheckIndex.h:132
Lucene::SegmentInfoStatus::hasDeletions
bool hasDeletions
True if this segment has pending deletions.
Definition: CheckIndex.h:202
Lucene::SegmentInfoStatus::numDeleted
int32_t numDeleted
Number of deleted documents.
Definition: CheckIndex.h:208
Lucene::SegmentReaderPtr
boost::shared_ptr< SegmentReader > SegmentReaderPtr
Definition: LuceneTypes.h:215
Lucene::TermIndexStatus::error
LuceneException error
Exception thrown during term index test (null on success)
Definition: CheckIndex.h:271
Lucene::IndexStatus::segmentsChecked
Collection< String > segmentsChecked
Empty unless you passed a specific list of segments to check as the optional 3rd argument.
Definition: CheckIndex.h:139
Lucene::SegmentInfoStatus::deletionsFileName
String deletionsFileName
Name of the current deletions file.
Definition: CheckIndex.h:205
Lucene::FieldNormStatus
Status from testing field norms.
Definition: CheckIndex.h:237
Lucene::SegmentInfoStatus::hasProx
bool hasProx
True if at least one of the fields in this segment does not omitTermFreqAndPositions.
Definition: CheckIndex.h:218
Lucene::SegmentInfoStatus::numFields
int32_t numFields
Number of fields in this segment.
Definition: CheckIndex.h:214
Lucene::IndexStatus::missingSegments
bool missingSegments
True if we were unable to locate and load the segments_N file.
Definition: CheckIndex.h:120
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::IndexStatus::cantOpenSegments
bool cantOpenSegments
True if we were unable to open the segments_N file.
Definition: CheckIndex.h:123
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::FieldNormStatus::error
LuceneException error
Exception thrown during field norm test (null on success)
Definition: CheckIndex.h:249
Lucene::SegmentInfoStatus::docCount
int32_t docCount
Document count (does not take deletions into account).
Definition: CheckIndex.h:180
SegmentTermDocs.h
Lucene::IndexStatus
Returned from checkIndex() detailing the health and status of the index.
Definition: CheckIndex.h:108
Lucene::SegmentInfoStatus::diagnostics
MapStringString diagnostics
Map that includes certain debugging details that IndexWriter records into each segment it creates.
Definition: CheckIndex.h:221
Lucene::SegmentInfoStatus
Holds the status of each segment in the index. See IndexStatus::segmentInfos.
Definition: CheckIndex.h:168
Lucene::IndexStatus::userData
MapStringString userData
Holds the userData of the last commit in the index.
Definition: CheckIndex.h:164
Lucene::TermIndexStatus::termCount
int64_t termCount
Total term count.
Definition: CheckIndex.h:258
Lucene::TermIndexStatus
Status from testing term index.
Definition: CheckIndex.h:253
Lucene::FieldNormStatus::totFields
int64_t totFields
Number of fields successfully tested.
Definition: CheckIndex.h:242
Lucene::SegmentInfoStatus::numFiles
int32_t numFiles
Number of files referenced by this segment.
Definition: CheckIndex.h:186
Lucene::SegmentInfoStatus::termIndexStatus
TermIndexStatusPtr termIndexStatus
Status for testing of indexed terms (null if indexed terms could not be tested).
Definition: CheckIndex.h:227
Lucene::IndexStatus::partial
bool partial
True if we checked only specific segments (checkIndex(Collection<String>) was called with a non-null argument).
Definition: CheckIndex.h:161
Lucene::StoredFieldStatus::error
LuceneException error
Exception thrown during stored fields test (null on success)
Definition: CheckIndex.h:290
Lucene::CheckIndex
Basic tool and API to check the health of an index and write a new segments file that removes referen...
Definition: CheckIndex.h:20
Lucene::SegmentInfoStatus::docStoreCompoundFile
bool docStoreCompoundFile
True if the shared doc store files are compound file format.
Definition: CheckIndex.h:199
Lucene::LuceneException
Lucene exception container.
Definition: LuceneException.h:15
Lucene::CheckIndex::_assertsOn
static bool _assertsOn
Definition: CheckIndex.h:32
Lucene::TermVectorStatusPtr
boost::shared_ptr< TermVectorStatus > TermVectorStatusPtr
Definition: LuceneTypes.h:257
Lucene::IndexStatus::dir
DirectoryPtr dir
Directory index is in.
Definition: CheckIndex.h:148
Lucene::IndexStatus::segmentInfos
Collection< SegmentInfoStatusPtr > segmentInfos
List of SegmentInfoStatus instances, detailing status of each segment.
Definition: CheckIndex.h:145
Lucene::SegmentInfosPtr
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
Lucene::IndexStatus::toolOutOfDate
bool toolOutOfDate
True if the index was created with a newer version of Lucene than the CheckIndex tool.
Definition: CheckIndex.h:142
Lucene::DirectoryPtr
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
Lucene::SegmentInfoStatus::storedFieldStatus
StoredFieldStatusPtr storedFieldStatus
Status for testing of stored fields (null if stored fields could not be tested).
Definition: CheckIndex.h:230
Lucene::InfoStreamPtr
boost::shared_ptr< InfoStream > InfoStreamPtr
Definition: LuceneTypes.h:532
Lucene::SegmentInfoStatus::termVectorStatus
TermVectorStatusPtr termVectorStatus
Status for testing of term vectors (null if term vectors could not be tested).
Definition: CheckIndex.h:233
Lucene::IndexStatus::numBadSegments
int32_t numBadSegments
How many bad segments were found.
Definition: CheckIndex.h:158
Lucene::TermIndexStatus::totFreq
int64_t totFreq
Total frequency across all terms.
Definition: CheckIndex.h:265
Lucene::TermIndexStatus::totPos
int64_t totPos
Total number of positions.
Definition: CheckIndex.h:268
Lucene::TermVectorStatus::error
LuceneException error
Exception thrown during term vector test (null on success)
Definition: CheckIndex.h:309
Lucene::SegmentInfoStatus::compound
bool compound
True if segment is compound file format.
Definition: CheckIndex.h:183
Lucene::IndexStatus::segmentsFileName
String segmentsFileName
Name of latest segments_N file in the index.
Definition: CheckIndex.h:129
Lucene::TermIndexStatusPtr
boost::shared_ptr< TermIndexStatus > TermIndexStatusPtr
Definition: LuceneTypes.h:238
Lucene::IndexStatus::missingSegmentVersion
bool missingSegmentVersion
True if we were unable to read the version number from segments_N file.
Definition: CheckIndex.h:126

clucene.sourceforge.net