Lucene++ - a full-featured, C++ search engine
API Documentation


SegmentReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef SEGMENTREADER_H
8 #define SEGMENTREADER_H
9 
10 #include "IndexReader.h"
11 #include "CloseableThreadLocal.h"
12 
13 namespace Lucene {
14 
/// SegmentReader is the IndexReader subclass declared by this header.
/// NOTE(review): this listing comes from generated documentation; the numbered lines have gaps, and the
/// cross-reference index further down cites members at SegmentReader.h:27-31 (deletedDocsRef, core,
/// fieldsReaderLocal, rollbackSegmentInfo, termVectorsLocal) that are not shown here. Consult the real
/// header before relying on this declaration being complete.
15 class LPPAPI SegmentReader : public IndexReader {
16 public:
18  virtual ~SegmentReader();
19 
21 
22 protected:
23  bool readOnly;
24 
    // INTERNAL is a project macro that is not defined in this listing - TODO confirm which access level it maps to.
25 INTERNAL:
26  BitVectorPtr deletedDocs;
33 
37 
40  MapStringNorm _norms;
41 
42 private:
43  SegmentInfoPtr si;
44  int32_t readBufferSize;
45  bool deletedDocsDirty;
46  bool normsDirty;
47  int32_t pendingDeleteCount;
48 
    // NOTE(review): the rollback* fields presumably hold saved copies of the fields above for
    // startCommit()/rollbackCommit() - confirm against the implementation file.
49  bool rollbackHasChanges;
50  bool rollbackDeletedDocsDirty;
51  bool rollbackNormsDirty;
52  int32_t rollbackPendingDeleteCount;
53 
54  // optionally used for the .nrm file shared by multiple norms
55  IndexInputPtr singleNormStream;
56  SegmentReaderRefPtr singleNormRef;
57 
58 public:
    /// Called directly after instantiation to create objects that depend on this object being fully constructed.
59  virtual void initialize();
60 
63 
64  static SegmentReaderPtr get(bool readOnly, const SegmentInfoPtr& si, int32_t termInfosIndexDivisor);
65  static SegmentReaderPtr get(bool readOnly, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor);
66 
67  void openDocStores();
68 
    /// Clones the IndexReader and optionally changes readOnly.
70  virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr());
71  SegmentReaderPtr reopenSegment(const SegmentInfoPtr& si, bool doClone, bool openReadOnly);
72 
73  static bool hasDeletions(const SegmentInfoPtr& si);
74 
    /// Returns true if any documents have been deleted.
76  virtual bool hasDeletions();
77 
78  static bool usesCompoundFile(const SegmentInfoPtr& si);
79  static bool hasSeparateNorms(const SegmentInfoPtr& si);
80 
82 
    /// Returns an enumeration of all the terms in the index.
84  virtual TermEnumPtr terms();
85 
    /// Returns an enumeration of all terms starting at a given term.
87  virtual TermEnumPtr terms(const TermPtr& t);
88 
    /// Get the Document at the n'th position.
90  virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector);
91 
    /// Returns true if document n has been deleted.
93  virtual bool isDeleted(int32_t n);
94 
    /// Returns an enumeration of all the documents which contain term.
96  virtual TermDocsPtr termDocs(const TermPtr& term);
97 
    /// Returns an unpositioned TermDocs enumerator.
99  virtual TermDocsPtr termDocs();
100 
103 
    /// Returns the number of documents containing the term t.
105  virtual int32_t docFreq(const TermPtr& t);
106 
    /// Returns the number of documents in this index.
108  virtual int32_t numDocs();
109 
    /// Returns one greater than the largest possible document number.
111  virtual int32_t maxDoc();
112 
115 
    /// Returns true if there are norms stored for this field.
117  virtual bool hasNorms(const String& field);
118 
    /// Returns the byte-encoded normalization factor for the named field of every document.
120  virtual ByteArray norms(const String& field);
121 
    /// Read norms into a pre-allocated array.
123  virtual void norms(const String& field, ByteArray norms, int32_t offset);
124 
126 
    /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run.
    /// (The remainder of the original note is cut off in the generated documentation.)
129  void loadTermsIndex(int32_t termsIndexDivisor);
130 
131  bool normsClosed(); // for testing only
132  bool normsClosed(const String& field); // for testing only
133 
    /// Return a term frequency vector for the specified document and field.
137  virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field);
138 
    /// Load the Term Vector into a user-defined data structure.
141  virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper);
142 
    /// Map all the term vectors for all fields in a Document.
144  virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper);
145 
150 
    /// Return the name of the segment this reader is reading.
152  String getSegmentName();
153 
156  void setSegmentInfo(const SegmentInfoPtr& info);
157 
158  void startCommit();
160 
163 
168 
    /// Returns the number of unique terms (across all fields) in this reader.
170  virtual int64_t getUniqueTermCount();
171 
174 
    /// For IndexReader implementations that use TermInfosReader to read terms, this returns the current
    /// index divisor. NOTE(review): the generated description is truncated after "inde..." - confirm wording.
175  virtual int32_t getTermInfosIndexDivisor();
176 
177 protected:
180 
    /// Clones the norm bytes. May be overridden by subclasses.
184  virtual ByteArray cloneNormBytes(ByteArray bytes);
185 
190 
    /// Implements commit.
192  virtual void doCommit(MapStringString commitUserData);
193 
194  virtual void commitChanges(MapStringString commitUserData);
195 
    /// Implements close.
197  virtual void doClose();
198 
    /// Implements deletion of the document numbered docNum.
201  virtual void doDelete(int32_t docNum);
202 
    /// Implements actual undeleteAll() in subclass.
204  virtual void doUndeleteAll();
205 
    /// Can return null if norms aren't stored.
207  ByteArray getNorms(const String& field);
208 
    /// Implements setNorm in subclass.
210  virtual void doSetNorm(int32_t doc, const String& field, uint8_t value);
211 
212  void openNorms(const DirectoryPtr& cfsDir, int32_t readBufferSize);
213 
214  friend class ReaderPool;
215  friend class IndexWriter;
216  friend class Norm;
217 };
218 
219 }
220 
221 #endif
Lucene::IndexReader::document
virtual DocumentPtr document(int32_t n)
Returns the stored fields of the n'th Document in this index.
Lucene::SegmentReader::fieldInfos
FieldInfosPtr fieldInfos()
Lucene::SegmentReaderRefPtr
boost::shared_ptr< SegmentReaderRef > SegmentReaderRefPtr
Definition: LuceneTypes.h:216
Lucene::SegmentReader::initialize
virtual void initialize()
Called directly after instantiation to create objects that depend on this object being fully constructed.
Lucene::SegmentReader::getNorms
ByteArray getNorms(const String &field)
Can return null if norms aren't stored.
Lucene::SegmentReader::normsClosed
bool normsClosed()
Lucene::FieldsReaderLocalPtr
boost::shared_ptr< FieldsReaderLocal > FieldsReaderLocalPtr
Definition: LuceneTypes.h:132
Lucene::SegmentReader::cloneDeletedDocs
virtual BitVectorPtr cloneDeletedDocs(const BitVectorPtr &bv)
Clones the deletedDocs BitVector. May be overridden by subclasses.
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::SegmentReader::get
static SegmentReaderPtr get(bool readOnly, const DirectoryPtr &dir, const SegmentInfoPtr &si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor)
Lucene::SegmentReader::getTermVectorsReaderOrig
TermVectorsReaderPtr getTermVectorsReaderOrig()
Lucene::TermPtr
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
Lucene::Collection
Utility template class to handle collections that can be safely copied and shared.
Definition: Collection.h:17
Lucene::SegmentReader::directory
virtual DirectoryPtr directory()
Returns the directory this index resides in.
Lucene::SegmentReader::hasNorms
virtual bool hasNorms(const String &field)
Returns true if there are norms stored for this field.
Lucene::SegmentInfoPtr
boost::shared_ptr< SegmentInfo > SegmentInfoPtr
Definition: LuceneTypes.h:208
Lucene::LuceneObjectPtr
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
Lucene::SegmentReader::norms
virtual ByteArray norms(const String &field)
Returns the byte-encoded normalization factor for the named field of every document.
Lucene::SegmentReader::usesCompoundFile
static bool usesCompoundFile(const SegmentInfoPtr &si)
Lucene::CloseableThreadLocal
General purpose thread-local map.
Definition: CloseableThreadLocal.h:16
Lucene::SegmentReader::SegmentReader
SegmentReader()
Lucene::SegmentReader::termPositions
virtual TermPositionsPtr termPositions()
Returns an unpositioned TermPositions enumerator.
Lucene::SegmentReader::rollbackCommit
void rollbackCommit()
Lucene::SegmentReader::getFieldCacheKey
virtual LuceneObjectPtr getFieldCacheKey()
This is necessary so that cloned SegmentReaders (which share the underlying postings data) will map t...
Lucene::TermVectorMapperPtr
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
Lucene::SegmentReader::getTermFreqVector
virtual void getTermFreqVector(int32_t docNumber, const String &field, const TermVectorMapperPtr &mapper)
Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of ...
Lucene::SegmentReader::getOnlySegmentReader
static SegmentReaderPtr getOnlySegmentReader(const IndexReaderPtr &reader)
Lucene::FieldInfosPtr
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition: LuceneTypes.h:127
Lucene::SegmentReader::getTermVectorsReader
TermVectorsReaderPtr getTermVectorsReader()
Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
Lucene::SegmentReader::commitChanges
virtual void commitChanges(MapStringString commitUserData)
Lucene::SegmentReader::doDelete
virtual void doDelete(int32_t docNum)
Implements deletion of the document numbered docNum. Applications should call deleteDocument(int) or ...
Lucene::SegmentReaderPtr
boost::shared_ptr< SegmentReader > SegmentReaderPtr
Definition: LuceneTypes.h:215
Lucene::BitVectorPtr
boost::shared_ptr< BitVector > BitVectorPtr
Definition: LuceneTypes.h:523
Lucene::SegmentReader::getTermInfosIndexDivisor
virtual int32_t getTermInfosIndexDivisor()
For IndexReader implementations that use TermInfosReader to read terms, this returns the current inde...
Lucene::SegmentReader::termsIndexLoaded
bool termsIndexLoaded()
Lucene::SegmentReader::reopenSegment
SegmentReaderPtr reopenSegment(const SegmentInfoPtr &si, bool doClone, bool openReadOnly)
Lucene::SegmentReader::openDocStores
void openDocStores()
Lucene::SegmentReader::getSegmentName
String getSegmentName()
Return the name of the segment this reader is reading.
Lucene::SegmentReader::setSegmentInfo
void setSegmentInfo(const SegmentInfoPtr &info)
Lucene::SegmentReader::termDocs
virtual TermDocsPtr termDocs()
Returns an unpositioned TermDocs enumerator.
Lucene::SegmentReader::checkDeletedCounts
bool checkDeletedCounts()
Lucene::DocumentPtr
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
Lucene::SegmentReader::clone
virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr &other=LuceneObjectPtr())
Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable read...
Lucene::SegmentReader::doUndeleteAll
virtual void doUndeleteAll()
Implements actual undeleteAll() in subclass.
Lucene::IndexReader::termPositions
virtual TermPositionsPtr termPositions()=0
Returns an unpositioned TermPositions enumerator.
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::SegmentReader::deletedDocsRef
SegmentReaderRefPtr deletedDocsRef
Definition: SegmentReader.h:27
Lucene::SegmentReader::termVectorsLocal
CloseableThreadLocal< TermVectorsReader > termVectorsLocal
Definition: SegmentReader.h:31
Lucene::SegmentReader::terms
virtual TermEnumPtr terms(const TermPtr &t)
Returns an enumeration of all terms starting at a given term.
Lucene::TermEnumPtr
boost::shared_ptr< TermEnum > TermEnumPtr
Definition: LuceneTypes.h:235
Lucene::IndexReaderPtr
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
Lucene::SegmentReader::getTermFreqVector
virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr &mapper)
Map all the term vectors for all fields in a Document.
Lucene::TermDocsPtr
boost::shared_ptr< TermDocs > TermDocsPtr
Definition: LuceneTypes.h:236
Lucene::SegmentReader::terms
virtual TermEnumPtr terms()
Returns an enumeration of all the terms in the index.
Lucene::IndexInputPtr
boost::shared_ptr< IndexInput > IndexInputPtr
Definition: LuceneTypes.h:493
Lucene::SegmentReader::getFieldNames
virtual HashSet< String > getFieldNames(FieldOption fieldOption)
Get a list of unique field names that exist in this index and have the specified field option informa...
Lucene::SegmentReader::cloneNormBytes
virtual ByteArray cloneNormBytes(ByteArray bytes)
Clones the norm bytes. May be overridden by subclasses.
Lucene::SegmentReader
Definition: SegmentReader.h:15
Lucene::HashSet< String >
Lucene::SegmentReader::rollbackSegmentInfo
SegmentInfoPtr rollbackSegmentInfo
Definition: SegmentReader.h:30
Lucene::SegmentReader::openNorms
void openNorms(const DirectoryPtr &cfsDir, int32_t readBufferSize)
Lucene::IndexReader
IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...
Definition: IndexReader.h:39
Lucene::TermFreqVectorPtr
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237
IndexReader.h
Lucene::SegmentReader::docFreq
virtual int32_t docFreq(const TermPtr &t)
Returns the number of documents containing the term t.
Lucene::IndexReader::FieldOption
FieldOption
Constants describing field properties, for example used for IndexReader#getFieldNames(FieldOption).
Definition: IndexReader.h:48
Lucene::SegmentReader::fieldsReaderLocal
FieldsReaderLocalPtr fieldsReaderLocal
Definition: SegmentReader.h:29
Lucene::SegmentReader::getDeletesCacheKey
virtual LuceneObjectPtr getDeletesCacheKey()
This returns null if the reader has no deletions.
Lucene::SegmentReader::loadDeletedDocs
void loadDeletedDocs()
Lucene::SegmentReader::numDocs
virtual int32_t numDocs()
Returns the number of documents in this index.
Lucene::TermPositionsPtr
boost::shared_ptr< TermPositions > TermPositionsPtr
Definition: LuceneTypes.h:243
Lucene::CoreReadersPtr
boost::shared_ptr< CoreReaders > CoreReadersPtr
Definition: LuceneTypes.h:100
Lucene::SegmentReader::doClose
virtual void doClose()
Implements close.
Lucene::SegmentReader::getUniqueTermCount
virtual int64_t getUniqueTermCount()
Returns the number of unique terms (across all fields) in this reader.
Lucene::SegmentReader::core
CoreReadersPtr core
Definition: SegmentReader.h:28
Lucene::SegmentReader::~SegmentReader
virtual ~SegmentReader()
Lucene::SegmentReader::startCommit
void startCommit()
Lucene::SegmentReader::getFieldsReader
FieldsReaderPtr getFieldsReader()
Lucene::SegmentReader::doCommit
virtual void doCommit(MapStringString commitUserData)
Implements commit.
Lucene::SegmentReader::hasSeparateNorms
static bool hasSeparateNorms(const SegmentInfoPtr &si)
Lucene::SegmentReader::maxDoc
virtual int32_t maxDoc()
Returns one greater than the largest possible document number.
Lucene::DirectoryPtr
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
Lucene::SegmentReader::hasDeletions
static bool hasDeletions(const SegmentInfoPtr &si)
Lucene::SegmentReader::getSegmentInfo
SegmentInfoPtr getSegmentInfo()
Return the SegmentInfo of the segment this reader is reading.
Lucene::SegmentReader::doSetNorm
virtual void doSetNorm(int32_t doc, const String &field, uint8_t value)
Implements setNorm in subclass.
Lucene::SegmentReader::getTermFreqVector
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String &field)
Return a term frequency vector for the specified document and field. The vector returned contains ter...
Lucene::FieldSelectorPtr
boost::shared_ptr< FieldSelector > FieldSelectorPtr
Definition: LuceneTypes.h:77
Lucene::SegmentReader::normsClosed
bool normsClosed(const String &field)
Lucene::IndexWriter
An IndexWriter creates and maintains an index.
Definition: IndexWriter.h:90
Lucene::SegmentReader::files
HashSet< String > files()
Lucene::SegmentReader::_norms
MapStringNorm _norms
Definition: SegmentReader.h:40
Lucene::FieldsReaderPtr
boost::shared_ptr< FieldsReader > FieldsReaderPtr
Definition: LuceneTypes.h:131
Lucene::SegmentReader::termDocs
virtual TermDocsPtr termDocs(const TermPtr &term)
Returns an enumeration of all the documents which contain term.
Lucene::SegmentReader::norms
virtual void norms(const String &field, ByteArray norms, int32_t offset)
Read norms into a pre-allocated array.
Lucene::TermVectorsReaderPtr
boost::shared_ptr< TermVectorsReader > TermVectorsReaderPtr
Definition: LuceneTypes.h:256
Lucene::SegmentReader::isDeleted
virtual bool isDeleted(int32_t n)
Returns true if document n has been deleted.
Lucene::SegmentReader::readOnly
bool readOnly
Definition: SegmentReader.h:20
Lucene::SegmentReader::loadTermsIndex
void loadTermsIndex(int32_t termsIndexDivisor)
NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run,...
Lucene::SegmentReader::document
virtual DocumentPtr document(int32_t n, const FieldSelectorPtr &fieldSelector)
Get the Document at the n'th position.
Lucene::SegmentReader::hasDeletions
virtual bool hasDeletions()
Returns true if any documents have been deleted.
Lucene::SegmentReader::getTermFreqVectors
virtual Collection< TermFreqVectorPtr > getTermFreqVectors(int32_t docNumber)
Return an array of term frequency vectors for the specified document. The array contains a vector for...
Lucene::SegmentReader::getOnlySegmentReader
static SegmentReaderPtr getOnlySegmentReader(const DirectoryPtr &dir)
Lucene::SegmentReader::get
static SegmentReaderPtr get(bool readOnly, const SegmentInfoPtr &si, int32_t termInfosIndexDivisor)
CloseableThreadLocal.h
Lucene::SegmentReader::clone
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Efficiently clones the IndexReader (sharing most internal state).

clucene.sourceforge.net