Lucene++ - a full-featured, c++ search engine
API Documentation


DocumentsWriter.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef DOCUMENTSWRITER_H
8 #define DOCUMENTSWRITER_H
9 
10 #include "ByteBlockPool.h"
11 #include "RAMFile.h"
12 
13 namespace Lucene {
14 
// DocumentsWriter accepts multiple added documents and directly writes a single
// segment file (summary taken from this page's member index, where it is truncated).
// NOTE(review): this is a rendered listing. Numbered lines with no content, and
// gaps in the numbering, are declarations the renderer dropped (for example
// `threadStates` at line 70 and `directory` at line 156, per the member index).
54 class LPPAPI DocumentsWriter : public LuceneObject {
55 public:
56  DocumentsWriter(const DirectoryPtr& directory, const IndexWriterPtr& writer, const IndexingChainPtr& indexingChain);
57  virtual ~DocumentsWriter();
58 
60 
61 protected:
62  String docStoreSegment; // Current doc-store segment we are writing
63  int32_t docStoreOffset; // Current starting doc-store offset of current segment
64 
65  int32_t nextDocID; // Next docID to be added
66  int32_t numDocsInRAM; // # docs buffered in RAM
67 
69  static const int32_t MAX_THREAD_STATE; // Max # ThreadState instances; if there are more threads than this they share ThreadStates
71  MapThreadDocumentsWriterThreadState threadBindings;
72 
73  int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush)
74  bool aborting; // True if an abort is pending
75 
77 
80 
83 
86 
88  int64_t ramBufferSize; // How much RAM we can use before flushing; 0 if we are flushing by doc count instead
91 
93  int64_t freeTrigger; // If we've allocated 5% over our RAM budget, we then free down to 95%
94  int64_t freeLevel;
95 
97  int32_t maxBufferedDocs; // Flush at this number of docs; if ramBufferSize is non-zero we flush by RAM usage instead
98 
101 
102  bool closed;
103 
107 
110 
111 public:
113  static const int32_t OBJECT_HEADER_BYTES; // Coarse estimates used to measure RAM usage of buffered deletes
114  static const int32_t POINTER_NUM_BYTE;
115  static const int32_t INT_NUM_BYTE;
116  static const int32_t CHAR_NUM_BYTE;
117 
123  static const int32_t BYTES_PER_DEL_TERM; // Rough estimate: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)
124 
127  static const int32_t BYTES_PER_DEL_DOCID; // Rough estimate: del docIDs are List<Integer>; say list allocates ~2X size (2*POINTER)
128 
132  static const int32_t BYTES_PER_DEL_QUERY; // Rough estimate: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)
133 
135  static const int32_t BYTE_BLOCK_SHIFT; // Initial chunk size of the shared byte[] blocks used to store postings data
136  static const int32_t BYTE_BLOCK_SIZE;
137  static const int32_t BYTE_BLOCK_MASK;
138  static const int32_t BYTE_BLOCK_NOT_MASK;
139 
141  static const int32_t CHAR_BLOCK_SHIFT; // Initial chunk size of the shared char[] blocks used to store term text
142  static const int32_t CHAR_BLOCK_SIZE;
143  static const int32_t CHAR_BLOCK_MASK;
144 
145  static const int32_t MAX_TERM_LENGTH;
146 
148  static const int32_t INT_BLOCK_SHIFT; // Initial chunk size of the shared int[] blocks used to store postings data
149  static const int32_t INT_BLOCK_SIZE;
150  static const int32_t INT_BLOCK_MASK;
151 
152  static const int32_t PER_DOC_BLOCK_SIZE;
153 
154 INTERNAL:
155  IndexWriterWeakPtr _writer;
158  String segment; // Current segment we are working on
159 
160  int32_t numDocsInStore; // # docs written to doc stores
161 
162  bool flushPending; // True when a thread has decided to flush
163  bool bufferIsFull; // True when it's time to write segment
164 
166  int32_t maxFieldLength;
168 
170 
173 
176 
179 
180  int64_t numBytesAlloc;
181  int64_t numBytesUsed;
182 
183  // used only by assert
185 
186 public:
187  virtual void initialize(); // Called directly after instantiation to create objects that depend on this object being fully constructed
188 
191 
193 
194  void updateFlushedDocCount(int32_t n);
196  void setFlushedDocCount(int32_t n);
197 
199  bool hasProx(); // Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false
200 
202  void setInfoStream(const InfoStreamPtr& infoStream); // If non-null, various details of indexing are printed here
203 
204  void setMaxFieldLength(int32_t maxFieldLength);
205  void setSimilarity(const SimilarityPtr& similarity);
206 
208  void setRAMBufferSizeMB(double mb); // Set how much RAM we can use before flushing
210 
212  void setMaxBufferedDocs(int32_t count); // Set max buffered docs, which means we will flush by doc count instead of by RAM usage
214 
216  String getSegment(); // Get current segment name we are writing
217 
219  int32_t getNumDocsInRAM(); // Returns how many docs are currently buffered in RAM
220 
223 
225  int32_t getDocStoreOffset(); // Returns the doc offset into the shared doc store for the current buffered docs
226 
229  String closeDocStore(); // Closes the current open doc stores and returns the doc store segment name
230 
232 
233  void message(const String& message);
234 
238 
239  void addOpenFile(const String& name);
240  void removeOpenFile(const String& name);
241 
242  void setAborting();
243 
246  void abort(); // Called if we hit an exception at a bad time (when updating the index files)
247 
251 
252  bool anyChanges();
253 
254  void initFlushState(bool onlyDocStore);
255 
257  int32_t flush(bool _closeDocStore); // Flush all pending docs to a new segment
258 
260 
262  void createCompoundFile(const String& segment); // Build compound file for the segment we just flushed
263 
268 
269  void pushDeletes();
270 
271  void close();
272 
273  void initSegmentName(bool onlyDocStore);
274 
279 
281  bool addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer); // Returns true if the caller (IndexWriter) should now flush
282 
283  bool updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer);
284  bool updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm);
285 
286  int32_t getNumBufferedDeleteTerms(); // for testing
287  MapTermNum getBufferedDeleteTerms(); // for testing
288 
290  void remapDeletes(const SegmentInfosPtr& infos, Collection< Collection<int32_t> > docMaps, Collection<int32_t> delCounts, const OneMergePtr& merge, int32_t mergeDocCount); // Called whenever a merge has completed and the merged segments had deletions
291 
293  bool bufferDeleteTerm(const TermPtr& term);
295  bool bufferDeleteQuery(const QueryPtr& query);
296  bool deletesFull();
298 
299  void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms);
301 
302  bool hasDeletes();
303  bool applyDeletes(const SegmentInfosPtr& infos);
304  bool doBalanceRAM();
305 
307 
308  int64_t getRAMUsed();
309 
310  IntArray getIntBlock(bool trackAllocations);
311  void bytesAllocated(int64_t numBytes);
312  void bytesUsed(int64_t numBytes);
313  void recycleIntBlocks(Collection<IntArray> blocks, int32_t start, int32_t end);
314 
315  CharArray getCharBlock();
316  void recycleCharBlocks(Collection<CharArray> blocks, int32_t numBlocks);
317 
318  String toMB(int64_t v);
319 
328  void balanceRAM(); // Works across four pools of RAM, including postings, byte blocks (freq/prox posting data) and char blocks
329 
330 protected:
332  void doAfterFlush(); // Reset after a flush
333 
335 
337 
339 
340  // used only by assert
341  bool checkDeleteTerm(const TermPtr& term);
342 
343  bool applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart);
344  void addDeleteTerm(const TermPtr& term, int32_t docCount);
345 
347  void addDeleteDocID(int32_t docID); // Buffer a specific docID for deletion
348  void addDeleteQuery(const QueryPtr& query, int32_t docID);
349 
351  void finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter); // Does the synchronized work to finish/flush the inverted document
352 
353  friend class WaitQueue;
354 };
355 
// State object with a maximum field length and a docID.
// NOTE(review): the member index on this page also lists a default constructor
// and the members _docWriter (361), analyzer (365), infoStream (367),
// similarity (368), doc (370) and maxTermPrefix (371), which this listing omits.
356 class DocState : public LuceneObject {
357 public:
359  virtual ~DocState();
360 
362 
363 public:
366  int32_t maxFieldLength;
369  int32_t docID;
372 
373 public:
375  virtual bool testPoint(const String& name); // Only called by asserts
376 
377  void clear();
378 };
379 
// RAMFile buffer for DocWriters.
// NOTE(review): the member index lists a `_docWriter` weak pointer at line 386
// that this listing omits.
381 class PerDocBuffer : public RAMFile {
382 public:
383  PerDocBuffer(const DocumentsWriterPtr& docWriter);
384  virtual ~PerDocBuffer();
385 
387 
388 protected:
390 
391 public:
393  void recycle(); // Recycle the bytes used
394 
395 protected:
397  virtual ByteArray newBuffer(int32_t size); // Allocate bytes used from shared pool
398 };
399 
// Consumer returns this on each doc. It holds any state that must be flushed
// synchronized in docID order (the index text is truncated at "in docID o...";
// "order" is presumed).
// Abstract: finish(), abort() and sizeInBytes() are pure virtual.
// NOTE(review): the member index lists a default constructor and a `next`
// member (line 407) that this listing omits.
402 class DocWriter : public LuceneObject {
403 public:
405  virtual ~DocWriter();
406 
408 
409 public:
411  int32_t docID;
412 
413 public:
414  virtual void finish() = 0;
415  virtual void abort() = 0;
416  virtual int64_t sizeInBytes() = 0;
417 
418  virtual void setNext(const DocWriterPtr& next);
419 };
420 
// Abstract interface: an IndexingChain must define getChain(DocumentsWriter),
// which returns a DocConsumer (the index description is truncated after that point).
423 class IndexingChain : public LuceneObject {
424 public:
425  virtual ~IndexingChain();
426 
428 
429 public:
430  virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter) = 0; // Pure virtual; subclasses supply the chain
431 };
432 
// NOTE(review): the class header for this body is missing from the listing.
// The member index places Lucene::DefaultIndexingChain at DocumentsWriter.h:447
// and describes it as "the current indexing chain"; its base class is not
// visible here — confirm against the real header.
448 public:
450 
452 
453 public:
454  virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter); // Listed in the index as DefaultIndexingChain::getChain
455 };
456 
// Concrete DocWriter: declares finish(), abort() and sizeInBytes(), the three
// functions DocWriter leaves pure virtual. Their behavior is defined outside
// this header and is not visible here.
457 class SkipDocWriter : public DocWriter {
458 public:
459  virtual ~SkipDocWriter();
460 
462 
463 public:
464  virtual void finish();
465  virtual void abort();
466  virtual int64_t sizeInBytes();
467 };
468 
// Queue helper constructed from a DocumentsWriter; DocumentsWriter declares it
// as a friend class.
// NOTE(review): the member index lists `_docWriter` (line 474) and
// `Collection<DocWriterPtr> waiting` (line 480), which this listing omits.
469 class WaitQueue : public LuceneObject {
470 public:
471  WaitQueue(const DocumentsWriterPtr& docWriter);
472  virtual ~WaitQueue();
473 
475 
476 protected:
478 
479 public:
481  int32_t nextWriteDocID; // presumably the docID expected to be written next — TODO confirm
482  int32_t nextWriteLoc; // presumably an index into `waiting` — TODO confirm
483  int32_t numWaiting; // presumably the number of queued DocWriters — TODO confirm
484  int64_t waitingBytes; // presumably the total bytes held by queued DocWriters — TODO confirm
485 
486 public:
487  void reset();
488  bool doResume();
489  bool doPause();
490  void abort();
491  bool add(const DocWriterPtr& doc);
492 
493 protected:
494  void writeDocument(const DocWriterPtr& doc);
495 };
496 
// NOTE(review): the class header for this body is missing from the listing.
// The member index places Lucene::ByteBlockAllocator at DocumentsWriter.h:497.
// It also lists ByteBlockPoolAllocatorBase (ByteBlockPool.h:54), presumably the
// base class — confirm against the real header. The index further lists
// `_docWriter` (502), `freeByteBlocks` (509), a virtual destructor and a
// one-argument recycleByteBlocks overload, all omitted here.
498 public:
499  ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize);
501 
503 
504 protected:
506 
507 public:
508  int32_t blockSize;
510 
511 public:
513  virtual ByteArray getByteBlock(bool trackAllocations); // Allocate another byte[] from the shared pool
514 
516  virtual void recycleByteBlocks(Collection<ByteArray> blocks, int32_t start, int32_t end); // Return byte[]'s to the pool
518 };
519 
520 }
521 
522 #endif
Lucene::DocumentsWriter::lastDeleteTerm
TermPtr lastDeleteTerm
Definition: DocumentsWriter.h:184
Lucene::DocumentsWriter::BYTES_PER_DEL_DOCID
static const int32_t BYTES_PER_DEL_DOCID
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER)....
Definition: DocumentsWriter.h:127
Lucene::ByteBlockAllocator::blockSize
int32_t blockSize
Definition: DocumentsWriter.h:508
Lucene::DefaultIndexingChain::getChain
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)
Lucene::DocumentsWriter::getBufferedDeleteTerms
MapTermNum getBufferedDeleteTerms()
Lucene::DocumentsWriter::finishDocument
void finishDocument(const DocumentsWriterThreadStatePtr &perThread, const DocWriterPtr &docWriter)
Does the synchronized work to finish/flush the inverted document.
Lucene::DocumentsWriter::MAX_TERM_LENGTH
static const int32_t MAX_TERM_LENGTH
Definition: DocumentsWriter.h:145
Lucene::DocumentsWriter::CHAR_NUM_BYTE
static const int32_t CHAR_NUM_BYTE
Definition: DocumentsWriter.h:116
Lucene::IndexingChain
The IndexingChain must define the getChain(DocumentsWriter) method which returns the DocConsumer that...
Definition: DocumentsWriter.h:423
Lucene::DocumentsWriter::bufferDeleteQueries
bool bufferDeleteQueries(Collection< QueryPtr > queries)
Lucene::DocumentsWriter::byteBlockAllocator
ByteBlockAllocatorPtr byteBlockAllocator
Definition: DocumentsWriter.h:177
Lucene::DocumentsWriter::getFlushedDocCount
int32_t getFlushedDocCount()
Lucene::DocumentsWriter::checkDeleteTerm
bool checkDeleteTerm(const TermPtr &term)
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::DocumentsWriter::addDeleteDocID
void addDeleteDocID(int32_t docID)
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a docum...
Lucene::DocumentsWriter::createCompoundFile
void createCompoundFile(const String &segment)
Build compound file for the segment we just flushed.
Lucene::DocumentsWriter::recycleCharBlocks
void recycleCharBlocks(Collection< CharArray > blocks, int32_t numBlocks)
Lucene::DocumentsWriter::newPerDocBuffer
PerDocBufferPtr newPerDocBuffer()
Create and return a new DocWriterBuffer.
Lucene::TermPtr
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
Lucene::DocumentsWriter::freeTrigger
int64_t freeTrigger
If we've allocated 5% over our RAM budget, we then free down to 95%.
Definition: DocumentsWriter.h:93
Lucene::DocWriter::DocWriter
DocWriter()
Lucene::PerDocBuffer::~PerDocBuffer
virtual ~PerDocBuffer()
Lucene::DocumentsWriter::CHAR_BLOCK_MASK
static const int32_t CHAR_BLOCK_MASK
Definition: DocumentsWriter.h:143
Lucene::DocumentsWriter::updateDocument
bool updateDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer, const TermPtr &delTerm)
Lucene::DocumentsWriterWeakPtr
boost::weak_ptr< DocumentsWriter > DocumentsWriterWeakPtr
Definition: LuceneTypes.h:123
Lucene::DocumentsWriter::segment
String segment
Definition: DocumentsWriter.h:158
Lucene::PerDocBufferPtr
boost::shared_ptr< PerDocBuffer > PerDocBufferPtr
Definition: LuceneTypes.h:199
Lucene::Collection< DocumentsWriterThreadStatePtr >
Lucene::DocumentsWriter::bufferDeleteQuery
bool bufferDeleteQuery(const QueryPtr &query)
Lucene::OneMergePtr
boost::shared_ptr< OneMerge > OneMergePtr
Definition: LuceneTypes.h:192
Lucene::SkipDocWriter::sizeInBytes
virtual int64_t sizeInBytes()
Lucene::DocumentsWriter::setMaxBufferedDeleteTerms
void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms)
Lucene::DocWriter
Consumer returns this on each doc. This holds any state that must be flushed synchronized "in docID o...
Definition: DocumentsWriter.h:402
Lucene::DocumentsWriter::waitForWaitQueue
void waitForWaitQueue()
Lucene::DocumentsWriter::pauseAllThreads
bool pauseAllThreads()
Returns true if an abort is in progress.
Lucene::DocumentsWriter::doAfterFlush
void doAfterFlush()
Reset after a flush.
Lucene::DocumentsWriter::maxBufferedDeleteTerms
int32_t maxBufferedDeleteTerms
The max number of delete terms that can be buffered before they must be flushed to disk.
Definition: DocumentsWriter.h:85
Lucene::DocumentsWriter::timeToFlushDeletes
bool timeToFlushDeletes()
Lucene::PerDocBuffer
RAMFile buffer for DocWriters.
Definition: DocumentsWriter.h:381
Lucene::DocWriter::abort
virtual void abort()=0
Lucene::DocState::clear
void clear()
Lucene::DocumentsWriter::doBalanceRAM
bool doBalanceRAM()
Lucene::DocumentsWriter::skipDocWriter
SkipDocWriterPtr skipDocWriter
Definition: DocumentsWriter.h:175
Lucene::DocumentsWriter::addDocument
bool addDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer)
Returns true if the caller (IndexWriter) should now flush.
Lucene::DocumentsWriterPtr
boost::shared_ptr< DocumentsWriter > DocumentsWriterPtr
Definition: LuceneTypes.h:123
Lucene::DocumentsWriter::updateFlushedDocCount
void updateFlushedDocCount(int32_t n)
Lucene::DocState::~DocState
virtual ~DocState()
Lucene::DocumentsWriter::BYTE_BLOCK_MASK
static const int32_t BYTE_BLOCK_MASK
Definition: DocumentsWriter.h:137
Lucene::DocumentsWriter::getCharBlock
CharArray getCharBlock()
Lucene::QueryPtr
boost::shared_ptr< Query > QueryPtr
Definition: LuceneTypes.h:420
Lucene::DocumentsWriter::abortedFiles
HashSet< String > abortedFiles()
Lucene::DocWriter::~DocWriter
virtual ~DocWriter()
Lucene::DocumentsWriter::getNumBufferedDeleteTerms
int32_t getNumBufferedDeleteTerms()
Lucene::SkipDocWriterPtr
boost::shared_ptr< SkipDocWriter > SkipDocWriterPtr
Definition: LuceneTypes.h:226
Lucene::DocumentsWriter::bufferIsFull
bool bufferIsFull
Definition: DocumentsWriter.h:163
Lucene::DocumentsWriter::allThreadsIdle
bool allThreadsIdle()
Lucene::DocWriterPtr
boost::shared_ptr< DocWriter > DocWriterPtr
Definition: LuceneTypes.h:125
Lucene::DocumentsWriter::balanceRAM
void balanceRAM()
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds c...
Lucene::DocumentsWriter::directory
DirectoryPtr directory
Definition: DocumentsWriter.h:156
Lucene::DocumentsWriter::PER_DOC_BLOCK_SIZE
static const int32_t PER_DOC_BLOCK_SIZE
Definition: DocumentsWriter.h:152
Lucene::DocumentsWriter::_abortedFiles
HashSet< String > _abortedFiles
List of files that were written before last abort()
Definition: DocumentsWriter.h:105
Lucene::DocumentsWriter::bufferDeleteTerm
bool bufferDeleteTerm(const TermPtr &term)
Lucene::WaitQueue::nextWriteLoc
int32_t nextWriteLoc
Definition: DocumentsWriter.h:482
Lucene::ByteBlockAllocatorPtr
boost::shared_ptr< ByteBlockAllocator > ByteBlockAllocatorPtr
Definition: LuceneTypes.h:88
Lucene::DocumentsWriter::CHAR_BLOCK_SHIFT
static const int32_t CHAR_BLOCK_SHIFT
Initial chunk size of the shared char[] blocks used to store term text.
Definition: DocumentsWriter.h:141
Lucene::DocumentsWriter::anyChanges
bool anyChanges()
Lucene::DocState::maxFieldLength
int32_t maxFieldLength
Definition: DocumentsWriter.h:366
Lucene::SegmentWriteStatePtr
boost::shared_ptr< SegmentWriteState > SegmentWriteStatePtr
Definition: LuceneTypes.h:222
Lucene::DocumentsWriter::closedFiles
HashSet< String > closedFiles()
Lucene::DocumentsWriter::pauseThreads
int32_t pauseThreads
Definition: DocumentsWriter.h:73
Lucene::ByteBlockAllocator
Definition: DocumentsWriter.h:497
Lucene::DocumentsWriter::initFlushState
void initFlushState(bool onlyDocStore)
Lucene::DocumentsWriter::setSimilarity
void setSimilarity(const SimilarityPtr &similarity)
Lucene::DocumentsWriter::numDocsInStore
int32_t numDocsInStore
Definition: DocumentsWriter.h:160
Lucene::DocumentsWriter::addDeleteQuery
void addDeleteQuery(const QueryPtr &query, int32_t docID)
Lucene::DocWriter::sizeInBytes
virtual int64_t sizeInBytes()=0
Lucene::DocumentsWriter::freeIntBlocks
Collection< IntArray > freeIntBlocks
Definition: DocumentsWriter.h:108
Lucene::DocumentsWriter::waitReady
void waitReady(const DocumentsWriterThreadStatePtr &state)
Lucene::ByteBlockPoolAllocatorBase
Definition: ByteBlockPool.h:54
Lucene::DocumentsWriter::getMaxBufferedDeleteTerms
int32_t getMaxBufferedDeleteTerms()
Lucene::ByteBlockAllocator::~ByteBlockAllocator
virtual ~ByteBlockAllocator()
Lucene::DocumentsWriter::_openFiles
HashSet< String > _openFiles
Definition: DocumentsWriter.h:171
Lucene::DocumentsWriter::ramBufferSize
int64_t ramBufferSize
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead.
Definition: DocumentsWriter.h:88
Lucene::DocumentsWriter::updateDocument
bool updateDocument(const TermPtr &t, const DocumentPtr &doc, const AnalyzerPtr &analyzer)
Lucene::ByteBlockAllocator::getByteBlock
virtual ByteArray getByteBlock(bool trackAllocations)
Allocate another byte[] from the shared pool.
Lucene::ByteBlockAllocator::recycleByteBlocks
virtual void recycleByteBlocks(Collection< ByteArray > blocks, int32_t start, int32_t end)
Return byte[]'s to the pool.
Lucene::DocumentsWriter::closeDocStore
String closeDocStore()
Closes the current open doc stores and returns the doc store segment name. This returns null if there ...
Lucene::WaitQueue::writeDocument
void writeDocument(const DocWriterPtr &doc)
Lucene::WaitQueue::add
bool add(const DocWriterPtr &doc)
Lucene::DocumentsWriter::initialize
virtual void initialize()
Called directly after instantiation to create objects that depend on this object being fully construc...
Lucene::DocState
Definition: DocumentsWriter.h:356
Lucene::DocumentsWriter::toMB
String toMB(int64_t v)
Lucene::DocumentsWriter::hasDeletes
bool hasDeletes()
Lucene::DocumentsWriter::INT_BLOCK_SIZE
static const int32_t INT_BLOCK_SIZE
Definition: DocumentsWriter.h:149
Lucene::DocumentsWriter::remapDeletes
void remapDeletes(const SegmentInfosPtr &infos, Collection< Collection< int32_t > > docMaps, Collection< int32_t > delCounts, const OneMergePtr &merge, int32_t mergeDocCount)
Called whenever a merge has completed and the merged segments had deletions.
Lucene::DocumentsWriter::getRAMBufferSizeMB
double getRAMBufferSizeMB()
Lucene::DocumentsWriter::freeLevel
int64_t freeLevel
Definition: DocumentsWriter.h:94
Lucene::DocumentsWriter::abort
void abort()
Called if we hit an exception at a bad time (when updating the index files) and must discard all curr...
Lucene::DocumentsWriter::waitQueueResumeBytes
int64_t waitQueueResumeBytes
Definition: DocumentsWriter.h:90
Lucene::DocumentsWriter::bufferDeleteTerms
bool bufferDeleteTerms(Collection< TermPtr > terms)
Lucene::DocumentsWriter::getIntBlock
IntArray getIntBlock(bool trackAllocations)
Lucene::WaitQueue::nextWriteDocID
int32_t nextWriteDocID
Definition: DocumentsWriter.h:481
Lucene::DocumentsWriter
This class accepts multiple added documents and directly writes a single segment file....
Definition: DocumentsWriter.h:54
Lucene::AnalyzerPtr
boost::shared_ptr< Analyzer > AnalyzerPtr
Definition: LuceneTypes.h:20
Lucene::SkipDocWriter::~SkipDocWriter
virtual ~SkipDocWriter()
Lucene::DocumentsWriter::deletesFull
bool deletesFull()
Lucene::DocumentsWriter::setFlushPending
bool setFlushPending()
Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter...
Lucene::DocumentsWriter::doApplyDeletes
bool doApplyDeletes()
Lucene::DocumentsWriter::addOpenFile
void addOpenFile(const String &name)
Lucene::DocumentsWriter::message
void message(const String &message)
Lucene::RAMFile
File used as buffer in RAMDirectory.
Definition: RAMFile.h:15
Lucene::DocumentsWriter::bytesUsed
void bytesUsed(int64_t numBytes)
Lucene::DocumentsWriter::numBytesAlloc
int64_t numBytesAlloc
Definition: DocumentsWriter.h:180
Lucene::WaitQueuePtr
boost::shared_ptr< WaitQueue > WaitQueuePtr
Definition: LuceneTypes.h:265
Lucene::WaitQueue::WaitQueue
WaitQueue(const DocumentsWriterPtr &docWriter)
Lucene::DocumentsWriter::BYTE_BLOCK_SHIFT
static const int32_t BYTE_BLOCK_SHIFT
Initial chunk size of the shared byte[] blocks used to store postings data.
Definition: DocumentsWriter.h:135
Lucene::DocumentsWriter::getDefaultIndexingChain
static IndexingChainPtr getDefaultIndexingChain()
Lucene::DocFieldProcessorPtr
boost::shared_ptr< DocFieldProcessor > DocFieldProcessorPtr
Definition: LuceneTypes.h:115
Lucene::IndexWriterWeakPtr
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition: LuceneTypes.h:160
Lucene::ByteBlockAllocator::ByteBlockAllocator
ByteBlockAllocator(const DocumentsWriterPtr &docWriter, int32_t blockSize)
Lucene::WaitQueue
Definition: DocumentsWriter.h:469
Lucene::DocumentPtr
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
Lucene::DocumentsWriter::setInfoStream
void setInfoStream(const InfoStreamPtr &infoStream)
If non-null, various details of indexing are printed here.
Lucene::DocumentsWriter::deletesInRAM
BufferedDeletesPtr deletesInRAM
Deletes done after the last flush; these are discarded on abort.
Definition: DocumentsWriter.h:79
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::DocWriter::docID
int32_t docID
Definition: DocumentsWriter.h:411
Lucene::SkipDocWriter::finish
virtual void finish()
Lucene::DocumentsWriter::numDocsInRAM
int32_t numDocsInRAM
Definition: DocumentsWriter.h:66
Lucene::DocumentsWriter::threadBindings
MapThreadDocumentsWriterThreadState threadBindings
Definition: DocumentsWriter.h:71
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::WaitQueue::waiting
Collection< DocWriterPtr > waiting
Definition: DocumentsWriter.h:480
Lucene::PerDocBuffer::newBuffer
virtual ByteArray newBuffer(int32_t size)
Allocate bytes used from shared pool.
Lucene::DocumentsWriter::similarity
SimilarityPtr similarity
Definition: DocumentsWriter.h:167
Lucene::WaitQueue::reset
void reset()
Lucene::IndexReaderPtr
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
Lucene::ByteBlockAllocator::_docWriter
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:502
Lucene::DocumentsWriter::flushState
SegmentWriteStatePtr flushState
Definition: DocumentsWriter.h:106
Lucene::DocumentsWriter::waitQueuePauseBytes
int64_t waitQueuePauseBytes
Definition: DocumentsWriter.h:89
Lucene::WaitQueue::doResume
bool doResume()
Lucene::DocumentsWriter::~DocumentsWriter
virtual ~DocumentsWriter()
Lucene::DocumentsWriter::threadStates
Collection< DocumentsWriterThreadStatePtr > threadStates
Definition: DocumentsWriter.h:70
Lucene::DocumentsWriter::_closedFiles
HashSet< String > _closedFiles
Definition: DocumentsWriter.h:172
Lucene::WaitQueue::_docWriter
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:474
Lucene::DocumentsWriter::clearFlushPending
void clearFlushPending()
Lucene::DocumentsWriter::bytesAllocated
void bytesAllocated(int64_t numBytes)
Lucene::HashSet< String >
Lucene::DocumentsWriter::maxFieldLength
int32_t maxFieldLength
Definition: DocumentsWriter.h:166
Lucene::DocState::infoStream
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:367
Lucene::DocumentsWriter::getSegment
String getSegment()
Get current segment name we are writing.
Lucene::WaitQueue::numWaiting
int32_t numWaiting
Definition: DocumentsWriter.h:483
Lucene::DocWriter::finish
virtual void finish()=0
Lucene::DocState::similarity
SimilarityPtr similarity
Definition: DocumentsWriter.h:368
Lucene::DocumentsWriter::infoStream
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:165
Lucene::SkipDocWriter
Definition: DocumentsWriter.h:457
Lucene::WaitQueue::waitingBytes
int64_t waitingBytes
Definition: DocumentsWriter.h:484
Lucene::DocumentsWriter::consumer
DocConsumerPtr consumer
Definition: DocumentsWriter.h:169
Lucene::DocumentsWriter::getThreadState
DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr &doc, const TermPtr &delTerm)
Returns a free (idle) ThreadState that may be used for indexing this one document....
Lucene::DocumentsWriter::applyDeletes
bool applyDeletes(const SegmentInfosPtr &infos)
Lucene::DocumentsWriter::getFlushedFiles
HashSet< String > getFlushedFiles()
Lucene::DocState::testPoint
virtual bool testPoint(const String &name)
Only called by asserts.
Lucene::DocumentsWriterThreadStatePtr
boost::shared_ptr< DocumentsWriterThreadState > DocumentsWriterThreadStatePtr
Definition: LuceneTypes.h:124
Lucene::DocumentsWriter::resumeAllThreads
void resumeAllThreads()
Lucene::DocumentsWriter::getMaxBufferedDocs
int32_t getMaxBufferedDocs()
Lucene::DocumentsWriter::CHAR_BLOCK_SIZE
static const int32_t CHAR_BLOCK_SIZE
Definition: DocumentsWriter.h:142
Lucene::DefaultIndexingChain
This is the current indexing chain: DocConsumer / DocConsumerPerThread --> code: DocFieldProcessor / ...
Definition: DocumentsWriter.h:447
Lucene::DefaultIndexingChain::~DefaultIndexingChain
virtual ~DefaultIndexingChain()
Lucene::DocumentsWriter::setMaxBufferedDocs
void setMaxBufferedDocs(int32_t count)
Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
Lucene::DocumentsWriter::INT_NUM_BYTE
static const int32_t INT_NUM_BYTE
Definition: DocumentsWriter.h:115
Lucene::DocumentsWriter::BYTE_BLOCK_NOT_MASK
static const int32_t BYTE_BLOCK_NOT_MASK
Definition: DocumentsWriter.h:138
Lucene::WaitQueue::abort
void abort()
Lucene::DocState::doc
DocumentPtr doc
Definition: DocumentsWriter.h:370
Lucene::DocumentsWriter::close
void close()
Lucene::DocumentsWriter::getDocStoreOffset
int32_t getDocStoreOffset()
Returns the doc offset into the shared doc store for the current buffered docs.
Lucene::DocState::analyzer
AnalyzerPtr analyzer
Definition: DocumentsWriter.h:365
Lucene::DocumentsWriter::POINTER_NUM_BYTE
static const int32_t POINTER_NUM_BYTE
Definition: DocumentsWriter.h:114
Lucene::DocumentsWriter::flush
int32_t flush(bool _closeDocStore)
Flush all pending docs to a new segment.
Lucene::DocumentsWriter::setAborting
void setAborting()
Lucene::WaitQueue::doPause
bool doPause()
Lucene::DocumentsWriter::addDeleteTerm
void addDeleteTerm(const TermPtr &term, int32_t docCount)
Lucene::DocState::_docWriter
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:361
Lucene::DocumentsWriter::DocumentsWriter
DocumentsWriter(const DirectoryPtr &directory, const IndexWriterPtr &writer, const IndexingChainPtr &indexingChain)
Lucene::DocumentsWriter::waitQueue
WaitQueuePtr waitQueue
Definition: DocumentsWriter.h:174
Lucene::DocumentsWriter::setFlushedDocCount
void setFlushedDocCount(int32_t n)
Lucene::DocumentsWriter::indexingChain
IndexingChainPtr indexingChain
Definition: DocumentsWriter.h:157
Lucene::WaitQueue::~WaitQueue
virtual ~WaitQueue()
Lucene::DocumentsWriter::pushDeletes
void pushDeletes()
Lucene::IndexingChain::getChain
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)=0
Lucene::DocumentsWriter::getDocStoreSegment
String getDocStoreSegment()
Returns the current doc store segment we are writing to.
Lucene::DocState::docID
int32_t docID
Definition: DocumentsWriter.h:369
Lucene::PerDocBuffer::recycle
void recycle()
Recycle the bytes used.
Lucene::DocumentsWriter::recycleIntBlocks
void recycleIntBlocks(Collection< IntArray > blocks, int32_t start, int32_t end)
Lucene::DocState::DocState
DocState()
RAMFile.h
Lucene::PerDocBuffer::PerDocBuffer
PerDocBuffer(const DocumentsWriterPtr &docWriter)
Lucene::DocWriter::next
DocWriterPtr next
Definition: DocumentsWriter.h:407
Lucene::DocumentsWriter::docFieldProcessor
DocFieldProcessorPtr docFieldProcessor
Definition: DocumentsWriter.h:76
Lucene::DocumentsWriter::closed
bool closed
Definition: DocumentsWriter.h:102
Lucene::DocumentsWriter::BYTES_PER_DEL_QUERY
static const int32_t BYTES_PER_DEL_QUERY
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition: DocumentsWriter.h:132
Lucene::DocumentsWriter::OBJECT_HEADER_BYTES
static const int32_t OBJECT_HEADER_BYTES
Coarse estimates used to measure RAM usage of buffered deletes.
Definition: DocumentsWriter.h:113
Lucene::DocumentsWriter::getNumDocsInRAM
int32_t getNumDocsInRAM()
Returns how many docs are currently buffered in RAM.
Lucene::SegmentInfosPtr
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
Lucene::SkipDocWriter::abort
virtual void abort()
Lucene::DirectoryPtr
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
Lucene::DocumentsWriter::INT_BLOCK_MASK
static const int32_t INT_BLOCK_MASK
Definition: DocumentsWriter.h:150
Lucene::DocumentsWriter::maxBufferedDocs
int32_t maxBufferedDocs
Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead.
Definition: DocumentsWriter.h:97
Lucene::PerDocBuffer::_docWriter
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:386
Lucene::DocumentsWriter::BYTE_BLOCK_SIZE
static const int32_t BYTE_BLOCK_SIZE
Definition: DocumentsWriter.h:136
Lucene::DocConsumerPtr
boost::shared_ptr< DocConsumer > DocConsumerPtr
Definition: LuceneTypes.h:106
Lucene::DocumentsWriter::getRAMUsed
int64_t getRAMUsed()
Lucene::DocumentsWriter::nextDocID
int32_t nextDocID
Definition: DocumentsWriter.h:65
Lucene::InfoStreamPtr
boost::shared_ptr< InfoStream > InfoStreamPtr
Definition: LuceneTypes.h:532
Lucene::DocumentsWriter::docStoreSegment
String docStoreSegment
Definition: DocumentsWriter.h:59
Lucene::DocumentsWriter::setMaxFieldLength
void setMaxFieldLength(int32_t maxFieldLength)
Lucene::DocumentsWriter::hasProx
bool hasProx()
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false.
Lucene::DocWriter::setNext
virtual void setNext(const DocWriterPtr &next)
Lucene::SimilarityPtr
boost::shared_ptr< Similarity > SimilarityPtr
Definition: LuceneTypes.h:435
Lucene::ByteBlockAllocator::recycleByteBlocks
virtual void recycleByteBlocks(Collection< ByteArray > blocks)
Lucene::DocumentsWriter::freeCharBlocks
Collection< CharArray > freeCharBlocks
Definition: DocumentsWriter.h:109
Lucene::BufferedDeletesPtr
boost::shared_ptr< BufferedDeletes > BufferedDeletesPtr
Definition: LuceneTypes.h:87
Lucene::DocumentsWriter::perDocAllocator
ByteBlockAllocatorPtr perDocAllocator
Definition: DocumentsWriter.h:178
Lucene::DocumentsWriter::aborting
bool aborting
Definition: DocumentsWriter.h:74
Lucene::DocumentsWriter::applyDeletes
bool applyDeletes(const IndexReaderPtr &reader, int32_t docIDStart)
Lucene::DocumentsWriter::numBytesUsed
int64_t numBytesUsed
Definition: DocumentsWriter.h:181
Lucene::ByteBlockAllocator::freeByteBlocks
Collection< ByteArray > freeByteBlocks
Definition: DocumentsWriter.h:509
Lucene::DocumentsWriter::removeOpenFile
void removeOpenFile(const String &name)
Lucene::IndexWriterPtr
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160
Lucene::DocumentsWriter::initSegmentName
void initSegmentName(bool onlyDocStore)
Lucene::DocumentsWriter::BYTES_PER_DEL_TERM
static const int32_t BYTES_PER_DEL_TERM
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition: DocumentsWriter.h:123
Lucene::DocState::maxTermPrefix
String maxTermPrefix
Definition: DocumentsWriter.h:371
Lucene::DocumentsWriter::INT_BLOCK_SHIFT
static const int32_t INT_BLOCK_SHIFT
Initial chunk size of the shared int[] blocks used to store postings data.
Definition: DocumentsWriter.h:148
Lucene::DocumentsWriter::flushedDocCount
int32_t flushedDocCount
How many docs already flushed to index.
Definition: DocumentsWriter.h:100
Lucene::DocumentsWriter::MAX_THREAD_STATE
static const int32_t MAX_THREAD_STATE
Max # ThreadState instances; if there are more threads than this they share ThreadStates.
Definition: DocumentsWriter.h:69
Lucene::DocumentsWriter::setRAMBufferSizeMB
void setRAMBufferSizeMB(double mb)
Set how much RAM we can use before flushing.
Lucene::IndexingChainPtr
boost::shared_ptr< IndexingChain > IndexingChainPtr
Definition: LuceneTypes.h:156
Lucene::IndexingChain::~IndexingChain
virtual ~IndexingChain()
Lucene::DocumentsWriter::docStoreOffset
int32_t docStoreOffset
Definition: DocumentsWriter.h:63
Lucene::DocumentsWriter::flushPending
bool flushPending
Definition: DocumentsWriter.h:162
Lucene::DocumentsWriter::deletesFlushed
BufferedDeletesPtr deletesFlushed
Deletes done before the last flush; these are still kept on abort.
Definition: DocumentsWriter.h:82
ByteBlockPool.h
Lucene::DocumentsWriter::openFiles
HashSet< String > openFiles()
Returns Collection of files in use by this instance, including any flushed segments.

clucene.sourceforge.net