Lucene++ - a full-featured, C++ search engine
API Documentation
Go to the documentation of this file.
7 #ifndef STANDARDTOKENIZER_H
8 #define STANDARDTOKENIZER_H
70 static const int32_t NUM;
71 static const int32_t CJ;
void setMaxTokenLength(int32_t length)
Set the max allowed token length. Any token longer than this is skipped.
bool replaceInvalidAcronym
Definition: StandardTokenizer.h:54
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
TypeAttributePtr typeAtt
Definition: StandardTokenizer.h:61
void setReplaceInvalidAcronym(bool replaceInvalidAcronym)
PositionIncrementAttributePtr posIncrAtt
Definition: StandardTokenizer.h:60
Version
Definition: Constants.h:40
static const int32_t ALPHANUM
Definition: StandardTokenizer.h:64
static const int32_t CJ
Definition: StandardTokenizer.h:71
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547
StandardTokenizerImplPtr scanner
A private instance of the scanner.
Definition: StandardTokenizer.h:48
StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeSourcePtr &source, const ReaderPtr &input)
Creates a new StandardTokenizer with a given AttributeSource.
boost::shared_ptr< PositionIncrementAttribute > PositionIncrementAttributePtr
Definition: LuceneTypes.h:45
A grammar-based tokenizer.
Definition: StandardTokenizer.h:34
static const int32_t APOSTROPHE
Definition: StandardTokenizer.h:65
int32_t getMaxTokenLength()
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< OffsetAttribute > OffsetAttributePtr
Definition: LuceneTypes.h:40
boost::shared_ptr< StandardTokenizerImpl > StandardTokenizerImplPtr
Definition: LuceneTypes.h:53
boost::shared_ptr< AttributeSource > AttributeSourcePtr
Definition: LuceneTypes.h:520
static const int32_t NUM
Definition: StandardTokenizer.h:70
A Tokenizer is a TokenStream whose input is a Reader.
Definition: Tokenizer.h:20
virtual bool incrementToken()
static const Collection< String > TOKEN_TYPES()
String token types that correspond to token type int constants.
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition: LuceneTypes.h:58
StandardTokenizer(LuceneVersion::Version matchVersion, const ReaderPtr &input)
Creates a new instance of the StandardTokenizer. Attaches the input to the newly created scanner.
virtual ~StandardTokenizer()
boost::shared_ptr< TypeAttribute > TypeAttributePtr
Definition: LuceneTypes.h:64
int32_t maxTokenLength
Definition: StandardTokenizer.h:55
static const int32_t COMPANY
Definition: StandardTokenizer.h:67
StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeFactoryPtr &factory, const ReaderPtr &input)
Creates a new StandardTokenizer with a given AttributeFactory.
virtual void end()
This method is called by the consumer after the last token has been consumed, after incrementToken() returned false.
static const int32_t ACRONYM
Definition: StandardTokenizer.h:66
void init(const ReaderPtr &input, LuceneVersion::Version matchVersion)
static const int32_t EMAIL
Definition: StandardTokenizer.h:68
OffsetAttributePtr offsetAtt
Definition: StandardTokenizer.h:59
TermAttributePtr termAtt
Definition: StandardTokenizer.h:58
static const int32_t HOST
Definition: StandardTokenizer.h:69
static const int32_t ACRONYM_DEP
Definition: StandardTokenizer.h:74
virtual void reset(const ReaderPtr &input)
Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will use this to re-use a previously created tokenizer.
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition: LuceneTypes.h:519
bool isReplaceInvalidAcronym()
clucene.sourceforge.net