001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import java.util.Arrays; 020 021import org.apache.commons.lang3.ArraySorter; 022import org.apache.commons.lang3.ArrayUtils; 023import org.apache.commons.lang3.StringUtils; 024 025/** 026 * A matcher class that can be queried to determine if a character array 027 * portion matches. 028 * <p> 029 * This class comes complete with various factory methods. 030 * If these do not suffice, you can subclass and implement your own matcher. 031 * </p> 032 * 033 * @since 2.2 034 * @deprecated As of 3.6, use Apache Commons Text 035 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html"> 036 * StringMatcherFactory</a> instead 037 */ 038@Deprecated 039public abstract class StrMatcher { 040 041 /** 042 * Matches the comma character. 043 */ 044 private static final StrMatcher COMMA_MATCHER = new CharMatcher(','); 045 /** 046 * Matches the tab character. 047 */ 048 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t'); 049 /** 050 * Matches the space character. 051 */ 052 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' '); 053 /** 054 * Matches the same characters as StringTokenizer, 055 * namely space, tab, newline, formfeed. 056 */ 057 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray()); 058 /** 059 * Matches the String trim() whitespace characters. 060 */ 061 private static final StrMatcher TRIM_MATCHER = new TrimMatcher(); 062 /** 063 * Matches the double quote character. 064 */ 065 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\''); 066 /** 067 * Matches the double quote character. 068 */ 069 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); 070 /** 071 * Matches the single or double quote character. 072 */ 073 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray()); 074 /** 075 * Matches no characters. 076 */ 077 private static final StrMatcher NONE_MATCHER = new NoMatcher(); 078 079 /** 080 * Returns a matcher which matches the comma character. 081 * 082 * @return a matcher for a comma 083 */ 084 public static StrMatcher commaMatcher() { 085 return COMMA_MATCHER; 086 } 087 088 /** 089 * Returns a matcher which matches the tab character. 090 * 091 * @return a matcher for a tab 092 */ 093 public static StrMatcher tabMatcher() { 094 return TAB_MATCHER; 095 } 096 097 /** 098 * Returns a matcher which matches the space character. 099 * 100 * @return a matcher for a space 101 */ 102 public static StrMatcher spaceMatcher() { 103 return SPACE_MATCHER; 104 } 105 106 /** 107 * Matches the same characters as StringTokenizer, 108 * namely space, tab, newline and formfeed. 109 * 110 * @return the split matcher 111 */ 112 public static StrMatcher splitMatcher() { 113 return SPLIT_MATCHER; 114 } 115 116 /** 117 * Matches the String trim() whitespace characters. 118 * 119 * @return the trim matcher 120 */ 121 public static StrMatcher trimMatcher() { 122 return TRIM_MATCHER; 123 } 124 125 /** 126 * Returns a matcher which matches the single quote character. 127 * 128 * @return a matcher for a single quote 129 */ 130 public static StrMatcher singleQuoteMatcher() { 131 return SINGLE_QUOTE_MATCHER; 132 } 133 134 /** 135 * Returns a matcher which matches the double quote character. 136 * 137 * @return a matcher for a double quote 138 */ 139 public static StrMatcher doubleQuoteMatcher() { 140 return DOUBLE_QUOTE_MATCHER; 141 } 142 143 /** 144 * Returns a matcher which matches the single or double quote character. 145 * 146 * @return a matcher for a single or double quote 147 */ 148 public static StrMatcher quoteMatcher() { 149 return QUOTE_MATCHER; 150 } 151 152 /** 153 * Matches no characters. 154 * 155 * @return a matcher that matches nothing 156 */ 157 public static StrMatcher noneMatcher() { 158 return NONE_MATCHER; 159 } 160 161 /** 162 * Constructor that creates a matcher from a character. 163 * 164 * @param ch the character to match, must not be null 165 * @return a new Matcher for the given char 166 */ 167 public static StrMatcher charMatcher(final char ch) { 168 return new CharMatcher(ch); 169 } 170 171 /** 172 * Constructor that creates a matcher from a set of characters. 173 * 174 * @param chars the characters to match, null or empty matches nothing 175 * @return a new matcher for the given char[] 176 */ 177 public static StrMatcher charSetMatcher(final char... chars) { 178 if (ArrayUtils.isEmpty(chars)) { 179 return NONE_MATCHER; 180 } 181 if (chars.length == 1) { 182 return new CharMatcher(chars[0]); 183 } 184 return new CharSetMatcher(chars); 185 } 186 187 /** 188 * Constructor that creates a matcher from a string representing a set of characters. 189 * 190 * @param chars the characters to match, null or empty matches nothing 191 * @return a new Matcher for the given characters 192 */ 193 public static StrMatcher charSetMatcher(final String chars) { 194 if (StringUtils.isEmpty(chars)) { 195 return NONE_MATCHER; 196 } 197 if (chars.length() == 1) { 198 return new CharMatcher(chars.charAt(0)); 199 } 200 return new CharSetMatcher(chars.toCharArray()); 201 } 202 203 /** 204 * Constructor that creates a matcher from a string. 205 * 206 * @param str the string to match, null or empty matches nothing 207 * @return a new Matcher for the given String 208 */ 209 public static StrMatcher stringMatcher(final String str) { 210 if (StringUtils.isEmpty(str)) { 211 return NONE_MATCHER; 212 } 213 return new StringMatcher(str); 214 } 215 216 /** 217 * Constructor. 218 */ 219 protected StrMatcher() { 220 } 221 222 /** 223 * Returns the number of matching characters, zero for no match. 224 * <p> 225 * This method is called to check for a match. 226 * The parameter {@code pos} represents the current position to be 227 * checked in the string {@code buffer} (a character array which must 228 * not be changed). 229 * The API guarantees that {@code pos} is a valid index for {@code buffer}. 230 * </p> 231 * <p> 232 * The character array may be larger than the active area to be matched. 233 * Only values in the buffer between the specified indices may be accessed. 234 * </p> 235 * <p> 236 * The matching code may check one character or many. 237 * It may check characters preceding {@code pos} as well as those 238 * after, so long as no checks exceed the bounds specified. 239 * </p> 240 * <p> 241 * It must return zero for no match, or a positive number if a match was found. 242 * The number indicates the number of characters that matched. 243 * </p> 244 * 245 * @param buffer the text content to match against, do not change 246 * @param pos the starting position for the match, valid for buffer 247 * @param bufferStart the first active index in the buffer, valid for buffer 248 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer 249 * @return the number of matching characters, zero for no match 250 */ 251 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd); 252 253 /** 254 * Returns the number of matching characters, zero for no match. 255 * <p> 256 * This method is called to check for a match. 257 * The parameter {@code pos} represents the current position to be 258 * checked in the string {@code buffer} (a character array which must 259 * not be changed). 260 * The API guarantees that {@code pos} is a valid index for {@code buffer}. 261 * </p> 262 * <p> 263 * The matching code may check one character or many. 264 * It may check characters preceding {@code pos} as well as those after. 265 * </p> 266 * <p> 267 * It must return zero for no match, or a positive number if a match was found. 268 * The number indicates the number of characters that matched. 269 * </p> 270 * 271 * @param buffer the text content to match against, do not change 272 * @param pos the starting position for the match, valid for buffer 273 * @return the number of matching characters, zero for no match 274 * @since 2.4 275 */ 276 public int isMatch(final char[] buffer, final int pos) { 277 return isMatch(buffer, pos, 0, buffer.length); 278 } 279 280 /** 281 * Class used to define a set of characters for matching purposes. 282 */ 283 static final class CharSetMatcher extends StrMatcher { 284 /** The set of characters to match. */ 285 private final char[] chars; 286 287 /** 288 * Constructor that creates a matcher from a character array. 289 * 290 * @param chars the characters to match, must not be null 291 */ 292 CharSetMatcher(final char[] chars) { 293 this.chars = ArraySorter.sort(chars.clone()); 294 } 295 296 /** 297 * Returns whether or not the given character matches. 298 * 299 * @param buffer the text content to match against, do not change 300 * @param pos the starting position for the match, valid for buffer 301 * @param bufferStart the first active index in the buffer, valid for buffer 302 * @param bufferEnd the end index of the active buffer, valid for buffer 303 * @return the number of matching characters, zero for no match 304 */ 305 @Override 306 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 307 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0; 308 } 309 } 310 311 /** 312 * Class used to define a character for matching purposes. 313 */ 314 static final class CharMatcher extends StrMatcher { 315 /** The character to match. */ 316 private final char ch; 317 318 /** 319 * Constructor that creates a matcher that matches a single character. 320 * 321 * @param ch the character to match 322 */ 323 CharMatcher(final char ch) { 324 this.ch = ch; 325 } 326 327 /** 328 * Returns whether or not the given character matches. 329 * 330 * @param buffer the text content to match against, do not change 331 * @param pos the starting position for the match, valid for buffer 332 * @param bufferStart the first active index in the buffer, valid for buffer 333 * @param bufferEnd the end index of the active buffer, valid for buffer 334 * @return the number of matching characters, zero for no match 335 */ 336 @Override 337 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 338 return ch == buffer[pos] ? 1 : 0; 339 } 340 } 341 342 /** 343 * Class used to define a set of characters for matching purposes. 344 */ 345 static final class StringMatcher extends StrMatcher { 346 /** The string to match, as a character array. */ 347 private final char[] chars; 348 349 /** 350 * Constructor that creates a matcher from a String. 351 * 352 * @param str the string to match, must not be null 353 */ 354 StringMatcher(final String str) { 355 chars = str.toCharArray(); 356 } 357 358 /** 359 * Returns whether or not the given text matches the stored string. 360 * 361 * @param buffer the text content to match against, do not change 362 * @param pos the starting position for the match, valid for buffer 363 * @param bufferStart the first active index in the buffer, valid for buffer 364 * @param bufferEnd the end index of the active buffer, valid for buffer 365 * @return the number of matching characters, zero for no match 366 */ 367 @Override 368 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) { 369 final int len = chars.length; 370 if (pos + len > bufferEnd) { 371 return 0; 372 } 373 for (int i = 0; i < chars.length; i++, pos++) { 374 if (chars[i] != buffer[pos]) { 375 return 0; 376 } 377 } 378 return len; 379 } 380 381 @Override 382 public String toString() { 383 return super.toString() + ' ' + Arrays.toString(chars); 384 } 385 386 } 387 388 /** 389 * Class used to match no characters. 390 */ 391 static final class NoMatcher extends StrMatcher { 392 393 /** 394 * Constructs a new instance of {@link NoMatcher}. 395 */ 396 NoMatcher() { 397 } 398 399 /** 400 * Always returns {@code false}. 401 * 402 * @param buffer the text content to match against, do not change 403 * @param pos the starting position for the match, valid for buffer 404 * @param bufferStart the first active index in the buffer, valid for buffer 405 * @param bufferEnd the end index of the active buffer, valid for buffer 406 * @return the number of matching characters, zero for no match 407 */ 408 @Override 409 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 410 return 0; 411 } 412 } 413 414 /** 415 * Class used to match whitespace as per trim(). 416 */ 417 static final class TrimMatcher extends StrMatcher { 418 419 /** 420 * Constructs a new instance of {@link TrimMatcher}. 421 */ 422 TrimMatcher() { 423 } 424 425 /** 426 * Returns whether or not the given character matches. 427 * 428 * @param buffer the text content to match against, do not change 429 * @param pos the starting position for the match, valid for buffer 430 * @param bufferStart the first active index in the buffer, valid for buffer 431 * @param bufferEnd the end index of the active buffer, valid for buffer 432 * @return the number of matching characters, zero for no match 433 */ 434 @Override 435 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 436 return buffer[pos] <= 32 ? 1 : 0; 437 } 438 } 439 440}