Package com.actelion.research.util
Class StringFunctions
- java.lang.Object
-
- com.actelion.research.util.StringFunctions
-
public class StringFunctions extends java.lang.Object
-
-
Field Summary
Fields Modifier and Type Field Description static java.lang.String
PAT_NOT_ALPHANUMERIC
static java.lang.String
PAT_WHITESPACE
static java.lang.String[]
REGEX_META_CHARACTERS
static java.lang.String
SEP
-
Constructor Summary
Constructors Constructor Description StringFunctions()
-
Method Summary
All Methods Static Methods Concrete Methods Modifier and Type Method Description static boolean
containsLowerCase(java.lang.String s)
static boolean
containsUpperCase(java.lang.String s)
static java.lang.String
convertToValidFileNameCharacters(java.lang.String str)
Not allowed are: \ / : * ? < > |static int
countIntegerInText(java.lang.String txt)
static int
countOccurence(java.lang.String str, char c)
static int
countWordInText(java.lang.String txt)
static java.lang.String
encodeHTML(java.lang.String txt)
static boolean
equal(byte[] b1, byte[] b2)
static java.lang.String
escapeDanglingMetaCharacters(java.lang.String pattern)
Escapes the meta characters in a regular expression pattern with \\.static java.lang.String
extract(java.lang.String str, java.lang.String regex)
static java.lang.String
extractInverse(java.lang.String str, java.lang.String regex)
static java.lang.String
format(java.lang.String str)
Keeps the minus.static java.lang.String
format(java.lang.String str, char replacement)
Keeps the minus.static java.lang.String
format2DefinedLengthLeading(java.lang.String s, int length)
static java.lang.String
format2DefinedLengthTrailing(java.lang.String s, int length)
static java.lang.String
formatToCharactersAndDigits(java.lang.String str)
static java.lang.String
formatToPrintableCharactersOnly(java.lang.String str)
This function was implemented because, in AxoSOMSampleView, a new line appeared in a SMILES from ChemAxon's molConvert that is not detected by replaceAll("\n", "").
static java.util.List<java.lang.String>
getAllSubStrings(java.lang.String str, int minsize)
static java.lang.String
getAppendedSorted(java.lang.String s1, java.lang.String s2)
static java.util.Comparator<java.lang.String>
getComparatorLength()
static java.text.DecimalFormat
getDecimalFormat(int precision)
static int[][]
getMatrixFromString(java.lang.String str, java.lang.String seperator)
static java.lang.String
getMaximumOverlap(java.util.List<java.lang.String> li, int minsize)
Finds the maximum common String in all Strings.static java.lang.String
getRandom(int min, int max)
static java.util.List<java.lang.String>
getSplittedOverlappingText(java.util.List<java.lang.String> liWords, int lenSubText, int lenOverlap)
Generates a list with overlapstatic java.lang.String
getString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd)
static java.lang.String
getString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd, int iFromIndex)
static java.lang.String
getStringFromRegEx(java.lang.String str, java.lang.String regex)
static java.util.List<java.lang.String>
getTokenized(java.lang.String txt, java.lang.String separator)
Returns the tokenized and trimmed values.static java.util.List<java.lang.String>
getTokenizedBySeperatorRegex(java.lang.String txt, java.lang.String regex)
static java.util.List<java.lang.String>
getTokenizedQuoted(java.lang.String txt)
Get a list from quoted and comma or otherwise separated phrases.static java.util.List<java.lang.String>
getWordsFormatted(java.lang.String txt)
Removes all non characters and digits.static java.util.List<java.lang.String>
getWordsFormattedUniqueLowerCase(java.lang.String txt)
static java.lang.String
hex2String(java.lang.String hex)
static boolean
isAllLetter(java.lang.String s)
static boolean
isAllLowerCase(java.lang.String s)
static boolean
isAllUpperCase(java.lang.String s)
static boolean
isCapitalizedWord(java.lang.String s)
static boolean
isMissingParenthesis(java.lang.String name)
static boolean
isRegexInString(java.lang.String str, java.lang.String regex)
static boolean
isUpperAndLowerCase(java.lang.String s)
static void
main(java.lang.String[] args)
static java.util.List<java.awt.Point>
match(java.lang.String str, java.lang.String regex)
static java.awt.Point
matchFirst(java.lang.String str, java.lang.String regex)
static java.lang.String
max(java.lang.String s1, java.lang.String s2)
static java.lang.String
min(java.lang.String s1, java.lang.String s2)
static int
nextClosing(java.lang.String txt, int indexStart, char cOpen, char cClose)
Finds the next closing bracket character that corresponds to the first opening character.
static int
nextClosingBracket(java.lang.String txt, int iIndexStart)
Finds the next balanced closing bracket "]" for the first opening bracket "[" in the string.
static double[]
parse2Double(java.lang.String s, java.lang.String sepRegEx)
static java.lang.String
removeCharacter(java.lang.StringBuilder str, char c)
static java.lang.String
removeCharacter(java.lang.String str, char c)
static int
sizeOf(java.lang.String s)
https://stackoverflow.com/questions/4385623/bytes-of-a-string-in-java
sizeof(string) =
8 + // object header used by the VM
8 + // 64-bit reference to char array (value)
8 + string.length() * 2 + // character array itself (object header + 16-bit chars)
4 + // offset integer
4 + // count integer
4 + // cached hash code
static int
sizeOf(java.util.List<java.lang.String> l)
static java.lang.String
toSortedString(java.util.List<java.lang.String> li)
static java.lang.String
toString(boolean[] arr)
static java.lang.String
toString(byte[] arr)
static java.lang.String
toString(double[] arr)
static java.lang.String
toString(double[] arr, java.text.NumberFormat nf)
static java.lang.String
toString(float[] arr, java.text.NumberFormat nf)
static java.lang.String
toString(int[][] arr, java.lang.String seperator)
static java.lang.String
toString(int[] arr, java.lang.String seperator)
static java.lang.String
toString(java.lang.Exception ex)
static java.lang.String
toString(java.lang.String[] arr, java.lang.String seperator)
static java.lang.String
toString(java.util.Collection<java.lang.String> li, java.lang.String sep)
static java.lang.String
toString(java.util.List<java.lang.Double> li, java.text.NumberFormat nf)
static java.lang.String
toString(java.util.List<java.lang.String> li)
static java.lang.String
toStringBinary(int v)
static java.lang.String
toStringBinary(long v)
static java.lang.String
toStringFileNameCompatible(double d)
static java.lang.String
toStringHex(java.lang.String s)
static java.lang.String
toStringInt(java.util.List<java.lang.Integer> li)
static java.lang.String
toStringInt(java.util.List<java.lang.Integer> li, java.lang.String sep)
static java.lang.String
toStringInteger(java.util.List<java.lang.Integer> li, java.lang.String sep)
static java.lang.String
toStringLong(java.util.List<java.lang.Long> li, java.lang.String sep)
static java.lang.String
toStringTabNL(java.lang.String[][] arr)
Elements are separated by tabs and rows are separated by newline.
-
-
-
Field Detail
-
PAT_WHITESPACE
public static final java.lang.String PAT_WHITESPACE
- See Also:
- Constant Field Values
-
PAT_NOT_ALPHANUMERIC
public static final java.lang.String PAT_NOT_ALPHANUMERIC
- See Also:
- Constant Field Values
-
REGEX_META_CHARACTERS
public static final java.lang.String[] REGEX_META_CHARACTERS
-
SEP
public static final java.lang.String SEP
- See Also:
- Constant Field Values
-
-
Method Detail
-
parse2Double
public static double[] parse2Double(java.lang.String s, java.lang.String sepRegEx)
-
getAppendedSorted
public static java.lang.String getAppendedSorted(java.lang.String s1, java.lang.String s2)
-
min
public static java.lang.String min(java.lang.String s1, java.lang.String s2)
-
max
public static java.lang.String max(java.lang.String s1, java.lang.String s2)
-
countIntegerInText
public static int countIntegerInText(java.lang.String txt)
-
countWordInText
public static int countWordInText(java.lang.String txt)
-
equal
public static boolean equal(byte[] b1, byte[] b2)
-
encodeHTML
public static java.lang.String encodeHTML(java.lang.String txt)
-
getComparatorLength
public static java.util.Comparator<java.lang.String> getComparatorLength()
-
getDecimalFormat
public static java.text.DecimalFormat getDecimalFormat(int precision)
-
getAllSubStrings
public static java.util.List<java.lang.String> getAllSubStrings(java.lang.String str, int minsize)
-
getRandom
public static java.lang.String getRandom(int min, int max)
- Parameters:
min
- minimum length
max
- maximum length
- Returns:
-
getMatrixFromString
public static int[][] getMatrixFromString(java.lang.String str, java.lang.String seperator)
- Parameters:
str
- has to be of the form [1,2,3][2,3,4]. The separator has to be given.
seperator
-- Returns:
-
getMaximumOverlap
public static java.lang.String getMaximumOverlap(java.util.List<java.lang.String> li, int minsize)
Finds the maximum common String in all Strings. Position independent.- Parameters:
li
-- Returns:
-
removeCharacter
public static java.lang.String removeCharacter(java.lang.String str, char c)
-
removeCharacter
public static java.lang.String removeCharacter(java.lang.StringBuilder str, char c)
-
countOccurence
public static int countOccurence(java.lang.String str, char c)
-
convertToValidFileNameCharacters
public static java.lang.String convertToValidFileNameCharacters(java.lang.String str)
Not allowed are: \ / : * ? < > |- Parameters:
str
- input string- Returns:
- string with -X- instead of the disallowed characters. 10.09.2003 MK
-
toStringFileNameCompatible
public static java.lang.String toStringFileNameCompatible(double d)
-
formatToPrintableCharactersOnly
public static java.lang.String formatToPrintableCharactersOnly(java.lang.String str)
This function was implemented because, in AxoSOMSampleView, a new line appeared in a SMILES from ChemAxon's molConvert that is not detected by replaceAll("\n", "").
- Parameters:
str
- input String- Returns:
- a String only with printable ASCII characters. No extended ASCII characters.
-
formatToCharactersAndDigits
public static java.lang.String formatToCharactersAndDigits(java.lang.String str)
-
format2DefinedLengthTrailing
public static java.lang.String format2DefinedLengthTrailing(java.lang.String s, int length)
-
format2DefinedLengthLeading
public static java.lang.String format2DefinedLengthLeading(java.lang.String s, int length)
-
format
public static java.lang.String format(java.lang.String str)
Keeps the minus. Every other ASCII character that is neither a letter nor a digit is replaced with '_'.
- Parameters:
str
-- Returns:
-
format
public static java.lang.String format(java.lang.String str, char replacement)
Keeps the minus. Every other ASCII character that is neither a letter nor a digit is replaced with replacement.
- Parameters:
str
-- Returns:
-
getString
public static java.lang.String getString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd)
- Parameters:
sLine
- input string
sStart
- start tag
sEnd
- end tag
- Returns:
- string between the two tags; if one of the tags is not found, a string of length 0 is returned.
-
getStringFromRegEx
public static java.lang.String getStringFromRegEx(java.lang.String str, java.lang.String regex)
- Parameters:
str
-regex
-- Returns:
- null if substring not found.
-
isRegexInString
public static boolean isRegexInString(java.lang.String str, java.lang.String regex)
-
extract
public static java.lang.String extract(java.lang.String str, java.lang.String regex)
- Parameters:
str
-regex
-- Returns:
- expression which was matched by regex.
-
extractInverse
public static java.lang.String extractInverse(java.lang.String str, java.lang.String regex)
- Parameters:
str
-regex
-- Returns:
- the combined not matching parts of the string.
-
getString
public static java.lang.String getString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd, int iFromIndex)
- Parameters:
sLine
- input string
sStart
- start tag
sEnd
- end tag
iFromIndex
- start index
- Returns:
- string between the two tags; if one of the tags is not found, a string of length 0 is returned.
-
getWordsFormatted
public static java.util.List<java.lang.String> getWordsFormatted(java.lang.String txt)
Removes all non characters and digits.- Parameters:
txt
-- Returns:
-
getWordsFormattedUniqueLowerCase
public static java.util.List<java.lang.String> getWordsFormattedUniqueLowerCase(java.lang.String txt)
- Parameters:
txt
-- Returns:
- formatted, unique and lower case
-
getTokenizedQuoted
public static java.util.List<java.lang.String> getTokenizedQuoted(java.lang.String txt)
Get a list from quoted and comma or otherwise separated phrases.- Parameters:
txt
-- Returns:
-
getTokenized
public static java.util.List<java.lang.String> getTokenized(java.lang.String txt, java.lang.String separator)
Returns the tokenized and trimmed values.- Parameters:
txt
-separator
-- Returns:
-
getTokenizedBySeperatorRegex
public static java.util.List<java.lang.String> getTokenizedBySeperatorRegex(java.lang.String txt, java.lang.String regex)
-
getSplittedOverlappingText
public static java.util.List<java.lang.String> getSplittedOverlappingText(java.util.List<java.lang.String> liWords, int lenSubText, int lenOverlap)
Generates a list with overlap- Parameters:
liWords
-lenSubText
- the number of words in each entry.
lenOverlap
-- Returns:
-
sizeOf
public static int sizeOf(java.lang.String s)
https://stackoverflow.com/questions/4385623/bytes-of-a-string-in-java
sizeof(string) =
8 + // object header used by the VM
8 + // 64-bit reference to char array (value)
8 + string.length() * 2 + // character array itself (object header + 16-bit chars)
4 + // offset integer
4 + // count integer
4 + // cached hash code
- Parameters:
s
-- Returns:
-
sizeOf
public static int sizeOf(java.util.List<java.lang.String> l)
-
toString
public static java.lang.String toString(double[] arr, java.text.NumberFormat nf)
-
toString
public static java.lang.String toString(float[] arr, java.text.NumberFormat nf)
-
toString
public static java.lang.String toString(byte[] arr)
-
toString
public static java.lang.String toString(boolean[] arr)
-
toString
public static java.lang.String toString(java.util.List<java.lang.Double> li, java.text.NumberFormat nf)
-
toStringInteger
public static java.lang.String toStringInteger(java.util.List<java.lang.Integer> li, java.lang.String sep)
-
toString
public static java.lang.String toString(double[] arr)
-
toString
public static java.lang.String toString(int[][] arr, java.lang.String seperator)
-
toString
public static java.lang.String toString(java.lang.String[] arr, java.lang.String seperator)
-
toString
public static java.lang.String toString(int[] arr, java.lang.String seperator)
-
toStringTabNL
public static java.lang.String toStringTabNL(java.lang.String[][] arr)
Elements are separated by tabs and rows are separated by newline.- Parameters:
arr
-- Returns:
-
toString
public static java.lang.String toString(java.lang.Exception ex)
-
toString
public static java.lang.String toString(java.util.List<java.lang.String> li)
-
toString
public static java.lang.String toString(java.util.Collection<java.lang.String> li, java.lang.String sep)
-
toStringLong
public static java.lang.String toStringLong(java.util.List<java.lang.Long> li, java.lang.String sep)
-
toStringInt
public static java.lang.String toStringInt(java.util.List<java.lang.Integer> li, java.lang.String sep)
-
toStringInt
public static java.lang.String toStringInt(java.util.List<java.lang.Integer> li)
-
toSortedString
public static java.lang.String toSortedString(java.util.List<java.lang.String> li)
-
toStringBinary
public static java.lang.String toStringBinary(int v)
-
toStringBinary
public static java.lang.String toStringBinary(long v)
-
toStringHex
public static java.lang.String toStringHex(java.lang.String s)
-
hex2String
public static java.lang.String hex2String(java.lang.String hex)
-
nextClosingBracket
public static int nextClosingBracket(java.lang.String txt, int iIndexStart)
Finds the next balanced closing bracket "]" for the first opening bracket "[" in the string.
- Parameters:
txt
- String
iIndexStart
- start index
- Returns:
- index of the next corresponding bracket
-
escapeDanglingMetaCharacters
public static java.lang.String escapeDanglingMetaCharacters(java.lang.String pattern)
Escapes the meta characters in a regular expression pattern with \\.- Parameters:
pattern
-- Returns:
-
match
public static final java.util.List<java.awt.Point> match(java.lang.String str, java.lang.String regex)
- Parameters:
str
-regex
-- Returns:
- list of points, one per match, where x is the start and y is the end of the matching string (the offset after the last character matched).
-
matchFirst
public static final java.awt.Point matchFirst(java.lang.String str, java.lang.String regex)
-
nextClosing
public static int nextClosing(java.lang.String txt, int indexStart, char cOpen, char cClose)
Finds the next closing bracket character that corresponds to the first opening character.
- Parameters:
txt
-indexStart
-cOpen
-cClose
-- Returns:
-
isAllLetter
public static boolean isAllLetter(java.lang.String s)
-
isAllUpperCase
public static boolean isAllUpperCase(java.lang.String s)
-
isAllLowerCase
public static boolean isAllLowerCase(java.lang.String s)
-
containsUpperCase
public static boolean containsUpperCase(java.lang.String s)
-
containsLowerCase
public static boolean containsLowerCase(java.lang.String s)
-
isCapitalizedWord
public static boolean isCapitalizedWord(java.lang.String s)
- Parameters:
s
-- Returns:
- true only if the first letter is capitalized and all other characters are lower case letters.
-
isUpperAndLowerCase
public static boolean isUpperAndLowerCase(java.lang.String s)
-
isMissingParenthesis
public static boolean isMissingParenthesis(java.lang.String name)
- Parameters:
name
-- Returns:
- false if for each opening parenthesis a closing one is present.
-
main
public static void main(java.lang.String[] args)
-
-