Eclipse SUMO - Simulation of Urban MObility
StringUtils.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2022 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials are made available under the
5 // terms of the Eclipse Public License 2.0 which is available at
6 // https://www.eclipse.org/legal/epl-2.0/
7 // This Source Code may also be made available under the following Secondary
8 // Licenses when the conditions for such availability set forth in the Eclipse
9 // Public License 2.0 are satisfied: GNU General Public License, version 2
10 // or later which is available at
11 // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12 // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13 /****************************************************************************/
21 // Some static methods for string processing
22 /****************************************************************************/
#include <config.h>

#include <string>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <regex>
#include <xercesc/util/TransService.hpp>
#include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/XMLString.hpp>
#include <utils/common/ToString.h>
#include "StringUtils.h"
35 
36 
37 // ===========================================================================
38 // static member definitions
39 // ===========================================================================
40 std::string StringUtils::emptyString;
41 XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
42 
43 
44 // ===========================================================================
45 // method definitions
46 // ===========================================================================
47 std::string
48 StringUtils::prune(const std::string& str) {
49  const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
50  if (std::string::npos != endpos) {
51  const int startpos = (int)str.find_first_not_of(" \t\n\r");
52  return str.substr(startpos, endpos - startpos + 1);
53  }
54  return "";
55 }
56 
57 
58 std::string
59 StringUtils::to_lower_case(std::string str) {
60  for (int i = 0; i < (int)str.length(); i++) {
61  if (str[i] >= 'A' && str[i] <= 'Z') {
62  str[i] = str[i] + 'a' - 'A';
63  }
64  }
65  return str;
66 }
67 
68 
69 std::string
70 StringUtils::latin1_to_utf8(std::string str) {
71  // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
72  std::string result;
73  for (int i = 0; i < (int)str.length(); i++) {
74  const unsigned char c = str[i];
75  if (c < 128) {
76  result += c;
77  } else {
78  result += (char)(0xc2 + (c > 0xbf));
79  result += (char)((c & 0x3f) + 0x80);
80  }
81  }
82  return result;
83 }
84 
85 
86 std::string
87 StringUtils::convertUmlaute(std::string str) {
88  str = replace(str, "\xE4", "ae");
89  str = replace(str, "\xC4", "Ae");
90  str = replace(str, "\xF6", "oe");
91  str = replace(str, "\xD6", "Oe");
92  str = replace(str, "\xFC", "ue");
93  str = replace(str, "\xDC", "Ue");
94  str = replace(str, "\xDF", "ss");
95  str = replace(str, "\xC9", "E");
96  str = replace(str, "\xE9", "e");
97  str = replace(str, "\xC8", "E");
98  str = replace(str, "\xE8", "e");
99  return str;
100 }
101 
102 
103 
104 std::string
105 StringUtils::replace(std::string str, const char* what,
106  const char* by) {
107  const std::string what_tmp(what);
108  const std::string by_tmp(by);
109  std::string::size_type idx = str.find(what);
110  const int what_len = (int)what_tmp.length();
111  if (what_len > 0) {
112  const int by_len = (int)by_tmp.length();
113  while (idx != std::string::npos) {
114  str = str.replace(idx, what_len, by);
115  idx = str.find(what, idx + by_len);
116  }
117  }
118  return str;
119 }
120 
121 
122 std::string StringUtils::substituteEnvironment(std::string str) {
123  // Expression for an environment variables, e.g. ${NAME}
124  // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
125  // - .+? looks for the shortest match (non-greedy)
126  // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
127  std::regex envVarExpr(R"(\$\{(.+?)\})");
128 
129  // Are there any variables in this string?
130  std::smatch match;
131  std::string strIter = str;
132 
133  // Loop over the entire value string and look for variable names
134  while (std::regex_search(strIter, match, envVarExpr)) {
135  std::string varName = match[1];
136 
137  // Find the variable in the environment and its value
138  std::string varValue;
139  if (std::getenv(varName.c_str()) != nullptr) {
140  varValue = std::getenv(varName.c_str());
141  }
142 
143  // Replace the variable placeholder with its value in the original string
144  str = std::regex_replace(str, std::regex("\\$\\{" + varName + "\\}"), varValue);
145 
146  // Continue the loop with the remainder of the string
147  strIter = match.suffix();
148  }
149 
150  return str;
151 }
152 
153 std::string
155  std::ostringstream oss;
156  if (time < 0) {
157  oss << "-";
158  time = -time;
159  }
160  char buffer[10];
161  sprintf(buffer, "%02i:", (time / 3600));
162  oss << buffer;
163  time = time % 3600;
164  sprintf(buffer, "%02i:", (time / 60));
165  oss << buffer;
166  time = time % 60;
167  sprintf(buffer, "%02i", time);
168  oss << buffer;
169  return oss.str();
170 }
171 
172 
173 bool
174 StringUtils::startsWith(const std::string& str, const std::string prefix) {
175  return str.compare(0, prefix.length(), prefix) == 0;
176 }
177 
178 
179 bool
180 StringUtils::endsWith(const std::string& str, const std::string suffix) {
181  if (str.length() >= suffix.length()) {
182  return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
183  } else {
184  return false;
185  }
186 }
187 
188 
189 std::string
190 StringUtils::padFront(const std::string& str, int length, char padding) {
191  return std::string(MAX2(0, length - (int)str.size()), padding) + str;
192 }
193 
194 
195 std::string
196 StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
197  std::string result = replace(orig, "&", "&amp;");
198  result = replace(result, ">", "&gt;");
199  result = replace(result, "<", "&lt;");
200  result = replace(result, "\"", "&quot;");
201  if (maskDoubleHyphen) {
202  result = replace(result, "--", "&#45;&#45;");
203  }
204  for (char invalid = '\1'; invalid < ' '; invalid++) {
205  result = replace(result, std::string(1, invalid).c_str(), "");
206  }
207  return replace(result, "'", "&apos;");
208 }
209 
210 
211 std::string
212 StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
213  std::ostringstream out;
214 
215  for (int i = 0; i < (int)toEncode.length(); ++i) {
216  const char t = toEncode.at(i);
217 
218  if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
219  (encodeWhich == "" &&
220  ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
221  (t >= 65 && t <= 90) || // A-Z
222  t == 95 || // underscore
223  (t >= 97 && t <= 122) || // a-z
224  t == 126)) // tilde
225  ) {
226  out << toEncode.at(i);
227  } else {
228  out << charToHex(toEncode.at(i));
229  }
230  }
231 
232  return out.str();
233 }
234 
235 
236 std::string
237 StringUtils::urlDecode(const std::string& toDecode) {
238  std::ostringstream out;
239 
240  for (int i = 0; i < (int)toDecode.length(); ++i) {
241  if (toDecode.at(i) == '%') {
242  std::string str(toDecode.substr(i + 1, 2));
243  out << hexToChar(str);
244  i += 2;
245  } else {
246  out << toDecode.at(i);
247  }
248  }
249 
250  return out.str();
251 }
252 
253 std::string
254 StringUtils::charToHex(unsigned char c) {
255  short i = c;
256 
257  std::stringstream s;
258 
259  s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
260 
261  return s.str();
262 }
263 
264 
265 unsigned char
266 StringUtils::hexToChar(const std::string& str) {
267  short c = 0;
268  if (!str.empty()) {
269  std::istringstream in(str);
270  in >> std::hex >> c;
271  if (in.fail()) {
272  throw NumberFormatException(str + " could not be interpreted as hex");
273  }
274  }
275  return static_cast<unsigned char>(c);
276 }
277 
278 
279 int
280 StringUtils::toInt(const std::string& sData) {
281  long long int result = toLong(sData);
282  if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
283  throw NumberFormatException(toString(result) + " int overflow");
284  }
285  return (int)result;
286 }
287 
288 
289 int
290 StringUtils::toIntSecure(const std::string& sData, int def) {
291  if (sData.length() == 0) {
292  return def;
293  }
294  return toInt(sData);
295 }
296 
297 
298 long long int
299 StringUtils::toLong(const std::string& sData) {
300  const char* const data = sData.c_str();
301  if (data == 0 || data[0] == 0) {
302  throw EmptyData();
303  }
304  char* end;
305  errno = 0;
306 #ifdef WIN32
307  long long int ret = _strtoi64(data, &end, 10);
308 #else
309  long long int ret = strtoll(data, &end, 10);
310 #endif
311  if (errno == ERANGE) {
312  errno = 0;
313  throw NumberFormatException("(long long integer range) " + sData);
314  }
315  if ((int)(end - data) != (int)strlen(data)) {
316  throw NumberFormatException("(long long integer format) " + sData);
317  }
318  return ret;
319 }
320 
321 
322 int
323 StringUtils::hexToInt(const std::string& sData) {
324  if (sData.length() == 0) {
325  throw EmptyData();
326  }
327  size_t idx = 0;
328  int result;
329  try {
330  if (sData[0] == '#') { // for html color codes
331  result = std::stoi(sData.substr(1), &idx, 16);
332  idx++;
333  } else {
334  result = std::stoi(sData, &idx, 16);
335  }
336  } catch (...) {
337  throw NumberFormatException("(hex integer format) " + sData);
338  }
339  if (idx != sData.length()) {
340  throw NumberFormatException("(hex integer format) " + sData);
341  }
342  return result;
343 }
344 
345 
346 double
347 StringUtils::toDouble(const std::string& sData) {
348  if (sData.size() == 0) {
349  throw EmptyData();
350  }
351  try {
352  size_t idx = 0;
353  const double result = std::stod(sData, &idx);
354  if (idx != sData.size()) {
355  throw NumberFormatException("(double format) " + sData);
356  } else {
357  return result;
358  }
359  } catch (...) {
360  // invalid_argument or out_of_range
361  throw NumberFormatException("(double) " + sData);
362  }
363 }
364 
365 
366 double
367 StringUtils::toDoubleSecure(const std::string& sData, const double def) {
368  if (sData.length() == 0) {
369  return def;
370  }
371  return toDouble(sData);
372 }
373 
374 
375 bool
376 StringUtils::toBool(const std::string& sData) {
377  if (sData.length() == 0) {
378  throw EmptyData();
379  }
380  std::string s = sData;
381  // Don't use std::transform(..., ::tolower) due a C4244 Warning in MSVC17
382  for (int i = 0; i < (int)s.length(); i++) {
383  s[i] = (char)::tolower((char)s[i]);
384  }
385  if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
386  return true;
387  } else if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
388  return false;
389  } else {
390  throw BoolFormatException(s);
391  }
392 }
393 
394 
395 std::string
396 StringUtils::transcode(const XMLCh* const data, int length) {
397  if (data == 0) {
398  throw EmptyData();
399  }
400  if (length == 0) {
401  return "";
402  }
403 #if _XERCES_VERSION < 30100
405  std::string result(t);
406  XERCES_CPP_NAMESPACE::XMLString::release(&t);
407  return result;
408 #else
409  try {
410  XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
411  return reinterpret_cast<const char*>(utf8.str());
412  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
413  return "?";
414  }
415 #endif
416 }
417 
418 
419 std::string
420 StringUtils::transcodeFromLocal(const std::string& localString) {
421 #if _XERCES_VERSION > 30100
422  try {
423  if (myLCPTranscoder == nullptr) {
424  myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
425  }
426  if (myLCPTranscoder != nullptr) {
427  return transcode(myLCPTranscoder->transcode(localString.c_str()));
428  }
429  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
430 #endif
431  return localString;
432 }
433 
434 
435 std::string
436 StringUtils::transcodeToLocal(const std::string& utf8String) {
437 #if _XERCES_VERSION > 30100
438  try {
439  if (myLCPTranscoder == nullptr) {
440  myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
441  }
442  if (myLCPTranscoder != nullptr) {
443  XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
444  return myLCPTranscoder->transcode(utf8.str());
445  }
446  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
447 #endif
448  return utf8String;
449 }
450 
451 
452 std::string
453 StringUtils::trim_left(const std::string s, const std::string& t) {
454  std::string result = s;
455  result.erase(0, s.find_first_not_of(t));
456  return result;
457 }
458 
459 std::string
460 StringUtils::trim_right(const std::string s, const std::string& t) {
461  std::string result = s;
462  result.erase(s.find_last_not_of(t) + 1);
463  return result;
464 }
465 
466 std::string
467 StringUtils::trim(const std::string s, const std::string& t) {
468  return trim_right(trim_left(s, t), t);
469 }
470 
471 void
473  myLCPTranscoder = nullptr;
474 }
475 
476 /****************************************************************************/
T MAX2(T a, T b)
Definition: StdDefs.h:80
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition: ToString.h:46
std::string transcode(const XMLCh *const qname)
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
static std::string charToHex(unsigned char c)
static std::string urlDecode(const std::string &encoded)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it, returning the given default for an empty string
static std::string replace(std::string str, const char *what, const char *by)
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
Definition: StringUtils.h:164
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string substituteEnvironment(std::string str)
static std::string toTimeString(int time)
Builds a time string (hh:mm:ss) from the given seconds.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string to_lower_case(std::string str)
Transfers the content to lower case.
Definition: StringUtils.cpp:59
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
Definition: StringUtils.cpp:70
static std::string prune(const std::string &str)
Removes leading and trailing whitespace characters.
Definition: StringUtils.cpp:48
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts German umlauts (and accented e) to their plain Latin letter equivalents.
Definition: StringUtils.cpp:87
static unsigned char hexToChar(const std::string &str)
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string emptyString
An empty string.
Definition: StringUtils.h:84
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:137
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter