Eclipse SUMO - Simulation of Urban MObility
StringTokenizer.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2019 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v2.0
6 // which accompanies this distribution, and is available at
7 // http://www.eclipse.org/legal/epl-v20.html
8 // SPDX-License-Identifier: EPL-2.0
9 /****************************************************************************/
17 // A java-style StringTokenizer for c++ (stl)
18 /****************************************************************************/
19 
20 
21 // ===========================================================================
22 // included modules
23 // ===========================================================================
24 #include <config.h>
25 
26 #include <string>
27 #include <vector>
28 #include <iostream> // !!! debug only
29 
30 #include "UtilExceptions.h"
31 #include "StringTokenizer.h"
32 
33 
34 // ===========================================================================
35 // variable definitions
36 // ===========================================================================
37 const int StringTokenizer::NEWLINE = -256;
38 const int StringTokenizer::WHITECHARS = -257;
39 const int StringTokenizer::SPACE = 32;
40 const int StringTokenizer::TAB = 9;
41 
42 
43 // ===========================================================================
44 // method definitions
45 // ===========================================================================
46 
48  myPos(0) {
49 }
50 
51 
52 StringTokenizer::StringTokenizer(std::string tosplit) :
53  myTosplit(tosplit), myPos(0) {
54  prepareWhitechar(tosplit);
55 }
56 
57 
58 StringTokenizer::StringTokenizer(std::string tosplit, std::string token, bool splitAtAllChars) :
59  myTosplit(tosplit), myPos(0) {
60  prepare(tosplit, token, splitAtAllChars);
61 }
62 
63 
64 StringTokenizer::StringTokenizer(std::string tosplit, int special) :
65  myTosplit(tosplit), myPos(0) {
66  switch (special) {
67  case NEWLINE:
68  prepare(tosplit, "\r\n", true);
69  break;
70  case TAB:
71  prepare(tosplit, "\t", true);
72  break;
73  case WHITECHARS:
74  prepareWhitechar(tosplit);
75  break;
76  default:
77  char* buf = new char[2];
78  buf[0] = (char) special;
79  buf[1] = 0;
80  prepare(tosplit, buf, false);
81  delete[] buf;
82  break;
83  }
84 }
85 
86 
88 
89 
91  myPos = 0;
92 }
93 
94 
96  return myPos != (int)myStarts.size();
97 }
98 
99 
100 std::string StringTokenizer::next() {
101  if (myPos >= (int)myStarts.size()) {
102  throw OutOfBoundsException();
103  }
104  if (myLengths[myPos] == 0) {
105  myPos++;
106  return "";
107  }
108  int start = myStarts[myPos];
109  int length = myLengths[myPos++];
110  return myTosplit.substr(start, length);
111 }
112 
113 
114 std::string StringTokenizer::front() {
115  if (myStarts.size() == 0) {
116  throw OutOfBoundsException();
117  }
118  if (myLengths[0] == 0) {
119  return "";
120  }
121  return myTosplit.substr(myStarts[0], myLengths[0]);
122 }
123 
124 
125 std::string StringTokenizer::get(int pos) const {
126  if (pos >= (int)myStarts.size()) {
127  throw OutOfBoundsException();
128  }
129  if (myLengths[pos] == 0) {
130  return "";
131  }
132  int start = myStarts[pos];
133  int length = myLengths[pos];
134  return myTosplit.substr(start, length);
135 }
136 
137 
139  return (int)myStarts.size();
140 }
141 
142 
143 void StringTokenizer::prepare(const std::string& tosplit, const std::string& token, bool splitAtAllChars) {
144  int beg = 0;
145  int len = (int)token.length();
146  if (splitAtAllChars) {
147  len = 1;
148  }
149  while (beg < (int)tosplit.length()) {
150  std::string::size_type end;
151  if (splitAtAllChars) {
152  end = tosplit.find_first_of(token, beg);
153  } else {
154  end = tosplit.find(token, beg);
155  }
156  if (end == std::string::npos) {
157  end = tosplit.length();
158  }
159  myStarts.push_back(beg);
160  myLengths.push_back((int)end - beg);
161  beg = (int)end + len;
162  if (beg == (int)tosplit.length()) {
163  myStarts.push_back(beg - 1);
164  myLengths.push_back(0);
165  }
166  }
167 }
168 
169 
170 void StringTokenizer::prepareWhitechar(const std::string& tosplit) {
171  std::string::size_type len = tosplit.length();
172  std::string::size_type beg = 0;
173  while (beg < len && tosplit[beg] <= SPACE) {
174  beg++;
175  }
176  while (beg != std::string::npos && beg < len) {
177  std::string::size_type end = beg;
178  while (end < len && tosplit[end] > SPACE) {
179  end++;
180  }
181  myStarts.push_back((int)beg);
182  myLengths.push_back((int)end - (int)beg);
183  beg = end;
184  while (beg < len && tosplit[beg] <= SPACE) {
185  beg++;
186  }
187  }
188 }
189 
190 
191 std::vector<std::string>
193  std::vector<std::string> ret;
194  ret.reserve(size());
195  while (hasNext()) {
196  ret.push_back(next());
197  }
198  reinit();
199  return ret;
200 }
201 
202 /****************************************************************************/
StringTokenizer::hasNext
bool hasNext()
returns the information whether further substrings exist
Definition: StringTokenizer.cpp:95
StringTokenizer::next
std::string next()
returns the next substring when it exists; otherwise an OutOfBoundsException is thrown
Definition: StringTokenizer.cpp:100
StringTokenizer::prepare
void prepare(const std::string &tosplit, const std::string &token, bool splitAtAllChars)
splits the first string at all occurrences of the second. If the third parameter is true split at all ...
Definition: StringTokenizer.cpp:143
StringTokenizer::~StringTokenizer
~StringTokenizer()
destructor
Definition: StringTokenizer.cpp:87
StringTokenizer::WHITECHARS
static const int WHITECHARS
identifier for splitting the given string at all whitespace characters
Definition: StringTokenizer.h:68
StringTokenizer::myTosplit
std::string myTosplit
the string to split
Definition: StringTokenizer.h:140
StringTokenizer::StringTokenizer
StringTokenizer()
default constructor
Definition: StringTokenizer.cpp:47
StringTokenizer::get
std::string get(int pos) const
returns the item at the given position
Definition: StringTokenizer.cpp:125
StringTokenizer::NEWLINE
static const int NEWLINE
identifier for splitting the given string at all newline characters
Definition: StringTokenizer.h:65
StringTokenizer::myLengths
SizeVector myLengths
the list of substring lengths
Definition: StringTokenizer.h:149
OutOfBoundsException
Definition: UtilExceptions.h:122
UtilExceptions.h
StringTokenizer::size
int size() const
returns the number of existing substrings
Definition: StringTokenizer.cpp:138
StringTokenizer::SPACE
static const int SPACE
the ascii index of the highest whitespace character
Definition: StringTokenizer.h:71
StringTokenizer::TAB
static const int TAB
the ascii index of the tab character
Definition: StringTokenizer.h:74
StringTokenizer::myStarts
SizeVector myStarts
the list of substring starts
Definition: StringTokenizer.h:146
StringTokenizer::front
std::string front()
returns the first substring without moving the iterator
Definition: StringTokenizer.cpp:114
StringTokenizer::myPos
int myPos
the current position in the list of substrings
Definition: StringTokenizer.h:143
StringTokenizer::getVector
std::vector< std::string > getVector()
return vector of strings
Definition: StringTokenizer.cpp:192
config.h
StringTokenizer.h
StringTokenizer::prepareWhitechar
void prepareWhitechar(const std::string &tosplit)
splits the first string at all occurrences of whitechars
Definition: StringTokenizer.cpp:170
StringTokenizer::reinit
void reinit()
reinitialises the internal iterator
Definition: StringTokenizer.cpp:90