Mantid
Loading...
Searching...
No Matches
StringTokenizer.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
8#include <algorithm>
9#include <iterator> //cbegin,cend
10#include <stdexcept>
11
12namespace {
13
14// implement our own trim function to avoid the locale overhead in boost::trim.
15
// Trim leading whitespace from s in place.
// Each character is converted to unsigned char before it reaches ::isspace:
// handing the <cctype> classifiers a negative value (any byte >= 0x80 where
// plain char is signed) is undefined behaviour.
void trimTokenFromStart(std::string &s) {
  const auto firstKept = std::find_if_not(s.begin(), s.end(), [](unsigned char c) { return ::isspace(c) != 0; });
  s.erase(s.begin(), firstKept);
}
18
// Trim trailing whitespace from s in place.
// Each character is converted to unsigned char before it reaches ::isspace:
// handing the <cctype> classifiers a negative value (any byte >= 0x80 where
// plain char is signed) is undefined behaviour.
void trimTokenFromEnd(std::string &s) {
  // Search backwards for the last character to keep; base() then points one
  // past it in forward-iterator terms, i.e. at the start of the trailing run.
  const auto lastKept = std::find_if_not(s.rbegin(), s.rend(), [](unsigned char c) { return ::isspace(c) != 0; });
  s.erase(lastKept.base(), s.end());
}
21
22// trim from both ends
23void trimToken(std::string &s) {
24 trimTokenFromStart(s);
25 trimTokenFromEnd(s);
26}
27
// Append an empty token to tokens when the last character of str is one of
// the characters in delims. for_each_token() stops once the input has been
// consumed, so a trailing separator does not by itself produce the empty
// token that follows it.
// An empty str has no final character and leaves tokens untouched.
void addEmptyFinalToken(const std::string &str, const std::string &delims, std::vector<std::string> &tokens) {
  // std::string::back() is undefined on an empty string, so bail out first.
  if (str.empty())
    return;

  if (delims.find(str.back()) != std::string::npos)
    tokens.emplace_back();
}
38
// Generic tokenizer built on std::find_first_of, modelled after
// http://tcbrindle.github.io/posts/a-quicker-study-on-tokenising/
// MIT licensed.
//
// Walks [first, last) and calls binary_op(tokenBegin, tokenEnd) once for each
// run of elements delimited by any element of [s_first, s_last). Separators
// are never part of a token. Nothing is reported for an empty input, and no
// token is reported after a separator that is the very last element.
template <class InputIt, class ForwardIt, class BinOp>
void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, BinOp binary_op) {
  for (auto tokenBegin = first; tokenBegin != last;) {
    const auto tokenEnd = std::find_first_of(tokenBegin, last, s_first, s_last);
    binary_op(tokenBegin, tokenEnd);
    // No separator was found: the token just reported ran to the end.
    if (tokenEnd == last)
      return;
    // Resume directly after the separator.
    tokenBegin = std::next(tokenEnd);
  }
}
52
53void splitKeepingWhitespaceEmptyTokens(const std::string &str, const std::string &delims,
54 std::vector<std::string> &output) {
55 output.clear();
56 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
57 [&output](std::string::const_iterator first, std::string::const_iterator second) {
58 output.emplace_back(first, second);
59 });
60}
61
62void splitKeepingWhitespaceIgnoringEmptyTokens(const std::string &str, const std::string &delims,
63 std::vector<std::string> &output) {
64 output.clear();
65 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
66 [&output](std::string::const_iterator first, std::string::const_iterator second) {
67 if (first != second)
68 output.emplace_back(first, second);
69 });
70}
71
72void splitIgnoringWhitespaceKeepingEmptyTokens(const std::string &str, const std::string &delims,
73 std::vector<std::string> &output) {
74 output.clear();
75 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
76 [&output](std::string::const_iterator first, std::string::const_iterator second) {
77 output.emplace_back(first, second);
78 trimToken(output.back());
79 });
80}
81
82void splitIgnoringWhitespaceEmptyTokens(const std::string &str, const std::string &delims,
83 std::vector<std::string> &output) {
84 output.clear();
85 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
86 [&output](std::string::const_iterator first, std::string::const_iterator second) {
87 if (first != second) {
88 output.emplace_back(first, second);
89 trimToken(output.back());
90 if (output.back().empty())
91 output.pop_back();
92 }
93 });
94}
95} // namespace
96
106Mantid::Kernel::StringTokenizer::StringTokenizer(const std::string &str, const std::string &separators,
107 unsigned options) {
108
109 // if str is empty, then there is no work to do. exit early.
110 if (str.empty())
111 return;
112
113 // see comments above for the different options split0,split1,split2 and
114 // split3 implement.
115 // cases 0-3 will check for a separator in the last place and insert an empty
116 // token at the end.
117 // cases 4-7 will not check and ignore a potential empty token at the end.
118 switch (options) {
119 case 0:
120 splitKeepingWhitespaceEmptyTokens(str, separators, m_tokens);
121 addEmptyFinalToken(str, separators, m_tokens);
122 return;
123 case TOK_IGNORE_EMPTY:
124 splitKeepingWhitespaceIgnoringEmptyTokens(str, separators, m_tokens);
125 return;
126 case TOK_TRIM:
127 splitIgnoringWhitespaceKeepingEmptyTokens(str, separators, m_tokens);
128 addEmptyFinalToken(str, separators, m_tokens);
129 return;
130 case (TOK_TRIM | TOK_IGNORE_EMPTY):
131 splitIgnoringWhitespaceEmptyTokens(str, separators, m_tokens);
132 return;
134 splitKeepingWhitespaceEmptyTokens(str, separators, m_tokens);
135 return;
137 splitKeepingWhitespaceIgnoringEmptyTokens(str, separators, m_tokens);
138 return;
140 splitIgnoringWhitespaceKeepingEmptyTokens(str, separators, m_tokens);
141 return;
143 splitIgnoringWhitespaceEmptyTokens(str, separators, m_tokens);
144 return;
145 }
146
147 // This point is reached only if options > 7.
148 throw std::runtime_error("Invalid option passed to Mantid::Kernel::StringTokenizer:" + std::to_string(options));
149}
@ TOK_IGNORE_FINAL_EMPTY_TOKEN
ignore an empty token at the end of the string.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
std::vector< std::string > m_tokens
StringTokenizer()=default
Constructs an object from an empty string.
std::string to_string(const wide_integer< Bits, Signed > &n)