Mantid
Loading...
Searching...
No Matches
StringTokenizer.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
9#include <algorithm>
10#include <iterator> //cbegin,cend
11#include <stdexcept>
12
13namespace {
14
// If the final character is a separator, we need to add an empty string to
// tokens.
//
// str:    the text that was tokenized.
// delims: the set of separator characters.
// tokens: receives one additional empty string when str ends with a separator.
//
// An empty str has no final character, so it is left alone rather than calling
// back() on an empty string, which is undefined behaviour.
void addEmptyFinalToken(const std::string &str, const std::string &delims, std::vector<std::string> &tokens) {
  if (str.empty())
    return;

  // Is the last character of the input one of the separators?
  if (delims.find(str.back()) != std::string::npos) {
    tokens.emplace_back();
  }
}
25
// generic tokenizer using std::find_first_of modelled after
// http://tcbrindle.github.io/posts/a-quicker-study-on-tokenising/
// MIT licensed.
//
// Walks [first, last) and calls binary_op(tokenBegin, tokenEnd) for every run
// of elements delimited by an element of [s_first, s_last). Adjacent or
// leading separators produce empty ranges. Nothing is reported for the empty
// remainder after a trailing separator, and nothing at all for empty input.
template <class InputIt, class ForwardIt, class BinOp>
void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, BinOp binary_op) {
  for (InputIt tokenBegin = first; tokenBegin != last;) {
    const InputIt tokenEnd = std::find_first_of(tokenBegin, last, s_first, s_last);
    binary_op(tokenBegin, tokenEnd);
    // No separator was found: the token just reported was the last one.
    if (tokenEnd == last)
      return;
    // Resume scanning immediately after the separator.
    tokenBegin = std::next(tokenEnd);
  }
}
39
40void splitKeepingWhitespaceEmptyTokens(const std::string &str, const std::string &delims,
41 std::vector<std::string> &output) {
42 output.clear();
43 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
44 [&output](std::string::const_iterator first, std::string::const_iterator second) {
45 output.emplace_back(first, second);
46 });
47}
48
49void splitKeepingWhitespaceIgnoringEmptyTokens(const std::string &str, const std::string &delims,
50 std::vector<std::string> &output) {
51 output.clear();
52 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
53 [&output](std::string::const_iterator first, std::string::const_iterator second) {
54 if (first != second)
55 output.emplace_back(first, second);
56 });
57}
58
59void splitIgnoringWhitespaceKeepingEmptyTokens(const std::string &str, const std::string &delims,
60 std::vector<std::string> &output) {
61 output.clear();
62 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
63 [&output](std::string::const_iterator first, std::string::const_iterator second) {
64 output.emplace_back(first, second);
65 Mantid::Kernel::Strings::stripInPlace(output.back());
66 });
67}
68
69void splitIgnoringWhitespaceEmptyTokens(const std::string &str, const std::string &delims,
70 std::vector<std::string> &output) {
71 output.clear();
72 for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(),
73 [&output](std::string::const_iterator first, std::string::const_iterator second) {
74 if (first != second) {
75 output.emplace_back(first, second);
76 Mantid::Kernel::Strings::stripInPlace(output.back());
77 if (output.back().empty())
78 output.pop_back();
79 }
80 });
81}
82} // namespace
83
92Mantid::Kernel::StringTokenizer::StringTokenizer(const std::string &str, const std::string &separators,
93 unsigned options) {
94
95 // if str is empty, then there is no work to do. exit early.
96 if (str.empty())
97 return;
98
99 // see comments above for the different options split0,split1,split2 and
100 // split3 implement.
101 // cases 0-3 will check for a separator in the last place and insert an empty
102 // token at the end.
103 // cases 4-7 will not check and ignore a potential empty token at the end.
104 switch (options) {
105 case 0:
106 splitKeepingWhitespaceEmptyTokens(str, separators, m_tokens);
107 addEmptyFinalToken(str, separators, m_tokens);
108 return;
109 case TOK_IGNORE_EMPTY:
110 splitKeepingWhitespaceIgnoringEmptyTokens(str, separators, m_tokens);
111 return;
112 case TOK_TRIM:
113 splitIgnoringWhitespaceKeepingEmptyTokens(str, separators, m_tokens);
114 addEmptyFinalToken(str, separators, m_tokens);
115 return;
116 case (TOK_TRIM | TOK_IGNORE_EMPTY):
117 splitIgnoringWhitespaceEmptyTokens(str, separators, m_tokens);
118 return;
120 splitKeepingWhitespaceEmptyTokens(str, separators, m_tokens);
121 return;
123 splitKeepingWhitespaceIgnoringEmptyTokens(str, separators, m_tokens);
124 return;
126 splitIgnoringWhitespaceKeepingEmptyTokens(str, separators, m_tokens);
127 return;
129 splitIgnoringWhitespaceEmptyTokens(str, separators, m_tokens);
130 return;
131 }
132
133 // This point is reached only if options > 7.
134 throw std::runtime_error("Invalid option passed to Mantid::Kernel::StringTokenizer:" + std::to_string(options));
135}
@ TOK_IGNORE_FINAL_EMPTY_TOKEN
ignore an empty token at the end of the string.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
std::vector< std::string > m_tokens
StringTokenizer()=default
Constructs an object from an empty string.
std::string to_string(const wide_integer< Bits, Signed > &n)