db/d8d/StringTokenizer_8cpp_source.html

// Mantid Repository : https://github.com/mantidproject/mantid

//

// Copyright &copy; 2018 ISIS Rutherford Appleton Laboratory UKRI,

//   NScD Oak Ridge National Laboratory, European Spallation Source,

//   Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS

// SPDX - License - Identifier: GPL - 3.0 +

#include "MantidKernel/StringTokenizer.h"

#include <algorithm>

#include <iterator> //cbegin,cend

#include <stdexcept>


namespace {


// implement our own trim function to avoid the locale overhead in boost::trim.


// Remove leading whitespace from s in place.
// Whitespace is whatever ::isspace reports. Each character is passed through
// unsigned char first, because calling ::isspace with a negative value (any
// non-ASCII byte when char is signed) is undefined behaviour.
void trimTokenFromStart(std::string &s) {
  const auto isSpace = [](unsigned char c) { return ::isspace(c) != 0; };
  s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), isSpace));
}


// Remove trailing whitespace from s in place.
// Whitespace is whatever ::isspace reports. Each character is passed through
// unsigned char first, because calling ::isspace with a negative value (any
// non-ASCII byte when char is signed) is undefined behaviour.
void trimTokenFromEnd(std::string &s) {
  const auto isSpace = [](unsigned char c) { return ::isspace(c) != 0; };
  // Search backwards for the last non-space character; base() converts the
  // reverse iterator into the forward position just past that character.
  s.erase(std::find_if_not(s.rbegin(), s.rend(), isSpace).base(), s.end());
}


// Strip whitespace from both ends of s in place.
// The two trims are independent of each other, so the order does not affect
// the result; the tail is removed first so that the subsequent front erase
// has fewer characters to shift.
void trimToken(std::string &s) {
  trimTokenFromEnd(s);
  trimTokenFromStart(s);
}


// Append an empty token to tokens when str ends with a separator.
//   str    - the string that was tokenized
//   delims - the set of separator characters
//   tokens - token list to extend; left untouched if str is empty or its
//            last character is not one of delims
void addEmptyFinalToken(const std::string &str, const std::string &delims, std::vector<std::string> &tokens) {
  // std::string::back() on an empty string is undefined behaviour, so guard
  // here instead of relying on every caller to have checked.
  if (str.empty())
    return;

  if (delims.find(str.back()) != std::string::npos) {
    tokens.emplace_back();
  }
}


// Generic tokenizer built on std::find_first_of, modelled after
// http://tcbrindle.github.io/posts/a-quicker-study-on-tokenising/
// MIT licensed.
//
// Scans [first, last) and invokes binary_op(tokenBegin, tokenEnd) for each
// run of elements bounded by an element of [s_first, s_last). Adjacent
// separators yield an empty range (tokenBegin == tokenEnd). Nothing is
// reported for an empty input, nor for the position after a trailing
// separator, because scanning stops as soon as the cursor reaches last.
template <class InputIt, class ForwardIt, class BinOp>
void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, BinOp binary_op) {
  for (auto tokenBegin = first; tokenBegin != last;) {
    const auto tokenEnd = std::find_first_of(tokenBegin, last, s_first, s_last);
    binary_op(tokenBegin, tokenEnd);
    if (tokenEnd == last)
      return; // no separator left: that was the final token
    // Resume just past the separator that ended this token.
    tokenBegin = std::next(tokenEnd);
  }
}


// Split str at every character contained in delims and store the pieces in
// output, replacing whatever output held before.
// Tokens are stored verbatim: whitespace is preserved and empty tokens
// between adjacent separators are kept. An empty token following a trailing
// separator is not produced here, since for_each_token stops at the end of
// the input.
void splitKeepingWhitespaceEmptyTokens(const std::string &str, const std::string &delims,
                                       std::vector<std::string> &output) {
  using Iter = std::string::const_iterator;
  const auto storeToken = [&output](Iter tokenBegin, Iter tokenEnd) { output.emplace_back(tokenBegin, tokenEnd); };

  output.clear();
  for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(), storeToken);
}


// Split str at every character contained in delims and store the non-empty
// pieces in output, replacing whatever output held before.
// Whitespace is preserved, so a token consisting only of whitespace is kept;
// only zero-length tokens (adjacent separators) are dropped.
void splitKeepingWhitespaceIgnoringEmptyTokens(const std::string &str, const std::string &delims,
                                               std::vector<std::string> &output) {
  using Iter = std::string::const_iterator;
  const auto storeIfNonEmpty = [&output](Iter tokenBegin, Iter tokenEnd) {
    if (tokenBegin == tokenEnd)
      return; // zero-length token: skip it
    output.emplace_back(tokenBegin, tokenEnd);
  };

  output.clear();
  for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(), storeIfNonEmpty);
}


// Split str at every character contained in delims, trim leading and
// trailing whitespace from each piece, and store the results in output,
// replacing whatever output held before.
// Empty tokens are kept, including tokens that become empty after trimming.
void splitIgnoringWhitespaceKeepingEmptyTokens(const std::string &str, const std::string &delims,
                                               std::vector<std::string> &output) {
  using Iter = std::string::const_iterator;
  const auto storeTrimmed = [&output](Iter tokenBegin, Iter tokenEnd) {
    output.emplace_back(tokenBegin, tokenEnd);
    // Trim the stored copy in place rather than building a temporary.
    trimToken(output.back());
  };

  output.clear();
  for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(), storeTrimmed);
}


// Split str at every character contained in delims, trim leading and
// trailing whitespace from each piece, and store only the pieces that are
// still non-empty after trimming. Previous contents of output are discarded.
void splitIgnoringWhitespaceEmptyTokens(const std::string &str, const std::string &delims,
                                        std::vector<std::string> &output) {
  using Iter = std::string::const_iterator;
  const auto storeTrimmedIfNonEmpty = [&output](Iter tokenBegin, Iter tokenEnd) {
    if (tokenBegin == tokenEnd)
      return; // zero-length token: nothing to store

    output.emplace_back(tokenBegin, tokenEnd);
    std::string &token = output.back();
    trimToken(token);
    // A whitespace-only token is empty once trimmed; drop it again.
    if (token.empty())
      output.pop_back();
  };

  output.clear();
  for_each_token(str.cbegin(), str.cend(), delims.cbegin(), delims.cend(), storeTrimmedIfNonEmpty);
}

} // namespace


// Tokenize str and store the resulting tokens in m_tokens.
//   str        - the string to split; an empty string yields no tokens and
//                returns before options is examined
//   separators - every character in this string acts as a token separator
//   options    - 0 or a bitwise OR of TOK_IGNORE_EMPTY, TOK_TRIM and
//                TOK_IGNORE_FINAL_EMPTY_TOKEN
// Throws std::runtime_error if str is non-empty and options is not one of
// the eight supported combinations.
Mantid::Kernel::StringTokenizer::StringTokenizer(const std::string &str, const std::string &separators,
                                                 unsigned options) {
  // Nothing to tokenize.
  if (str.empty())
    return;

  // TOK_IGNORE_FINAL_EMPTY_TOKEN does not change which splitter is used, so
  // each pair of option sets that differ only in that flag shares a branch.
  switch (options) {
  case 0:
  case TOK_IGNORE_FINAL_EMPTY_TOKEN:
    splitKeepingWhitespaceEmptyTokens(str, separators, m_tokens);
    break;
  case TOK_IGNORE_EMPTY:
  case (TOK_IGNORE_FINAL_EMPTY_TOKEN | TOK_IGNORE_EMPTY):
    splitKeepingWhitespaceIgnoringEmptyTokens(str, separators, m_tokens);
    break;
  case TOK_TRIM:
  case (TOK_IGNORE_FINAL_EMPTY_TOKEN | TOK_TRIM):
    splitIgnoringWhitespaceKeepingEmptyTokens(str, separators, m_tokens);
    break;
  case (TOK_TRIM | TOK_IGNORE_EMPTY):
  case (TOK_IGNORE_FINAL_EMPTY_TOKEN | TOK_TRIM | TOK_IGNORE_EMPTY):
    splitIgnoringWhitespaceEmptyTokens(str, separators, m_tokens);
    break;
  default:
    // Not one of the supported option combinations.
    throw std::runtime_error("Invalid option passed to Mantid::Kernel::StringTokenizer:" + std::to_string(options));
  }

  // The splitters never emit a token after a trailing separator. When empty
  // tokens are kept and the final empty token is not being ignored, add it.
  if (options == 0 || options == static_cast<unsigned>(TOK_TRIM))
    addEmptyFinalToken(str, separators, m_tokens);
}

StringTokenizer.h

Mantid::Kernel::StringTokenizer::TOK_IGNORE_FINAL_EMPTY_TOKEN
@ TOK_IGNORE_FINAL_EMPTY_TOKEN
ignore an empty token at the end of the string.
Definition: StringTokenizer.h:33

Mantid::Kernel::StringTokenizer::TOK_IGNORE_EMPTY
@ TOK_IGNORE_EMPTY
ignore empty tokens
Definition: StringTokenizer.h:31

Mantid::Kernel::StringTokenizer::TOK_TRIM
@ TOK_TRIM
remove leading and trailing whitespace from tokens
Definition: StringTokenizer.h:32

Mantid::Kernel::StringTokenizer::m_tokens
std::vector< std::string > m_tokens
Definition: StringTokenizer.h:118

Mantid::Kernel::StringTokenizer::StringTokenizer
StringTokenizer()=default
Constructs an object from an empty string.

std::to_string
std::string to_string(const wide_integer< Bits, Signed > &n)