Mantid
Loading...
Searching...
No Matches
Expression.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7#include <locale>
8#include <sstream>
9
11
12#include "MantidKernel/Logger.h"
14
15namespace Mantid::API {
16
18
19const std::vector<std::string> Expression::DEFAULT_OPS_STR = {
20 ";", ",", "=", "== != > < <= >=", "&& || ^^", "+ -", "* /", "^"};
21
22const std::string EMPTY_EXPRESSION_NAME = "EMPTY";
23namespace {
/**
 * Build a parsing error message that shows the part of the expression around
 * the offending symbol together with a caret pointing at it.
 *
 * At most MAX_LEFT_SIZE characters before the symbol and MAX_RIGHT_SIZE
 * characters starting at the symbol are shown; anything cut off on either side
 * is replaced with "...".
 *
 * @param msg  Description of the error.
 * @param expr The expression string being parsed.
 * @param i    Index of the offending symbol in expr. Values past the end of
 *             expr are treated as pointing just behind the last character.
 * @return The message, the expression fragment and the caret line, each
 *         terminated by a new line.
 */
std::string makeErrorMessage(const std::string &msg, const std::string &expr, size_t i) {
  const size_t MAX_LEFT_SIZE = 10;
  const size_t MAX_RIGHT_SIZE = 10;

  // Clamp the position so that the arithmetic below can never underflow and
  // substr() is never asked to start behind the end of the string.
  const size_t pos = i < expr.size() ? i : expr.size();

  // Left edge of the displayed window.
  size_t first = 0;
  std::string leftEllipsis;
  if (pos > MAX_LEFT_SIZE) {
    first = pos - MAX_LEFT_SIZE;
    leftEllipsis = "...";
  }

  // Right edge (one past the last displayed character) of the window.
  size_t end = expr.size();
  std::string rightEllipsis;
  if (end - pos > MAX_RIGHT_SIZE) {
    end = pos + MAX_RIGHT_SIZE;
    rightEllipsis = "...";
  }

  // Column of the caret: the left ellipsis shifts the fragment to the right.
  const size_t caretColumn = leftEllipsis.size() + (pos - first);

  std::ostringstream res;
  res << msg << " at\n\n";
  // substr() takes a character count, not an end index.
  res << leftEllipsis << expr.substr(first, end - first) << rightEllipsis << '\n';
  res << std::string(caretColumn, ' ') << '^' << '\n';
  return res.str();
}
54
55// Get a reference to the logger
56Kernel::Logger logger("Expression");
57
58} // namespace
59
65Expression::ParsingError::ParsingError(const std::string &msg, const std::string &expr, size_t i)
66 : std::runtime_error(makeErrorMessage(msg, expr, i)) {}
67
70Expression::ParsingError::ParsingError(const std::string &msg) : std::runtime_error(msg) {}
71
73 m_operators.reset(new Operators());
74 // Define binary operators. Put them in the reverse precedence order (from
75 // lower to higher prec.)
77
78 // Define unary operators
79 std::unordered_set<std::string> unary;
80 unary.insert("+");
81 unary.insert("-");
82
83 add_unary(unary);
84}
85
87Expression::Expression(const std::vector<std::string> &ops) {
88 m_operators.reset(new Operators());
89 add_operators(ops);
90}
91
93Expression::Expression(const std::vector<std::string> &binary, const std::unordered_set<std::string> &unary) {
94 m_operators.reset(new Operators());
95 add_operators(binary);
96 add_unary(unary);
97}
98
100 : // m_tokens(expr.m_tokens),
101 // m_expr(expr.m_expr),
102 m_funct(expr.m_funct), m_op(expr.m_op), m_terms(expr.m_terms), m_operators(expr.m_operators) {}
103Expression::Expression(const Expression *pexpr) : m_operators(pexpr->m_operators) {}
104
106Expression &Expression::operator=(const Expression &expr) = default;
107
108void Expression::add_operators(const std::vector<std::string> &ops) {
109 m_operators->binary = ops;
110 // Fill in the precedence table (m_op_precedence)
111 for (size_t i = 0; i < m_operators->binary.size(); i++) {
112 char j = 0;
114 for (const auto &index : tkz) {
115 m_operators->precedence[index] = i + 1;
116 m_operators->op_number[index] = j++;
117 }
118 }
119
120 for (auto str : ops) {
121 for (char c : str) {
122 if (c == ' ')
123 continue;
124 m_operators->symbols.insert(c);
125 }
126 }
127}
128
129void Expression::add_unary(const std::unordered_set<std::string> &ops) {
130 m_operators->unary = ops;
131 for (const auto &op : ops) {
132 m_operators->symbols.insert(op.cbegin(), op.cend());
133 }
134}
135
136size_t Expression::op_prec(const std::string &op) const {
137 std::map<std::string, size_t>::const_iterator i = m_operators->precedence.find(op);
138 if (i == m_operators->precedence.end())
139 return 0;
140 return i->second;
141}
142
143bool Expression::is_unary(const std::string &op) const {
144 return m_operators->unary.find(op) != m_operators->unary.end();
145}
146
147bool Expression::is_op_symbol(const char c) const { return m_operators->symbols.find(c) != m_operators->symbols.end(); }
148
149void Expression::trim(std::string &str) {
150 size_t i = str.find_first_not_of(" \t\n\r");
151 size_t j = str.find_last_not_of(" \t\n\r");
152 if (i == std::string::npos || j == std::string::npos || j < i) {
153 str = "";
154 } else {
155 str = str.substr(i, j - i + 1);
156 }
157}
158
159void Expression::parse(const std::string &str) {
160 m_expr = str;
161 trim(m_expr);
162
163 if (m_expr.size() > 1 && m_expr.front() == '(' && m_expr.back() == ')') {
164 if (m_expr.find('(', 1) == std::string::npos) {
165 m_expr.erase(0, 1);
166 m_expr.erase(m_expr.size() - 1, 1);
167 trim(m_expr);
168 }
169 }
170
171 tokenize();
172
173 if (m_tokens.empty()) {
175 return;
176 }
177
178 std::string op = GetOp(0);
179 // size_t prec = m_operators->precedence[op];
180 size_t prec = op_prec(op);
182
183 setFunct(*tkz.begin());
184
185 for (size_t i = 0; i <= m_tokens.size(); i++) {
186 m_terms.emplace_back(Expression(this));
187 Expression &t = m_terms.back();
188 if (i)
189 t.m_op = GetOp(i - 1);
190 t.parse(GetToken(i));
191 }
192 m_expr = "";
193 m_tokens.clear();
194}
195
197 m_tokens.clear();
198
199 size_t min_prec = 1000;
200 size_t is = 0;
201 unsigned int lvl = 0;
202 const size_t last = m_expr.size() - 1;
203 bool inString = false;
204 int skip = 0;
205 bool canBeBinary = false;
206 // if parser is inside a number (important case is 123.45e+67)
207 bool isNumber = false;
208 bool canDotBeAdded = false;
209 bool canEBeAdded = false;
210 bool canPlusBeAdded = false;
211 Tokens tokens;
212 for (size_t i = 0; i < m_expr.size(); i++) {
213 char c = m_expr[i];
214
215 if (!inString && skip == 0) {
216 if (isNumber) {
217 if (c == '.') {
218 if (canDotBeAdded) {
219 canDotBeAdded = false;
220 } else {
221 isNumber = false;
222 }
223 } else if (c == 'e' || c == 'E') {
224 if (canEBeAdded) {
225 canEBeAdded = false;
226 canDotBeAdded = false;
227 canPlusBeAdded = true;
228 } else {
229 isNumber = false;
230 }
231 } else if (c == '+' || c == '-') {
232 if (canPlusBeAdded) {
233 canPlusBeAdded = false;
234 canEBeAdded = false;
235 canDotBeAdded = false;
236 } else {
237 isNumber = false;
238 }
239 } else if (!isdigit(c)) {
240 isNumber = false;
241 }
242 } else if (isdigit(c)) {
243 isNumber = true;
244 canDotBeAdded = true;
245 canEBeAdded = true;
246 canPlusBeAdded = false;
247 }
248 if (lvl == 0 && !isNumber && is_op_symbol(c)) // insert new token
249 {
250 if (i == last) {
251 if (c == ',' || c == ';') {
252 m_expr.resize(last);
253 break;
254 } else {
255 throw ParsingError("A binary operator isn't followed by a value", m_expr, i);
256 }
257 }
258 auto is1 = i + (is_op_symbol(m_expr[i + 1]) ? 2 : 1);
259
260 if (is1 > last) {
261 throw ParsingError("Syntax error", m_expr, last);
262 }
263
264 std::string op = m_expr.substr(i, is1 - i);
265 size_t prec = canBeBinary ? m_operators->precedence[op] : 0;
266 if (!prec) // operator does not exist
267 {
268 bool error = true;
269 // check if it's a binary and a unary operators together
270 if (op.size() == 2) {
271 if (is_unary(op)) {
272 is1 -= 2;
273 skip = 2;
274 prec = min_prec + 1; // do not add token
275 error = false;
276 } else {
277 is1 -= 1;
278 std::string uop = op.substr(1, 1);
279 op = op[0];
280 if (is_op_symbol(m_expr[is1 + 1])) {
281 uop += m_expr[is1 + 1];
282 if (is1 + 2 > last) {
283 throw ParsingError("Syntax error", m_expr, is1 + 1);
284 }
285 }
286 if (is_unary(uop)) {
287 prec = m_operators->precedence[op];
288 if (prec) { // we don't want to create a new token with unary
289 // operator. it is processed in SetFunct()
290 skip = 1;
291 error = false;
292 }
293 }
294 }
295 } // op.size == 2
296 else if (op.size() == 1) {
297 // skip = 1;
298 prec = min_prec + 1; // do not add token
299 error = false;
300 }
301 if (error) {
302 throw ParsingError("Unrecognized operator", m_expr, i);
303 }
304 }
305
306 if (prec <= min_prec) {
307 if (prec < min_prec)
308 min_prec = prec;
309 Token tok(is, i - 1, is1, prec);
310 tokens.emplace_back(tok);
311 is = is1;
312 }
313
314 i = is1 - 1;
315
316 canBeBinary = false;
317
318 } // insert new token
319 else if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
320 canBeBinary = true;
321 }
322
323 if (c == '(')
324 lvl++;
325 if (c == ')') {
326 if (lvl)
327 lvl--;
328 else {
329 throw ParsingError("Unmatched bracket", m_expr, 0);
330 }
331 }
332 } // !inString || skip
333 else if (skip > 0) {
334 skip--;
335 }
336
337 if (c == '"') {
338 inString = !inString;
339 }
340
341 } // for i
342
343 if (!tokens.empty()) {
344 // remove operators of higher prec
345 m_tokens.emplace_back(tokens[0]);
346 for (size_t i = 0; i < tokens.size(); i++) {
347 const Token &tok = tokens[i];
348 std::string op = m_expr.substr(tok.ie + 1, tok.is1 - tok.ie - 1); //?
349 if (m_operators->precedence[op] == min_prec) {
350 Token &last_tok = m_tokens.back();
351 last_tok.ie = tok.ie;
352 last_tok.is1 = tok.is1;
353 if (i != tokens.size() - 1)
354 m_tokens.emplace_back(tokens[i + 1]);
355 }
356 }
357 }
358}
359
360std::string Expression::GetToken(size_t i) {
361 if (m_tokens.empty())
362 return m_expr;
363
364 if (i < m_tokens.size()) {
365 const Token &tok = m_tokens[i];
366 return m_expr.substr(tok.is, tok.ie - tok.is + 1);
367 }
368
369 if (i == m_tokens.size()) {
370 const Token &tok = m_tokens[i - 1];
371 return m_expr.substr(tok.is1);
372 }
373
374 return "";
375}
376
377std::string Expression::GetOp(size_t i) {
378 if (m_tokens.empty() || i >= m_tokens.size())
379 return "";
380
381 const Token &tok = m_tokens[i];
382 return m_expr.substr(tok.ie + 1, tok.is1 - tok.ie - 1);
383}
384
385void Expression::logPrint(const std::string &pads) const {
386 std::string myPads = pads + " ";
387 if (!m_terms.empty()) {
388 logger.debug() << myPads << m_op << '[' << m_funct << ']' << "(\n";
389 for (const auto &term : m_terms)
390 term.logPrint(myPads);
391 logger.debug() << myPads << ")\n";
392 } else
393 logger.debug() << myPads << m_op << m_funct << '\n';
394}
395
396void Expression::setFunct(const std::string &name) {
397 if (!op_prec(name)) {
398 std::string op;
399 if (name.size() > 1 && is_op_symbol(name[0])) {
400 op = name.substr(0, 1);
401 if (name.size() > 2 && is_op_symbol(name[1])) {
402 op += name[1];
403 }
404 }
405 if (!op.empty() && is_unary(op)) {
406 m_funct = op;
407 Expression tmp(this);
408 tmp.parse(name.substr(op.size()));
409 m_terms.emplace_back(tmp);
410 return;
411 }
412 }
413
414 m_funct = name;
415 trim(m_funct);
416
417 if (m_funct.empty()) {
419 return;
420 }
421
422 // Check if the function has arguments
423 std::string::size_type i = std::string::npos;
424
425 bool inQuotes = false;
426 for (std::string::const_iterator c = name.begin(); c != name.end(); ++c) {
427 if (*c == '"') {
428 inQuotes = !inQuotes;
429 continue;
430 }
431
432 if (!inQuotes && *c == '(') {
433 i = c - name.begin();
434 break;
435 }
436 }
437
438 if (i != std::string::npos) {
439 std::string::size_type j = name.find_last_of(')');
440 if (j == std::string::npos || j < i) {
441 throw ParsingError("Unmatched bracket", name, i);
442 }
443
444 if (j > i + 1) // nonzero argument list
445 {
446 std::string args = name.substr(i + 1, j - i - 1); //?
447 trim(args);
448 std::string f = name.substr(0, i);
449 Expression tmp(this);
450 tmp.parse(args);
451 if (tmp.name() != EMPTY_EXPRESSION_NAME && (!tmp.isFunct() || tmp.name() != ",")) {
452 m_terms.emplace_back(tmp);
453 } else {
454 if (f.empty() && tmp.name() == ",") {
455 f = ",";
456 }
457 std::string my_op = m_op;
458 *this = tmp;
459 m_op = my_op;
460 }
461 m_funct = f;
462 if (m_funct.empty() && m_terms.empty()) {
464 }
465 }
466 }
467}
468
469std::string Expression::str() const {
470 bool brackets = false;
471 std::ostringstream res;
472 size_t prec = op_prec(m_funct);
473 if (size() == 1 && is_unary(m_funct)) { // unary operator
474 res << m_funct;
475 if (op_prec(m_terms[0].m_funct) > 0) {
476 brackets = true;
477 }
478 } else if (!prec) { // function with a name
479 res << m_funct;
480 brackets = true;
481 } else if (m_op == "-" && m_funct == "+") {
482 brackets = true;
483 } else if (m_op == "/" && m_funct == "*") {
484 brackets = true;
485 }
486
487 if (!m_terms.empty()) {
488 if (brackets)
489 res << '(';
490 for (const auto &term : m_terms) {
491 res << term.operator_name();
492 size_t prec1 = op_prec(term.m_funct);
493 bool isItUnary = false;
494 if (term.size() == 1 && is_unary(term.m_funct)) {
495 prec1 = 0; // unary operator
496 isItUnary = true;
497 }
498 bool bk = prec > 0 && prec1 > 0 && prec > prec1;
499 if (bk)
500 res << '(';
501 if (isItUnary)
502 res << ' ';
503 res << term.str();
504 if (bk)
505 res << ')';
506 }
507 if (brackets)
508 res << ')';
509 }
510 return res.str();
511}
512
514 const Expression *e = this;
515 while (e->name().empty() && e->size() == 1) {
516 e = &e->m_terms[0];
517 }
518 return *e;
519}
520
524std::unordered_set<std::string> Expression::getVariables() const {
525 std::unordered_set<std::string> out;
526 if (!isFunct()) {
527 std::string s = name();
528 if (!s.empty() && !isdigit(s[0])) {
529 out.insert(s);
530 }
531 } else {
532 for (const auto &e : *this) {
533 if (e.isFunct()) {
534 std::unordered_set<std::string> tout = e.getVariables();
535 out.insert(tout.begin(), tout.end());
536 } else {
537 std::string s = e.name();
538 if (!s.empty() && !isdigit(s[0])) {
539 out.insert(s);
540 }
541 }
542 }
543 }
544 return out;
545}
546
547void Expression::rename(const std::string &newName) { m_funct = newName; }
548
549void Expression::renameAll(const std::string &oldName, const std::string &newName) {
550 if (!isFunct() && name() == oldName) {
551 rename(newName);
552 } else {
553 for (auto &term : m_terms) {
554 term.renameAll(oldName, newName);
555 }
556 }
557}
558
559void Expression::toList(const std::string &sep) {
560 if (name() == sep)
561 return;
562 Expression term(*this);
563 m_terms.resize(1);
564 m_terms[0] = term;
565 setFunct(sep);
566}
567
568} // namespace Mantid::API
gsl_vector * tmp
double error
Definition: IndexPeaks.cpp:133
std::map< DeltaEMode::Type, std::string > index
Definition: DeltaEMode.cpp:19
Specialised exception for parsing errors.
Definition: Expression.h:39
ParsingError(const std::string &msg, const std::string &expr, size_t i)
Constructor.
Definition: Expression.cpp:65
This class represents an expression made up of names, binary operators and brackets.
Definition: Expression.h:36
std::string name() const
Returns the name of the expression which is a function or variable name.
Definition: Expression.h:71
std::string m_expr
Saved expression string.
Definition: Expression.h:170
static void trim(std::string &str)
Remove leading and ending empty spaces from a string.
Definition: Expression.cpp:149
std::shared_ptr< Operators > m_operators
pointer to the operators
Definition: Expression.h:215
void add_unary(const std::unordered_set< std::string > &ops)
Adds new unary operators to the expression.
Definition: Expression.cpp:129
std::unordered_set< std::string > getVariables() const
Return a list of all variable names in this expression.
Definition: Expression.cpp:524
void parse(const std::string &str)
Parse a string and create an expression.
Definition: Expression.cpp:159
std::string m_op
Operator connecting this expression to its sibling on the left.
Definition: Expression.h:173
std::string m_funct
Function name.
Definition: Expression.h:172
const Expression & bracketsRemoved() const
If the expression has 1 argument and empty function name it means it is wrapped in brackets This meth...
Definition: Expression.cpp:513
void logPrint(const std::string &pads="") const
Print the expression to the debug log to show its structure.
Definition: Expression.cpp:385
void setFunct(const std::string &name)
Set the function name of this expression.
Definition: Expression.cpp:396
std::string GetOp(size_t i)
Get the operator connecting i-th token.
Definition: Expression.cpp:377
bool is_unary(const std::string &op) const
Check if a string is a unary operator.
Definition: Expression.cpp:143
Expression & operator=(const Expression &expr)
Assignment operator.
bool isFunct() const
Returns true if the expression is a function (i.e. has arguments)
Definition: Expression.h:69
void renameAll(const std::string &oldName, const std::string &newName)
Rename all variables with a given name.
Definition: Expression.cpp:549
std::vector< Token > Tokens
The container type.
Definition: Expression.h:149
std::string str() const
Returns this expression as a string.
Definition: Expression.cpp:469
std::string GetToken(size_t i)
Get i-th token.
Definition: Expression.cpp:360
Tokens m_tokens
The container for the token markers.
Definition: Expression.h:169
void toList(const std::string &sep=",")
Make sure the expression is a list of expression separated by sep, eg "term1,term2,...
Definition: Expression.cpp:559
bool is_op_symbol(const char c) const
Check if a character is a part of an operator.
Definition: Expression.cpp:147
Expression()
Default constructor.
Definition: Expression.cpp:72
size_t size() const
Returns the number of arguments.
Definition: Expression.h:79
std::vector< Expression > m_terms
Child expressions (function arguments)
Definition: Expression.h:174
static const std::vector< std::string > DEFAULT_OPS_STR
Definition: Expression.h:119
size_t op_prec(const std::string &op) const
Returns the precedence of operator op.
Definition: Expression.cpp:136
void add_operators(const std::vector< std::string > &ops)
Adds new binary operators to the expression.
Definition: Expression.cpp:108
void rename(const std::string &newName)
Rename this expression.
Definition: Expression.cpp:547
void tokenize()
Analyze the string in m_expr and find all top level tokens.
Definition: Expression.cpp:196
The Logger class is in charge of the publishing messages from the framework through various channels.
Definition: Logger.h:52
Iterator begin()
Iterator referring to first element in the container.
@ TOK_IGNORE_EMPTY
ignore empty tokens
@ TOK_TRIM
remove leading and trailing whitespace from tokens
const std::string EMPTY_EXPRESSION_NAME
Definition: Expression.cpp:22
STL namespace.
Keeps operator that can be used in an expression.
Definition: Expression.h:179
This is a struct to mark a token in a string expression.
Definition: Expression.h:131
size_t is1
The index of the first symbol of the next token.
Definition: Expression.h:145
size_t is
The index of the first symbol of the token.
Definition: Expression.h:143
size_t ie
The index of the last symbol of the token.
Definition: Expression.h:144