Source code for rule_engine.parser

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  rule_engine/parser.py
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are
#  met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following disclaimer
#    in the documentation and/or other materials provided with the
#    distribution.
#  * Neither the name of the project nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

import ast as ast_
import threading

from . import ast
from . import errors

import ply.lex as lex
import ply.yacc as yacc

literal_eval = ast_.literal_eval

[docs]class ParserBase(object): """ A base class for parser objects to inherit from. This does not provide any grammar related definitions. """ precedence = () """The precedence for operators.""" tokens = () reserved_words = {} """ A mapping of literal words which are reserved to their corresponding grammar names. """ __mutex = threading.Lock()
[docs] def __init__(self, debug=False): """ :param bool debug: Whether or not to enable debugging features when using the ply API. """ self.debug = debug self.context = None # Build the lexer and parser self._lexer = lex.lex(module=self, debug=self.debug) self._parser = yacc.yacc(module=self, debug=self.debug, write_tables=self.debug)
[docs] def parse(self, text, context, **kwargs): """ Parse the specified text in an abstract syntax tree of nodes that can later be evaluated. :param str text: The grammar text to parse into an AST. :param context: A context for specifying parsing and evaluation options. :type context: :py:class:`~rule_engine.engine.Context` :return: The parsed AST statement. :rtype: :py:class:`~rule_engine.ast.Statement` """ kwargs['lexer'] = kwargs.pop('lexer', self._lexer) with self.__mutex: self.context = context result = self._parser.parse(text, **kwargs) self.context = None return result
[docs]class Parser(ParserBase): """ The parser class for the rule grammar. This class contains many ply specific members to define the various components of the grammar allowing it to be parsed and reduced into an abstract syntax tree (AST). Once the AST has been constructed it can then be evaluated multiple times. To make the evaluation more efficient, nodes within the AST that are able to be reduced are while the parsing is taking place. This reduction phase involves evaluation, causing :py:exc:`~rule_engine.errors.EvaluationError` exceptions to be raised during parsing. """ op_names = { # arithmetic operators '+': 'ADD', '-': 'SUB', '**': 'POW', '*': 'MUL', '/': 'TDIV', '//': 'FDIV', '%': 'MOD', # bitwise operators '&': 'BWAND', '|': 'BWOR', '^': 'BWXOR', '<<': 'BWLSH', '>>': 'BWRSH', # comparison operators '==': 'EQ', '=~': 'EQ_FZM', '=~~': 'EQ_FZS', '!=': 'NE', '!~': 'NE_FZM', '!~~': 'NE_FZS', '>': 'GT', '>=': 'GE', '<': 'LT', '<=': 'LE', # logical operators 'and': 'AND', 'or': 'OR', } reserved_words = { # booleans 'true': 'TRUE', 'false': 'FALSE', # float constants 'inf': 'FLOAT_INF', 'nan': 'FLOAT_NAN', # null 'null': 'NULL', # operators 'and': 'AND', 'or': 'OR', 'not': 'NOT', } tokens = ( 'DATETIME', 'FLOAT', 'STRING', 'SYMBOL', 'LPAREN', 'RPAREN', 'QMARK', 'COLON' ) + tuple(set(list(reserved_words.values()) + list(op_names.values()))) t_ignore = ' \t' # Tokens t_BWAND = r'\&' t_BWOR = r'\|' t_BWXOR = r'\^' t_LPAREN = r'\(' t_RPAREN = r'\)' t_EQ = r'==' t_NE = r'!=' t_QMARK = r'\?' t_COLON = r'\:' t_ADD = r'\+' t_SUB = r'\-' t_MOD = r'\%' t_FLOAT = r'0(b[01]+|o[0-7]+|x[0-9a-fA-F]+)|[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?|\.[0-9]+([eE][+-]?[0-9]+)?' # tokens are listed from lowest to highest precedence, ones that appear # later are effectively evaluated first # see: https://en.wikipedia.org/wiki/Order_of_operations#Programming_languages precedence = ( ('left', 'OR'), ('left', 'AND'), ('right', 'NOT'), ('left', 'BWOR'), ('left', 'BWXOR'), ('left', 'BWAND'), ('right', 'QMARK', 'COLON'), ('nonassoc', 'EQ', 'NE', 'EQ_FZM', 'EQ_FZS', 'NE_FZM', 'NE_FZS', 'GE', 'GT', 'LE', 'LT'), # Nonassociative operators ('left', 'ADD', 'SUB'), ('left', 'BWLSH', 'BWRSH'), ('left', 'MUL', 'TDIV', 'FDIV', 'MOD'), ('left', 'POW'), ('right', 'UMINUS'), ) def t_POW(self, t): r'\*\*?' if t.value == '*': t.type = 'MUL' return t def t_FDIV(self, t): r'\/\/?' if t.value == '/': t.type = 'TDIV' return t def t_LT(self, t): r'<([=<])?' t.type = {'<': 'LT', '<=': 'LE', '<<': 'BWLSH'}[t.value] return t def t_GT(self, t): r'>([=>])?' t.type = {'>': 'GT', '>=': 'GE', '>>': 'BWRSH'}[t.value] return t def t_EQ_FZS(self, t): r'=~~?' if t.value == '=~': t.type = 'EQ_FZM' return t def t_NE_FZS(self, t): r'!~~?' if t.value == '!~': t.type = 'NE_FZM' return t def t_DATETIME(self, t): r'd(?P<quote>["\'])([^\\\n]|(\\.))*?(?P=quote)' t.value = t.value[1:] return t def t_STRING(self, t): r's?(?P<quote>["\'])([^\\\n]|(\\.))*?(?P=quote)' if t.value[0] == 's': t.value = t.value[1:] return t def t_SYMBOL(self, t): r'\$?[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*' t.type = self.reserved_words.get(t.value, 'SYMBOL') return t def t_newline(self, t): r'\n+' t.lexer.lineno += t.value.count("\n") def t_error(self, t): raise errors.RuleSyntaxError("syntax error (illegal character {0!r})".format(t.value[0]), t) # Parsing Rules def p_error(self, token): raise errors.RuleSyntaxError('syntax error', token) def p_statement_expr(self, p): 'statement : expression' p[0] = ast.Statement(self.context, p[1]) def p_expression_ternary(self, p): """ expression : expression QMARK expression COLON expression """ condition, _, case_true, _, case_false = p[1:6] p[0] = ast.TernaryExpression(self.context, condition, case_true, case_false).reduce() def p_expression_arithmetic(self, p): """ expression : expression ADD expression | expression SUB expression | expression MOD expression | expression MUL expression | expression FDIV expression | expression TDIV expression | expression POW expression """ left, op, right = p[1:4] op_name = self.op_names[op] p[0] = ast.ArithmeticExpression(self.context, op_name, left, right).reduce() def p_expression_bitwise(self, p): """ expression : expression BWAND expression | expression BWOR expression | expression BWXOR expression | expression BWLSH expression | expression BWRSH expression """ left, op, right = p[1:4] op_name = self.op_names[op] p[0] = ast.BitwiseExpression(self.context, op_name, left, right).reduce() def p_expression_comparison(self, p): """ expression : expression EQ expression | expression NE expression """ left, op, right = p[1:4] op_name = self.op_names[op] p[0] = ast.ComparisonExpression(self.context, op_name, left, right).reduce() def p_expression_arithmetic_comparison(self, p): """ expression : expression GT expression | expression GE expression | expression LT expression | expression LE expression """ left, op, right = p[1:4] op_name = self.op_names[op] p[0] = ast.ArithmeticComparisonExpression(self.context, op_name, left, right).reduce() def p_expression_fuzzy_comparison(self, p): """ expression : expression EQ_FZM expression | expression EQ_FZS expression | expression NE_FZM expression | expression NE_FZS expression """ left, op, right = p[1:4] op_name = self.op_names[op] p[0] = ast.FuzzyComparisonExpression(self.context, op_name, left, right).reduce() def p_expression_logic(self, p): """ expression : expression AND expression | expression OR expression """ left, op, right = p[1:4] op_name = self.op_names[op] p[0] = ast.LogicExpression(self.context, op_name, left, right).reduce() def p_expression_group(self, p): 'expression : LPAREN expression RPAREN' p[0] = p[2] def p_expression_negate(self, p): 'expression : NOT expression' p[0] = ast.UnaryExpression(self.context, 'NOT', p[2]).reduce() def p_expression_symbol(self, p): 'expression : SYMBOL' name = p[1] scope = None if name[0] == '$': scope = 'built-in' name = name[1:] p[0] = ast.SymbolExpression(self.context, name, scope=scope).reduce() def p_expression_uminus(self, p): 'expression : SUB expression %prec UMINUS' names = {'-': 'UMINUS'} p[0] = ast.UnaryExpression(self.context, names[p[1]], p[2]).reduce() # Literal expressions def p_expression_boolean(self, p): """ expression : TRUE | FALSE """ p[0] = ast.BooleanExpression(self.context, p[1] == 'true') def p_expression_datetime(self, p): 'expression : DATETIME' p[0] = ast.DatetimeExpression.from_string(self.context, literal_eval(p[1])) def p_expression_float(self, p): 'expression : FLOAT' p[0] = ast.FloatExpression(self.context, float(literal_eval(p[1]))) def p_expression_float_nan(self, p): 'expression : FLOAT_NAN' p[0] = ast.FloatExpression(self.context, float('nan')) def p_expression_float_inf(self, p): 'expression : FLOAT_INF' p[0] = ast.FloatExpression(self.context, float('inf')) def p_expression_null(self, p): """expression : NULL""" p[0] = ast.NullExpression(self.context) def p_expression_string(self, p): 'expression : STRING' p[0] = ast.StringExpression(self.context, literal_eval(p[1]))