examples/simple_term_rewriting.py

# Copyright (c) 2008, David Baird <dhbaird@gmail.com>
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of the <ORGANIZATION> nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""
From http://mathworld.wolfram.com/TermRewritingSystem.html
(accessed 2008 Aug 7):

    Term rewriting systems are reduction systems in which rewrite rules
    apply to terms.  Terms are built up from variables and constants
    using function symbols (or operators).  Rules of term rewriting
    systems have the form x -> y, where both x and y are terms, x is
    not a variable, and every variable from y occurs in x as well.

The example presented here is split up into 4 parts:

1. The ``Expr`` class, for building trees of expressions using native
   Python syntax

2. The ``expansion_rules`` function which rewrites a redex according to
   one of several patterns that can be matched.

3. The ``simple_reduction_algorithm`` function which simply
   iterates through an expression tree and keeps applying a rewrite
   function (``expansion_rules``) until no more changes occur to the
   tree.

4. Some pretty print functions, ``to_prefix_s`` and ``to_infix_s`` for
   doing pretty printing of S-expressions.

Some limitations of this system are:

- The rules (``expansion_rules``) are not compiled into a fast pattern
  matching automata.

- The system is not capable of detecting confluence.

Caveat: I don't really know anything reduction/term rewriting systems.
This example was thrown together after about 1 day's worth of me
investigating the subject.
"""

from splarnektity import FMatch

uniqctr = 0
def uniqid(prefix='_'):
    """This function will just hand out a new unique identifier everytime
it is called."""
    global uniqctr
    x = '%s%05d' % (prefix, uniqctr)
    uniqctr += 1
    return x

# The following two functions are helper functions for pretty-printing
# of sexps in either prefix or infix notation (note that prefix is the
# normal way to write an sexp):

def to_prefix_s(expr):
    f = FMatch(expr); F1, TA, A, B = f.var(4)
    if f.when((TA, A)) and TA.v in ('var', 'const'): # if a terminal
        return '%s' % (A.v,)
    if f.when((F1, A.others)):
        return '(%s)' % ' '.join([F1.v] + ['%s' % to_prefix_s(x) for x in A.v])
    print 'ERROR:', expr
    assert(False)

def to_infix_s(expr):
    f = FMatch(expr); F1, TA, A, B = f.var(4)
    if f.when((F1, A, B)) and F1.v in ('+', '-', '*', '/'):
        # rewrite operators (+, -, *, /) to be infix:
        return '(%s %s %s)' % (to_infix_s(A.v), F1.v, to_infix_s(B.v))
    if f.when((TA, A)) and TA.v in ('var', 'const'): # if a terminal
        return '%s' % (A.v,)
    if f.when((F1, A.others)):
        # rewrite other functions (non-operators) to be prefix:
        return '(%s)' % ' '.join([F1.v] + ['%s' % to_infix_s(x) for x in A.v])
    print 'ERROR:', expr
    assert(False)

class Expr(object):
    """Expr objects have exactly one property: a sexp representing the
actual expression.  The purpose of the Expr class is to overload
Python's operators in order to enable direct manipulation of
expressions with native Python syntax.  Otherwise, everything
happily remains in sexp format."""

    def __init__(self, sexp_expression):
        self.__sexp__ = sexp_expression

    @property
    def sexp(self): return self.__sexp__

    # This is the whole reason for having an Expr class (versus using
    # vanilla sexps):
    def __add__(self, other):  return Expr.function('+', 2, self, other)
    def __radd__(self, other): return Expr.function('+', 2, other, self)
    def __sub__(self, other):  return Expr.function('-', 2, self, other)
    def __rsub__(self, other): return Expr.function('-', 2, other, self)
    def __mul__(self, other):  return Expr.function('*', 2, self, other)
    def __rmul__(self, other): return Expr.function('*', 2, other, self)
    def __div__(self, other):  return Expr.function('/', 2, self, other)
    def __rdiv__(self, other): return Expr.function('/', 2, other, self)
    def __neg__(self):         return Expr.function('-', 1, self)
    # ...and lots more: __eq__, __pow__, etc..

    # The following 3 methods construct the building blocks of an AST that
    # can be manipulated by a term rewriting system:

    @staticmethod
    def function(name, arity, *args):
        """Creates a function s-expression and wraps it in an Expr object."""
        assert(len(args) == arity)
        return Expr.to_expr((name,) + tuple(map(Expr.to_sexp, args)))

    @staticmethod
    def variable():
        """Creates a variable s-expression and wraps it in an Expr object."""
        return Expr.to_expr(('var', uniqid('_var')))

    @staticmethod
    def constant(value):
        """Creates a constant s-expression and wraps it in an Expr object."""
        return Expr.to_expr(('const', value))

    # The following methods are for convienience (they know how to turn
    # sexps and int/float/longs into Expr objects and almost vice versa):

    @staticmethod
    def to_sexp(x):
        if   type(x) in [list, tuple]: return x
        else:                          return Expr.to_expr(x).sexp

    @staticmethod
    def to_expr(x):
        """Coerce x into Expr format."""
        if   type(x) is Expr:               return x
        elif type(x) in (list, tuple):      return Expr(x)
        elif type(x) in (int, float, long): return Expr.constant(x)
        else:                               return Expr(to_sexp(x))

def expansion_rules(redex):
    redex = Expr.to_sexp(redex)
    f = FMatch(redex); F1, F2, A, B, C = f.var(5)

    # 1. Distributive rule:       A * (B + C) --> A * B + A * C
    #                             (A + B) * C --> A * C + B * C
    # 2. Left-associative rule:   A + (B + C) --> (A + B) + C
    #                             A * (B * C) --> (A * B) * C
    # 3. If two coefficients are adjacent, then combine them:
    #                       A * B --> evaluate(A * B) if  iscoefficient A
    #                                                 and iscoefficient B
    # 4. Coefficients go first:         A * B --> B * A if iscoefficient B
    #    (warning: executed naively (as is done here), this could result
    #    in a slow bubble sort)
    #
    # ...there are some other rules that could can be thrown in, such as
    # converting minus to plus, but those rules will be ignored here for
    # the sake of simplicity...

    # 1. Distributive rule:       A * (B + C) --> A * B + A * C
    #                             (A + B) * C --> A * C + B * C
    if f.when(('*', A, ('+', B, C))):
        print 'Rule #1a: distributive rule'
        return ('+', ('*', A.v, B.v), ('*', A.v, C.v))
    if f.when(('*', ('+', A, B), C)):
        print 'Rule #1b: distributive rule'
        return ('+', ('*', A.v, C.v), ('*', B.v, C.v))

    # 2. Left-associative rule:   A + (B + C) --> (A + B) + C
    #                             A * (B * C) --> (A * B) * C
    if f.when((F1, A, (F2, B, C))) and F1.v == F2.v and F1.v in ('+', '*'):
        print 'Rule #2: left-associative rule'
        return (F1.v, (F1.v, A.v, B.v), C.v)

    # 3. If two coefficients are adjacent, then combine them:
    #                       A * B --> evaluate(A * B) if  iscoefficient A
    #                                                 and iscoefficient B
    if f.when(('*', ('const', A), ('const', B))):
        print 'Rule #3: combine coefficients'
        return ('const', A.v * B.v)

    # 4. Coefficients go first:         A * B --> B * A if iscoefficient B
    #    (warning: executed naively (as is done here), this could result
    #    in a slow bubble sort)
    if f.when(('*', A, ('const', B))):
        print 'Rule #4: coefficients go first'
        return ('*', ('const', B.v), A.v)

    # Otherwise... no change
    return redex

def simple_reduction_algorithm(rewrite, expr):
    """This code probably has horrible performance, but it
is highly simplified and thus should be great for educational
value to someone who is just learning the basics of term
rewriting systems.  Here are the steps of the algorithm:

1. Execute a tree search algorithm to identify redexs in expr
2. Apply the rewrite function to the redex
3. Keep repeating until the rewrite function results in zero
   changes to the tree
"""
    expr = Expr.to_sexp(expr)
    expr1 = None
    # expr := the expr before a rewrite
    # expr1 := the expr after a rewrite
    while expr != expr1: # keep rewriting until no more changes occur
        if expr1 is not None:
            print 'Rewrote:', to_prefix_s(expr)
            print '     -->', to_prefix_s(expr1)
            print
            expr = expr1
        #
        f = FMatch(expr); TA, A, FName = f.var(3)
        if   f.when((TA, A)) and TA.v in ('const', 'var'): # if a terminal
            # NOTE: A term rewriting system is technically not supposed
            #       to rewrite terminals (variables)...
            expr1 = expr # versus rewrite(expr)
        elif f.when((FName, A.others)): # if not a terminal
            subtrees = [simple_reduction_algorithm(rewrite, x)
                        for x in A.v]
            expr1 = rewrite((FName.v,) + tuple(subtrees))

    return expr

A = Expr.variable()
expr = 3 * ((A + 2) * 4)
print 'before:', to_prefix_s(Expr.to_sexp(expr))
print
expr1 = simple_reduction_algorithm(expansion_rules, expr)
print 'after:', to_prefix_s(Expr.to_sexp(expr1))

# >>>
# before: (* 3 (* (+ _var00000 2) 4))
# 
# Rule #1b: distributive rule
# Rewrote: (* (+ _var00000 2) 4)
#      --> (+ (* _var00000 4) (* 2 4))
# 
# Rule #4: coefficients go first
# Rewrote: (* _var00000 4)
#      --> (* 4 _var00000)
# 
# Rule #3: combine coefficients
# Rewrote: (* 2 4)
#      --> 8
# 
# Rewrote: (+ (* _var00000 4) (* 2 4))
#      --> (+ (* 4 _var00000) 8)
# 
# Rule #1a: distributive rule
# Rewrote: (* 3 (* (+ _var00000 2) 4))
#      --> (+ (* 3 (* 4 _var00000)) (* 3 8))
# 
# Rule #2: left-associative rule
# Rewrote: (* 3 (* 4 _var00000))
#      --> (* (* 3 4) _var00000)
# 
# Rule #3: combine coefficients
# Rewrote: (* 3 4)
#      --> 12
# 
# Rewrote: (* (* 3 4) _var00000)
#      --> (* 12 _var00000)
# 
# Rule #3: combine coefficients
# Rewrote: (* 3 8)
#      --> 24
# 
# Rewrote: (+ (* 3 (* 4 _var00000)) (* 3 8))
#      --> (+ (* 12 _var00000) 24)
# 
# after: (+ (* 12 _var00000) 24)
Generated by GNU enscript 1.6.4.