Version 14 of Algebraic Types

Updated 2011-05-21 13:50:08 by aspect

149 lines with little testing code and comments. That's why I like Tcl - complex language flow control construct in such a snippet.

Actually, it was my second attempt. First was a lot uglier and bigger.

 # Algebraic types.
 # Creating constructors and deconstruction by pattern matching.

 namespace eval atypes {
        # List of defined constructors.
        # Actually, it is an map from constructor names to their typenames,
        # and used to keep track of constructor name uniqueness.
        array set defcons {}
        # Create algebraic type.
        # Argument 'typename' is useless, but FP languages need it.
        # 'args' argument is a list of constructor
        # descriptions. They are in the form
        # ConstructorName ?param? ?param ...?.
        # ConstructorName should be unique.
        #
        # At the end we create a bunch of commands.
        proc atype {typename args} {
                variable defcons
                # Defining constructors for type.
                foreach c $args {
                        # Split constructor definition into name and args
                        set cvars [lrange $c 1 end]
                        set constrname [lindex $c 0]
                        if {![string is upper [string range $constrname 0 0]]} {
                                error "Incorrect constructor name '$constrname' for type $typename"
                        }
                        if {[info exists defcons($constrname)]} {
                                error "Constructor $constrname (type $typename) was already defined for type $defcons($constrname)"
                        }
                        # We ought to create constructor header and
                        # construction part. Constructor header is a list
                        # of variables.
                        set construction [list $constrname]
                        foreach cv $cvars {
                                if {[llength $cv]!=1} {
                                        error "Improper argument name $cv for $constrname (type $typename)"
                                }
                                lappend construction "\[set $cv\]"
                        }
                        set construction [join $construction " "]
                        proc ::$constrname $cvars "return \[list $construction\]"
                }
        }
        # Match a constructed value against pattern(s).
        # Pattern can be:
        # $<varname> - if element starts from $, then [string range 1 end]
        #              is considered variable name to bind.
        # _ - for dummy variable (expression is matched but dropped from
        #     assignment)
        # {ConstrName ?pattern pattern ...?} - match of constructed expression
        proc match {expr patterns} {
                foreach {p pcode} $patterns {
                        # Try to match
                        foreach {ok bindlist} [matchbind $expr $p] break
                        if {$ok} {
                                foreach {var val} $bindlist {
                                        uplevel 1 [list set $var $val]
                                }
                                set rcode [catch {uplevel 1 $pcode} result]
                                return -code $rcode $result
                        }
                }
                error "$expr does not match patterns $patterns"
        }
        # Auxillary function.
        # Actually tries to match expression and pattern.
        # Returns [list 1 bindlist] for success and [list 0 {}] for failure.
        proc matchbind {expr pattern} {
                # pattern might be a '_':
                if {[string equal _ $pattern]} {
                        return {1 {}}
                }
                # pattern might be a $var:
                if {[string equal \$ [string range $pattern 0 0]]} {
                        return [list 1 [list [string range $pattern 1 end] $expr]]
                }
                # Complex structural match case.
                # First, 'shape' of pattern binding and
                # expression should match:
                if {[llength $expr]!=[llength $pattern]} {
                        return {0 {}}
                }
                # Split pattern and expression into respective constructors
                # and arguments.
                set subps  [lrange $pattern 1 end]
                set pattconstr [lindex $pattern 0]
                set subexprs [lrange $expr 1 end]
                set exprconstr [lindex $expr 0]
                # Second, head of expression and pattern should be equal.
                if {![string equal $pattconstr $exprconstr]} {
                        return {0 {}}
                }
                # Okay, then we should match every expression with
                # every pattern, going recursively, if needed.
                set binds {}        ;# bind list
                foreach p $subps e $subexprs {
                        # Match subexpression with subpattern
                        foreach {ok bindadd} [matchbind $e $p] break
                        if {!$ok} {
                                # If not matched - fail.
                                return {0 {}}
                        }
                        # Otherwise - grow bind pars list.
                        set binds [concat $binds $bindadd]
                }
                # Return success
                return [list 1 $binds]
        }
        # Export such a useful command:
        namespace export atype match
 }
 catch {rename match {}}
 catch {rename atype {}}
 namespace import atypes::*

 # tests:
 if 1 {
        # Maybe type (single element list)
        atype Maybe {Nothing} {Just a}
        # List as it should be:
        atype List {Nil} {List head tail}
        # Verifying construction:
        puts [Nothing]
        puts [Just "hello, world!"]
        puts [List ? [List ! Nil]]
        # Verifying matching.
        proc testmatch {e} {
                match $e {
                        {Just $x} {puts "Just x branch: $x"}
                        {Nothing} {puts "Nothing branch"}
                        {List $head {List $head2 $tail2}} {
                                puts "Complex list branch."
                                puts "head '$head', head2 '$head2', tail2 '$tail2'"
                        }
                        {List $head $tail} {
                                puts "List branch: head '$head' tail '$tail'"
                        }
                        {generate error} {
                                puts "We will generate an error"
                                error "Error was generated"
                        }
                        _ {
                                puts "Unknown expression $e"
                        }
                }
        }
        testmatch [Just "what?"]
        testmatch [Nothing]
        testmatch [List ? [List ! Nil]]
        testmatch [List ? Nil]
        testmatch ???
        testmatch "generate error"
 }

You may see that we testing a match with arbitrary string in

 testmatch "generate error"

This is perfectly Ok and I mean it from the start. "Everything is a string", isn't it?

I eliminated use of lassign so it doesn't need Tclx anymore. So I (and you) can use it on my (or your) Linux notebook without any upgrades.

Take a look at Simple BDD for demonstration of capabilities.

I've used algebraic types for expression manipulation. An example could be found here: Expression Tree Package.

NEM notes that this code appears to be written by SZ. I've also had a go at implementing algebraic pattern matching while playing with Monadic TOOT, and you can see the code on that page. Haskell is full of interesting ideas...


NEM 2009-05-15: Here is a version of algebraic data-types that can handle matching multiple values simultaneously:

# datatype.tcl --
#
#       Algebraic datatypes and pattern matching in Tcl.
#

package require Tcl         8.5
package provide datatype    0.1

namespace eval ::datatype {
    namespace export define match matches
    namespace ensemble create

    # Datatype definitions
    proc define {type = args} { 
        set ns [uplevel 1 { namespace current }]
        foreach cons [split [join $args] |] {
            set name [lindex $cons 0]
            set args [lrange $cons 1 end]
            proc $ns\::$name $args [format {
                lreplace [info level 0] 0 0 %s
            } [list $name]]
        }
        return $type
    }

    # Pattern matching
    # matches pattern value envVar --
    #   Returns 1 if value matches pattern, else 0
    #   Binds match variables in envVar
    proc matches {pattern value envVar} {
        upvar 1 $envVar env
        if {[var? $pattern]} { return [bind env $pattern $value] }
        if {[llength $pattern] != [llength $value]} { return 0 }
        if {[lindex $pattern 0] ne [lindex $value 0]} { return 0 }
        foreach pat [lrange $pattern 1 end] val [lrange $value 1 end] {
            if {![matches $pat $val env]} { return 0 }
        }
        return 1
    }
    # A variable starts with lower-case letter or _. _ is a wildcard.
    proc var? term { string match {[a-z_]*} $term }
    proc bind {envVar var value} {
        upvar 1 $envVar env
        if {![info exists env]} { set env [dict create] }
        if {$var eq "_"} { return 1 }
        dict set env $var $value
        return 1
    }
    proc match args {
        #puts "MATCH: $args"
        set values [lrange $args 0 end-1]
        set choices [lindex $args end]
        append choices \n [list return -code error -level 2 "no match for $values"]
        set f [list values $choices [namespace current]]
        lassign [apply $f $values] env body
        #puts "RESULT: $env -> $body"
        dict for {k v} $env { upvar 1 $k var; set var $v }
        catch { uplevel 1 $body } msg opts
        dict incr opts -level
        return -options $opts $msg
    }
    proc case args {
        upvar 1 values values
        set patterns [lrange $args 0 end-2]
        set body [lindex $args end]
        set env [dict create]
        if {[llength $patterns] != [llength $values]} { return }
        foreach pattern $patterns value $values {
            if {![matches $pattern $value env]} { return }
        }
        return -code return [list $env $body]
    }
    proc default body { return -code return [list {} $body] }
}

As an example of use, here is an implementation of insertion into a Red-Black tree, as described at [L1 ] (based on the Haskell code there):

datatype define Color = R | B
datatype define Tree  = E | T color left val right

# balance :: Color -> Tree a -> a -> Tree a -> Tree a
proc balance {color left val right} {
    datatype match $color $left $val $right {
        case B [T R [T R a x b] y c] z d -> { T R [T B $a $x $b] $y [T B $c $z $d] }
        case B [T R a x [T R b y c]] z d -> { T R [T B $a $x $b] $y [T B $c $z $d] }
        case B a x [T R [T R b y c] z d] -> { T R [T B $a $x $b] $y [T B $c $z $d] }
        case B a x [T R b y [T R c z d]] -> { T R [T B $a $x $b] $y [T B $c $z $d] }
        case col a x b                   -> { T $col $a $x $b }
    }
}
# insert :: Ord a => a -> Tree a -> Tree a
proc insert {x s} {
    datatype match [ins $x $s] {
        case [T _ a y b]  -> { T B $a $y $b }
    }
}
# ins :: Ord a => a -> Tree a -> Tree a
proc ins {x s} {
    datatype match $s {
        case E               -> { T R E $x E }
        case [T col a y b]   -> {
            if {$x < $y} { return [balance $col [ins $x $a] $y $b] }
            if {$x > $y} { return [balance $col $a $y [ins $x $b]] }
            return $s
        }
    }
}
# Test on random numbers:
set tree [E]
set i 0
while {[incr i] < 20} {
    set n [expr {int(rand()*100)}]
    set tree [insert $n $tree]
}
puts $tree

TODO:

  • Assumes each element is a well-formed list
  • Assumes string equality comparison
  • No way to match a literal string beginning with a lower-case letter or underscore

I believe these can all be solved by treating each type itself as an ensemble and having per-type equal and match "methods" (with built-ins for strings and numbers).