Markov chain Monte Carlo

Arjen Markus (30 december 2019) The code below is just a preliminary attempt to implement Markov Chain Monte Carlo methods. It has been tested mildly and there is a lot to be done to make it fully useable. Consider it "work very much in progress". I have a number of other stochastic/probabilistic algorithms as well and I want to create a new project on Chiselapp to hold them. They are all based on the idea that by examining random points in the search space you can get an answer without having to examine the mathematics of the problem and design a specific algorithm.

The code below uses Markov Chain Monte Carlo simulation to select points in the space of one or more stochastic variables to estimate the expectation of a function of these variables. It is fairly straightforward Tcl code, no surprises there, with the possible exception of defining an object-specific method (see Object-oriented programming in Tcl or Ashok's BOOK The Tcl Programming Language).

Anyway, this is a start and I intend to extend it ;).

# mcmc_v2.tcl --
#     Second attempt at a Markov Chain Monte Carlo implementation.
#
#     Note:
#     It is my intention to make it fully flexible, but for the moment
#     a simple implementation, that is, not too many options and only
#     a one-dimensional function will have to suffice.
#
#     Options:
#     -one-by-one
#     -quasi-random
#     -skip (to to avoid strong correlation)
#
#     TODO:
#     - Keep track of rejections (with the purpose of checking the quality)
#     - Method next to just select a new point
#     - Version which provides more information
#     - Error estimates for integration
#     - Documentation
#
namespace eval ::MCMC {
    # Nothing in particular - just create the namespace
}

# mcmc --
#     Class that holds the methods and variables
#
::oo::class create ::MCMC::Metropolis {
    variable currentpoint      ;# Current point
    variable oldprob           ;# Initial value is "empty"
    variable default_samples   ;# Default number of samples
    variable scale             ;# Scale for the next point function
    variable probabilityfunc   ;# Function to estimate the probability - up to normalisation
    variable nextfunc          ;# Function to generate the next point
    variable repeat_rejection  ;# Get again a next point if it was rejected

    constructor {args} {
        variable currentpoint     {}
        variable oldprob          {}
        variable default_samples  1000
        variable scale            1.0
        variable probabilityfunc  {}
        variable nextfunc         {}
        variable repeat_rejection 0

        set burnin          1000    ;# Default number of initial iterations
        set nextfunc        "gauss" ;# Default method for getting the next point
        set initial         {}      ;# No initial point - use 0's if none given
        set dimension       {}      ;# No dimension - determine in a different way

        foreach {key value} $args {
            #puts "$key -- $value"
            switch -- $key {
                "-dimensions" {
                    set dimensions $value
                }
                "-burnin" {
                    set burnin $value
                }
                "-samples" {
                    set samples $value
                }
                "-nextfunc" {
                    set nextfunc $value
                    if { $nextfunc ni {gauss exponential uniform} } {
                        return -code error "Unsupported function for determining the next point: $nextfunc"
                    }
                }
                "-probabilityfunc" {
                    set probabilityfunc $value
                }
                "-scale" {
                    set scale $value
                }
                "-initial-point" {
                    set initial $value
                }
                "-repeat-on-rejection" {
                    set repeat_rejection $value
                }
                default {
                    puts "Unknown option: $key"
                }
            }
        }

        #
        # Check the consistency of the various options - especially those depending on the
        # dimension of the problem
        #
        if { $dimension == {} } {
            set dimension [llength $scale]
        } else {
            if { [llength $scale] == 1 } {
                set scale [lrepeat $dimension $scale]
            } elseif { $dimension != [llength $scale] } {
                return -code error "Inconsistent data on the dimension - given as $dimension, but the scale vector is [llength $scale] long"
            }
        }
        if { [llength $initial] == 0 } {
            set initial [lrepeat $dimension 0.0]
        } elseif { [llength $initial] == 1 } {
            set initial [lrepeat $dimension $initial]
        } elseif { $dimension != [llength $initial] } {
            return -code error "Inconsistent data on the dimension - given as $dimension, but the initial point vector is [llength $initial] long"
        }

        set currentpoint $initial

        #
        # We need to define an object-specific method for the next point function
        #
        # For now: only uniform
        switch -- $nextfunc {
            "uniform" {
                ::oo::objdefine [self] {
                    method GetRandom {scale} {
                        set random {}
                        foreach s $scale {
                            lappend random [expr {2.0 * $s * (rand() - 0.5)}]
                        }
                        return $random
                    }
                }
            }
            "exponential" {
                ::oo::objdefine [self] {
                    method GetRandom {scale} {
                        set random {}
                        foreach s $scale {
                            set r [expr {rand()}]
                            if { $r > 0.5 } {
                                lappend random [expr {-$s * log(2.0 * ($r-0.5))}]
                            } else {
                                lappend random [expr {$s * log(2.0 * $r)}]
                            }
                        }
                        return $random
                    }
                }
            }
            "gauss"       {
                return -code error "Sorry the nextfunc method $nextfunc has not been implemented yet"
            }
        }

        #
        # Burn-in phase
        #
        set oldprob 0.0
        for {set i 0} {$i < $burnin} {incr i} {
            my NextPoint
            #puts "$oldprob -- $currentpoint"
        }
    }

    method NextPoint {} {
        variable currentpoint
        variable oldprob
        variable scale
        variable probabilityfunc
        variable repeat_rejection

        set accept 0
        while {1} {
            #
            # Generation step
            #
            set randomDisplacement [my GetRandom $scale]

            set newpoint {}
            foreach comp $randomDisplacement coord $currentpoint {
                lappend newpoint [expr {$coord + $comp}]
            }

            #
            # Acception/rejection step
            # Note the condition that newprob must be positive.
            # This is due to distributions with finite support.
            #
            set newprob [$probabilityfunc $newpoint]

            if { $newprob >= $oldprob && $newprob > 0.0 } {
                set oldprob $newprob
                set accept 1
                break
            } else {
                set r [expr {rand()}]
                if { $oldprob * $r < $newprob } {
                    set oldprob $newprob
                    set accept 1
                    break
                } else {
                    if { ! $repeat_rejection } {
                        set accept 0
                        break
                    }
                }
            }
        }

        # TODO: keep track of acceptions/rejections
        if { $accept } {
            set currentpoint $newpoint
        }
        return $currentpoint
    }

    method integrate {func args} {
        variable default_samples
        variable oldprob
        variable randomNumbers

        set  randomNumbers {}

        set funcsum 0.0
        set weight  0.0
        set samples $default_samples

        foreach {key value} $args {
            switch -- $key {
                "-samples" {
                    set samples $value
                    if { $samples <= 0 } {
                        return -code error "Incorrect value for number of samples - $valeu - must be positive"
                    }
                }
                default {
                    return -code error "Unknown option to \"integrate\": $key"
                }
            }
        }

        for {set i 0} { $i < $samples} {incr i} {
            set point     [my NextPoint]
            set funcvalue [$func $point]
            set funcsum   [expr {$funcsum + $funcvalue}]
        }

        # TODO: estimate error

        return [expr {$funcsum / $samples}]
    }
}

# test --
#     Quick and dirty test
#
proc gauss {x} {
    return [expr {exp(-($x**2)/2.0)}]
}
proc uni {x} {
    if { abs($x) > 1 } {
        return 0.0
    } else {
        return 1.0
    }
}
proc func {x} {
    return [expr {$x**2}]
}

set mc [::MCMC::Metropolis new -probabilityfunc gauss -burnin 10 -nextfunc uniform -scale 1.0]
puts [$mc integrate func -samples 1000]

set mc2 [::MCMC::Metropolis new -probabilityfunc gauss -burnin 10 -nextfunc uniform -scale 2.0 -repeat-on-rejection 1]
puts [$mc2 integrate func -samples 1000]
puts [$mc2 integrate func -samples 10000]

set mc3 [::MCMC::Metropolis new -probabilityfunc uni -burnin 10 -nextfunc uniform -scale 2.0 -repeat-on-rejection 1]
puts "[$mc3 integrate func -samples 1000] -- 1000"
puts "[$mc3 integrate func -samples 10000] -- 10000"

#
# Alternative implementation: simply random numbers
#
package require math::statistics

set funcsum 0.0
for {set i 0} {$i < 10000} {incr i} {
    set value     [expr {2.0*(rand()-0.5)}]
    set funcvalue [func $value]
    set funcsum   [expr {$funcsum + $funcvalue}]
}

puts [expr {$funcsum/10000.0}]

#
# Now for a two-dimnensional integral
#
proc parabola {coords} {
    lassign $coords x y

    return [expr {$x**2+$y**2}]
}

proc gauss2 {coords} {
    lassign $coords x y
    return [expr {exp(-($x**2+$y**2)/2.0)}]
}

set mc4 [::MCMC::Metropolis new -probabilityfunc gauss2 -burnin 10 -nextfunc uniform -scale {1.0 1.0} -repeat-on-rejection 1]
puts [$mc4 integrate parabola]

set mc5 [::MCMC::Metropolis new -probabilityfunc gauss2 -burnin 10 -nextfunc exponential -scale {1.0 1.0} -repeat-on-rejection 1]
puts [$mc5 integrate parabola]