Beta distribution

EKB This is an implementation of the Beta distribution. A Beta distribution has the form:

   f(x; a, b) = (1/B(a,b)) * x^(a-1) * (1-x)^(b-1)

The function B(a,b) is the (complete) Beta function, equal to:

   B(a,b) = Gamma(a) * Gamma(b) / Gamma(a + b)

The Beta distribution has a variety of applications in statistics -- see the Wikipedia page [L1 ].

Some dependencies: The cumulative distribution function (CDF) of the Beta distribution is given by the Incomplete Beta Function. Also, this uses the Beta and ln_Gamma functions from tcllib::math. The random-number generator uses the random-number generator for Gamma distributions. Finally, the code uses Simple Test as a testing harness.

Here's a visual comparison of 5,000 random deviates compared to the probability distribution function (PDF). This is generated by the "beta_test.tcl" code at the bottom of this page.

http://www.kb-creative.net/images/betadist.gif

EKB 11 Jan 2007, improvemed pdf and some additional tests. At the same time, improved substantially on the Incomplete Beta Function.

The library (beta.tcl)

    package require math

    namespace import ::math::ln_Gamma
    namespace import ::math::Beta

    source simpletest.tcl
    source gammadist.tcl    ;# Use random variates
    namespace import gamma::random-gamma

    #
    # Implement the beta distribution
    #

    namespace eval beta {
        namespace export pdf-beta cdf-beta random-beta
        
        source incbeta.tcl
        
        # Values calculated using R ver. 2.4.1
        
        st::addproc pdf-beta {
            a   ;# first shape param
            b   ;# second shape param
            x   ;# value where dist is to be evaluated
        } where {
            {{1.3 2.4 0.2} ~ 1.68903180472449}
            {{1 1 0.5} ~ 1.0}
            {{3.7 0.9 0.0} ~ 0.0}
            {{1.8 4.2 1.0} ~ 0.0}
            {{320 400 0.4} ~ 1.18192376783860}
            {{500 1 0.2} ~ 0.0}
            {{1000 1000 0.50} ~ 35.6780222917086}
        }
        
        st::addproc cdf-beta {
            a   ;# first shape param
            b   ;# second shape param
            x   ;# value up to which dist is to be integrated
        } where {
            {{2.1 3.0 0.2} ~ 0.16220409275804}
            {{4.2 17.3 0.5} ~ 0.998630771123192}
            {{500 375 0.7} ~ 1.0}
            {{250 760 0.2} ~ 0.000125234318666948}
            {{43.2 19.7 0.6} ~ 0.0728881294218269}
            {{500 640 0.3} ~ 2.99872547567313e-23}
            {{400 640 0.3} ~ 3.07056696205524e-09}
            {{0.1 30 0.1} ~ 0.998641008671625}
            {{0.01 0.03 0.9} ~ 0.765865005703006}
            {{2 3 0.9999} ~ 0.999999999996}
            {{249.9999 759.99999 0.2} ~ 0.000125237075575121}
            {{1000 1000 0.4} ~ 8.23161135486914e-20}
            {{1000 1000 0.499} ~ 0.464369443974288}
            {{1000 1000 0.5} ~ 0.5}
            {{1000 1000 0.7} ~ 1.0}
            {{2 3 0.6} ~ 0.8208}
        }
        
        st::addproc random-beta {
            a   ;# first shape param
            b   ;# second shape param
            number  ;# Number of random deviates to return
        } where {}  ;# No tests: assess visually by comparing density with histogram
    }

    proc beta::pdf-beta {a b x} {
        if {$x < 0.0 || $x > 1.0} {
            error "Value out of range in Beta density: x = $x, not in \[0, 1\]"
        }
        if {$a <= 0.0} {
            error "Value out of range in Beta density: a = $a, must be > 0"
        }
        if {$b <= 0.0} {
            error "Value out of range in Beta density: b = $b, must be > 0"
        }
        set aplusb [expr {$a + $b}]
        set term1 [expr {[ln_Gamma $aplusb]- [ln_Gamma $a] - [ln_Gamma $b]}]
        set term2 [expr {($a - 1.0) * log($x) + ($b - 1.0) * log(1.0 - $x)}]
        
        expr {exp($term1 + $term2)}
    }

    proc beta::cdf-beta {a b x} {
        incompleteBeta $a $b $x
    }

    # Use trick from J.S. Dagpunar, "Simulation and
    #   Monte Carlo: With Applications in Finance
    #   and MCMC", Section 4.5
    proc beta::random-beta {a b number} {
        set retval {}
        foreach w [random-gamma $a 1.0 $number] y [random-gamma $b 1.0 $number] {
            lappend retval [expr {$w / ($w + $y)}]
        }
        return $retval
    }

**The Test Suite (beta_test.tcl)**
    source beta.tcl

    st::tolerance 1.0e-9
    console show
    namespace eval beta {
        st::print
    }

    foreach t {{::beta::cdf-beta 2 3 0.9999}
                {::beta::cdf-beta 250 760 0.47}
                {::beta::cdf-beta 249.9999 759.99999 0.47}
                {::beta::cdf-beta 2.1 3.2 0.7}
                {::beta::cdf-beta 4.3 9.2 0.3}
                {::beta::cdf-beta 2.5 1.9 0.1}
                {::beta::cdf-beta 5 7 0.3}} {
        puts " Average time/1000 iterations for $t:"
        puts " [time $t 1000]"
    }

    ##########################################################################################
    ##
    ## TESTING
    ##
    ## For random numbers, generate histograms:
    ##
    ##########################################################################################
    canvas .c
    pack .c -side top
    frame .f
    pack .f -side bottom
    label .f.alphal -text "a"
    entry .f.alphae -textvariable a
    pack .f.alphal -side left
    pack .f.alphae -side left
    label .f.betal -text "b"
    entry .f.betae -textvariable b
    pack .f.betal -side left
    pack .f.betae -side left
    button .f.run -text "Run" -command runtest
    pack .f.run -side left

    proc runtest {} {
        set vals [beta::random-beta $::a $::b 5000]
        set remainder 5000
        for {set x 0.0} {$x < 1.0} {set x [expr {$x + 0.025}]} {
            lappend bins $x
            set distval [beta::pdf-beta $::a $::b $x]
            # Total trials (5000) * step (0.025) * value
            set distval [expr {int(5000 * 0.025 * $distval)}]
            lappend distcounts $distval
        }
        # Assume none are left
        lappend distcounts 0.0
        set bincounts [::math::statistics::histogram $bins $vals]
        .c delete hist
        .c delete dist
        math::statistics::plot-scale .c 0 1 0 [math::statistics::max $bincounts]
        math::statistics::plot-histogram .c $bincounts $bins hist
        math::statistics::plot-histogram .c $distcounts $bins dist
        .c itemconfigure dist -fill {} -outline green
    }

    set a 3
    set b 5
    runtest