Beta distribution

EKB This is an implementation of the Beta distribution. A Beta distribution has the form:

   f(x; a, b) = (1/B(a,b)) * x^(a-1) * (1-x)^(b-1)

The function B(a,b) is the (complete) Beta function, equal to:

   B(a,b) = Gamma(a) * Gamma(b) / Gamma(a + b)

The Beta distribution has a variety of applications in statistics -- see the Wikipedia page [L1 ].

Some dependencies: The cumulative distribution function (CDF) of the Beta distribution is given by the Incomplete Beta Function. Also, this uses the Beta and ln_Gamma functions from tcllib::math. The random-number generator uses the random-number generator for Gamma distributions. Finally, the code uses Simple Test as a testing harness.

Here's a visual comparison of 5,000 random deviates compared to the probability distribution function (PDF). This is generated by the "beta_test.tcl" code at the bottom of this page.

EKB 11 Jan 2007, improvemed pdf and some additional tests. At the same time, improved substantially on the Incomplete Beta Function.

The library (beta.tcl)

    package require math

    namespace import ::math::ln_Gamma
    namespace import ::math::Beta

    source simpletest.tcl
    source gammadist.tcl    ;# Use random variates
    namespace import gamma::random-gamma

    #
    # Implement the beta distribution
    #

    namespace eval beta {
        namespace export pdf-beta cdf-beta random-beta
        
        source incbeta.tcl
        
        # Values calculated using R ver. 2.4.1
        
        st::addproc pdf-beta {
            a   ;# first shape param
            b   ;# second shape param
            x   ;# value where dist is to be evaluated
        } where {
            {{1.3 2.4 0.2} ~ 1.68903180472449}
            {{1 1 0.5} ~ 1.0}
            {{3.7 0.9 0.0} ~ 0.0}
            {{1.8 4.2 1.0} ~ 0.0}
            {{320 400 0.4} ~ 1.18192376783860}
            {{500 1 0.2} ~ 0.0}
            {{1000 1000 0.50} ~ 35.6780222917086}
        }
        
        st::addproc cdf-beta {
            a   ;# first shape param
            b   ;# second shape param
            x   ;# value up to which dist is to be integrated
        } where {
            {{2.1 3.0 0.2} ~ 0.16220409275804}
            {{4.2 17.3 0.5} ~ 0.998630771123192}
            {{500 375 0.7} ~ 1.0}
            {{250 760 0.2} ~ 0.000125234318666948}
            {{43.2 19.7 0.6} ~ 0.0728881294218269}
            {{500 640 0.3} ~ 2.99872547567313e-23}
            {{400 640 0.3} ~ 3.07056696205524e-09}
            {{0.1 30 0.1} ~ 0.998641008671625}
            {{0.01 0.03 0.9} ~ 0.765865005703006}
            {{2 3 0.9999} ~ 0.999999999996}
            {{249.9999 759.99999 0.2} ~ 0.000125237075575121}
            {{1000 1000 0.4} ~ 8.23161135486914e-20}
            {{1000 1000 0.499} ~ 0.464369443974288}
            {{1000 1000 0.5} ~ 0.5}
            {{1000 1000 0.7} ~ 1.0}
            {{2 3 0.6} ~ 0.8208}
        }
        
        st::addproc random-beta {
            a   ;# first shape param
            b   ;# second shape param
            number  ;# Number of random deviates to return
        } where {}  ;# No tests: assess visually by comparing density with histogram
    }

    proc beta::pdf-beta {a b x} {
        if {$x < 0.0 || $x > 1.0} {
            error "Value out of range in Beta density: x = $x, not in \[0, 1\]"
        }
        if {$a <= 0.0} {
            error "Value out of range in Beta density: a = $a, must be > 0"
        }
        if {$b <= 0.0} {
            error "Value out of range in Beta density: b = $b, must be > 0"
        }
        set aplusb [expr {$a + $b}]
        set term1 [expr {[ln_Gamma $aplusb]- [ln_Gamma $a] - [ln_Gamma $b]}]
        set term2 [expr {($a - 1.0) * log($x) + ($b - 1.0) * log(1.0 - $x)}]
        
        expr {exp($term1 + $term2)}
    }

    proc beta::cdf-beta {a b x} {
        incompleteBeta $a $b $x
    }

    # Use trick from J.S. Dagpunar, "Simulation and
    #   Monte Carlo: With Applications in Finance
    #   and MCMC", Section 4.5
    proc beta::random-beta {a b number} {
        set retval {}
        foreach w [random-gamma $a 1.0 $number] y [random-gamma $b 1.0 $number] {
            lappend retval [expr {$w / ($w + $y)}]
        }
        return $retval
    }

**The Test Suite (beta_test.tcl)**
    source beta.tcl

    st::tolerance 1.0e-9
    console show
    namespace eval beta {
        st::print
    }

    foreach t {{::beta::cdf-beta 2 3 0.9999}
                {::beta::cdf-beta 250 760 0.47}
                {::beta::cdf-beta 249.9999 759.99999 0.47}
                {::beta::cdf-beta 2.1 3.2 0.7}
                {::beta::cdf-beta 4.3 9.2 0.3}
                {::beta::cdf-beta 2.5 1.9 0.1}
                {::beta::cdf-beta 5 7 0.3}} {
        puts " Average time/1000 iterations for $t:"
        puts " [time $t 1000]"
    }

    ##########################################################################################
    ##
    ## TESTING
    ##
    ## For random numbers, generate histograms:
    ##
    ##########################################################################################
    canvas .c
    pack .c -side top
    frame .f
    pack .f -side bottom
    label .f.alphal -text "a"
    entry .f.alphae -textvariable a
    pack .f.alphal -side left
    pack .f.alphae -side left
    label .f.betal -text "b"
    entry .f.betae -textvariable b
    pack .f.betal -side left
    pack .f.betae -side left
    button .f.run -text "Run" -command runtest
    pack .f.run -side left

    proc runtest {} {
        set vals [beta::random-beta $::a $::b 5000]
        set remainder 5000
        for {set x 0.0} {$x < 1.0} {set x [expr {$x + 0.025}]} {
            lappend bins $x
            set distval [beta::pdf-beta $::a $::b $x]
            # Total trials (5000) * step (0.025) * value
            set distval [expr {int(5000 * 0.025 * $distval)}]
            lappend distcounts $distval
        }
        # Assume none are left
        lappend distcounts 0.0
        set bincounts [::math::statistics::histogram $bins $vals]
        .c delete hist
        .c delete dist
        math::statistics::plot-scale .c 0 1 0 [math::statistics::max $bincounts]
        math::statistics::plot-histogram .c $bincounts $bins hist
        math::statistics::plot-histogram .c $distcounts $bins dist
        .c itemconfigure dist -fill {} -outline green
    }

    set a 3
    set b 5
    runtest

Category Statistics

Category Mathematics