Chi-squared distribution

The chi-squared distribution is widely used in statistics. It is a special case of Gamma distributions and is also related to the Gaussian distribution, in that the sum of squares of identically and independently distributed (iid) gaussian variables follow a chi-squared distribution.

For an overview, see the Wikipedia article [L1 ] and Abramowitz and Stegun [L2 ].

EKB This is an implementation of the chi-squared distribution, providing cdf-chisquare, pdf-chisquare, and random-chisquare. It uses code for Gamma distributions and the Simple Test code. (I used Simple Test to implement this using test-driven development.) To test the random-number generation, there is test code at the end to compare a histogram of the random variates against the theoretical pdf-chisq.

    # Implements the (central) chi-squared distribution
    # as a special case of the gamma distribution

    source simpletest.tcl
    source gammadist.tcl

    namespace eval chisq {
        
        # Values were produced from R ver. 2.4.1
        
        st::addproc cdf-chisquare {
            df  ;# Degrees of freedom
            x   ;# chisquared value; find prob that < this
        } where {
            {{2 3.5} ~ 0.826226056549555}
            {{5 2.2} ~ 0.179164030785504}
            {{5 100} ~ 1.0}
            {{3.9 4.2} ~ 0.634682741547709}
            {{1 2.0} ~ 0.842700792949715}
            {{3 -2.0} -> 0.0}
        }
        
        st::addproc pdf-chisquare {
            df  ;# Degrees of freedom
            x   ;# chisquared value; eval prob dens at this value
        } where {
            {{3 1.75} ~ 0.219999360547348}
            {{10 2.9} ~ 0.0216024880121444}
            {{4 17.45} ~ 0.000708787557977144}
            {{2.5 1.8} ~ 0.218446210041615}
            {{1 0.0} -> Inf}
            {{5 -1.5} -> 0.0}
        }
    }

    proc chisq::cdf-chisquare {df x} {
        set alpha [expr {0.5 * $df}]
        set beta 0.5
        
        if {$x < 0.0} {return 0.0}
        
        cdf-gamma $alpha $beta $x
    }

    proc chisq::pdf-chisquare {df x} {
        set alpha [expr {0.5 * $df}]
        set beta 0.5
        
        if {$x < 0.0} {return 0.0}
        
        pdf-gamma $alpha $beta $x
    }

    # Prodce "number" random deviates distributed according to a chisq dist
    proc chisq::random-chisquare {df number} {
        set alpha [expr {0.5 * $df}]
        set beta 0.5
        
        random-gamma $alpha $beta $number    
    }

    console show
    namespace eval chisq {
        st::print
    }

    ### Test random deviates

    ##########################################################################################
    ##
    ## TESTING
    ##
    ## Can test pdf & cdf by running in a console. For random numbers, generate histograms:
    ##
    ##########################################################################################
    canvas .c
    pack .c -side top
    frame .f
    pack .f -side bottom
    label .f.dfl -text "Degrees of freedom: "
    entry .f.dfe -textvariable df
    pack .f.dfl -side left
    pack .f.dfe -side left
    button .f.run -text "Run" -command runtest
    pack .f.run -side left

    proc runtest {} {
        set vals [chisq::random-chisquare $::df 5000]
        set remainder 5000
        for {set x 0.0} {$x < 20.0} {set x [expr {$x + 0.25}]} {
            lappend bins $x
            set distval [chisq::pdf-chisquare $::df $x]
            # Total trials (5000) * step (0.25) * value
            set distval [expr {int(5000 * 0.25 * $distval)}]
            lappend distcounts $distval
        }
        # Assume none are left
        lappend distcounts 0.0
        set bincounts [::math::statistics::histogram $bins $vals]
        .c delete hist
        .c delete dist
        math::statistics::plot-scale .c 0 20 0 [math::statistics::max $bincounts]
        math::statistics::plot-histogram .c $bincounts $bins hist
        math::statistics::plot-histogram .c $distcounts $bins dist
        .c itemconfigure dist -fill {} -outline green
    }

    set df 3
    runtest

EKB For statistical inference, the inverse chi-squared distribution is used more often than the distribution itself. So here's an implementation of the inverse chi-squared distribution, to be added to library above:

    namespace eval chisq {
        variable maxiter 100
        
        # Values generated using R ver. 2.4.1
        st::addproc invchisq {
            df  ;# Degrees of freedom
            p   ;# probability
        } where {
            {{2 0.5} ~ 1.38629436111989}
            {{3 0.0} ~ 0.0}
            {{15 0.7} ~ 17.3216944984992}
            {{3 0.1} ~ 0.584374374155183}
            {{14 0.01} ~ 4.66042506265777}
            {{70 0.10} ~ 55.3289395719096}
            {{2 1.0} -> Inf}
            {{40 1.0} -> Inf}
        }
    }

    proc chisq::quicknorminv {p} {
        if {$p < 0.0 || $p > 1.0} {
            error "Value out of bounds for inverse normal: $p"
            return 0
        }

        set sign -1
        if {$p > 0.5} {
            set p [expr {1 - $p}]
            set sign 1
        }

        set t [expr {sqrt(-2.0 * log($p))}]

        set a0 2.30753
        set a1 0.27061
        set b1 0.99229
        set b2 0.04481

        set num [expr {$a0 + $a1 * $t}]
        set denom [expr {1 + $b1 * $t + $b2 * $t * $t}]

        return [expr {$sign * ($t - $num/$denom)}]

    }
     
    proc chisq::invchisq {df p {tol 1.0e-10}} {
        variable maxiter
        
        if {$p < 0.0 || $p > 1.0} {
            error "Value out of bounds for inverse chi-square: $p"
            return 0.0
        }
        
        if {$p > 1.0 - $tol} {
            return [expr Inf]
        }
        
        set deltax [expr {100 * $tol}]

        # Initial value for x -- use mean = dof, unless large #dof
        set x $df
        if {$df > 30} {
            # Use approximate value from Abramowitz & Stegun for df > 30
            set p_norm [expr {1.0 - $p}]
            set xp [quicknorminv $p_norm]
            set y [expr {2.0/(9.0 * double($df))}]
            set x [expr {$df * (1.0 - $y + $xp * sqrt($y))}]
        }
        
        set niter 0
        while {1} {
            set pstar [cdf-chisquare $df $x]
            set dpdx [pdf-chisquare $df $x]
            if {abs($dpdx) < $tol} {
                set x [expr {$x + $deltax}]
                set dpdx [pdf-chisquare $df $x]
            }
            
            # Sometimes the slope can be quite large -- damp down size of steps if necessary
            set newx [expr {$x + ($p - $pstar)/$dpdx}]
            set newx [expr {$newx > $x + 0.5 * $df ? $x + 0.5 * $df : $newx}]
            set newx [expr {$newx < $x - 0.5 * $df ? $x - 0.5 * $df : $newx}]
            
            if {$newx < 0.0} {
                set newx 0.0
            }
            
            if {abs($newx - $x) < $tol} {
                set x $newx
                break
            } else {
                set x $newx
            }
            
            incr niter
            if {$niter == $maxiter} {
                error "Inverse chi-squared distribution did not converge after $niter iterations"
                return {}
            }
        }

        return $x

    }