Version 17 of Poisson distribution

Updated 2007-12-28 03:04:42 by EKB

This is a discrete distribution of a random variable. An example may tell us what it can be used for:

Suppose this Wiki is visited by 12 people daily (as an average - DKF: very far on the low side, btw). What is the probability, that the Wiki will have 20 visitors a day? The answer is calculated using the Poisson distribution:


  12²⁰×e⁻¹²
p = ——————
  20!

The answer is: p = 0.0097 which is 0.97%. Another example is: Simulating a server system.

Here is an implementation that draws integer numbers being Poisson distributed. It is based on an ecology modelling book I am currently reading and this thread on c.l.t [L1 ] helped finding a good Tcl implementation. That's a discussion on how to draw random numbers from a given probability distribution (not just the onw shown here).

 proc RandomPoisson {lambda count} {
        #
        # generate random numbers that are poisson distributed
        #
        # lambda -> expected value = "mean value" (which happens to also be the variance)
        # count -> number of numbers to be generated
        #
        # (adapted from: "Parameter Estimation in Ecology" by O. Richter & D. Söndgerath)

        # factorial f (here: the factorial of 0):
        set f 1
        # poisson probability of the value 0:
        set su [expr {exp(-$lambda)}]
        set p(0) $su
        # probabilities of the integers up to the math limits of Tcl:
        # (computed as the discrete density function)
        set i 0
        while {1} {
                incr i
                set f [expr {$f*$i}]
                set su [expr {$su + exp(-$lambda) * pow($lambda,$i)/double($f)}]
                if {$su > 1} {
                        # we cannot calculate more precisely here,
                        # so we assume 1 is ok for the final density:
                        set p($i) 1
                        break
                }
                set p($i) $su
        }
        # calculate random values according to the 
        # given discrete probability density function:
        # (this does work for all dicrete distributions,
        # not only for poisson)
        for {set c 0} {$c < $count} {incr c} {
                # random number in the interval [0,1]:
                set x [expr {rand()}]
                # transform this number to the correct target interval:
                for {set j 0} {$j <= $i} {incr j} {
                        if {$p($j) > $x} {
                                lappend result $j
                                break
                        }
                }
        }
        return $result
 }

We run into a problem here when using Tcl's normal expr. The numbers in the formula quickly put expr to its limits and therefor I have added the case where the probability is just set to 1, when this point arrives. You could use mpexpr instead if you need a better approximation of the very improbable part of the Poisson distribution.

Here is a test:

 # produce 10,000 random integer numbers according to the Poisson distribution:
 set data [RandomPoisson 3.5 10000]
 # count their frequencies: 
 foreach el $data {
        if {[info exists count($el)]} {
                incr count($el) 1
        } else {
                set count($el) 1
        }
 }
 # and display their frequencies:
 foreach el [lsort -integer [array names count]] {
        puts "$el => $count($el)"
 }

EKB This implements pdf-poisson (actually the Poisson has a probability mass function, pmf, but this matches the function names in tcllib), cdf-poisson, and random-poisson, which generates poisson-distributed random numbers. This version works efficiently for both small and large values of mu. The implementation of cdf-poisson does not seem particularly efficient, but it runs reasonably quickly even for large values.

    package require math

    namespace eval poisson {}

    proc poisson::random-poisson {mu number} {
        if {$mu < 20} {
            return [randp_invert $mu $number]
        } else {
            return [randp_PTRS $mu $number]
        }
    }


    # Generate a poisson-distributed random deviate
    # Use algorithm in section 4.9 of Dagpunar, J.S,
    #  "Simulation and Monte Carlo: With Applications
    #   in Finance and MCMC", pub. 2007 by Wiley
    # This inverts the cdf using a "chop-down" search
    # to avoid storing an extra intermediate value.
    # It is only good for small mu.
    proc poisson::randp_invert {mu number} {
        set W0 [expr {exp(-$mu)}]

        set retval {}

        for {set i 0} {$i < $number} {incr i} {
            set W $W0
            set R [expr {rand()}]
            set X 0

            while {$R > $W} {
                set R [expr {$R - $W}]
                incr X
                set W [expr {$W * $mu/double($X)}]
            }

            lappend retval $X
        }

        return $retval
    }

    # Generate a poisson-distributed random deviate
    # Use the transformed rejection method with
    #   squeeze of Hoermann: Wolfgang
    #   Hoermann, "The Transformed Rejection Method
    #   for Generating Poisson Random Variables,"
    #   Preprint #2, Dept of Applied Statistics and
    #   Data Processing, Wirtshcaftsuniversitaet Wien,
    #   http://statistik.wu-wien.ac.at/
    # This method works for mu >= 10.

    # First, a helper proc
    proc poisson::logfac {k} {
        incr k
        return [::math::ln_Gamma $k]
    }

    proc poisson::randp_PTRS {mu number} {
        set smu [expr {sqrt($mu)}]
        set b [expr {0.931 + 2.53 * $smu}]
        set a [expr {-0.059 + 0.02483 * $b}]
        set vr [expr {0.9277 - 3.6224/($b - 2.0)}]
        set invalpha [expr {1.1239 + 1.1328/($b - 3.4)}]
        set lnmu [expr {log($mu)}]

        set retval {}
        for {set i 0} {$i < $number} {incr i} {
            while 1 {
                set U [expr {rand() - 0.5}]
                set V [expr {rand()}]

                set us [expr {0.5 - abs($U)}]
                set k [expr {int(floor((2.0 * $a/$us + $b) * $U + $mu + 0.43))}]

                if {$us >= 0.07 && $V <= $vr} {
                    break
                }

                if {$k < 0} {
                    continue
                }

                if {$us < 0.013 && $V > $us} {
                    continue
                }

                if {log($V * $invalpha / ($a/($us * $us) + $b)) <= -$mu + $k * $lnmu - [logfac $k]} {
                    break
                }
            }

            lappend retval $k
        }
        return $retval
    }

    proc poisson::pdf-poisson {mu k} {
        set intk [expr {int($k)}]
        expr {exp(-$mu + floor($k) * log($mu) - [logfac $intk])}
    }

    proc poisson::cdf-poisson {mu x} {
        # To avoid dividing large values, build up:
        set W [expr {exp(-$mu)}]
        set retval $W

        for {set z 1} {$z <= int($x)} {incr z} {
            set W [expr {$W * $mu/double($z)}]
            set retval [expr {$retval + $W}]
        }

        return $retval
    }

    ##########################################################################################
    ##
    ## TESTING
    ##
    ## Can test pdf & cdf by running in a console. For random numbers, generate histograms:
    ##
    ##########################################################################################

    package require math::statistics

    canvas .c
    pack .c -side top
    frame .f
    pack .f -side bottom
    label .f.mul -text "mu"
    entry .f.mue -textvariable mu
    pack .f.mul -side left
    pack .f.mue -side left
    button .f.run -text "Run" -command runtest
    pack .f.run -side left

    proc runtest {} {
        set numbins [expr {3 * $::mu}]
        set vals [poisson::random-poisson $::mu 5000]
        set remainder 5000
        for {set x 0.0} {$x < $numbins} {set x [expr {$x + 1}]} {
            lappend bins $x
            set distval [poisson::pdf-poisson $::mu $x]
            set distval [expr {int(5000 * $distval)}]
            lappend distcounts $distval
        }
        # Assume none are left
        lappend distcounts 0.0
        set bincounts [::math::statistics::histogram $bins $vals]
        .c delete hist
        .c delete dist
        math::statistics::plot-scale .c 0 $numbins 0 [math::statistics::max $bincounts]
        math::statistics::plot-histogram .c $bincounts $bins hist
        math::statistics::plot-histogram .c $distcounts $bins dist
        .c itemconfigure dist -fill {} -outline green
    }

    set mu 25
    runtest

See also: Statistical Distributions