Arjen Markus (25 june 2018) A straightforward implementation of logistic regression using Tcllib's Nelder-Mead optimisation proc.
# stat_logit.tcl -- # Logistic regression functions - part of the statistics package # # Note: # The implementation was derived from the Wikipedia page on logistic regression, # as is the test case. # # TODO: # - Deviance to evaluate the goodness of fit # package require math::optimize namespace eval ::math::statistics { variable xLogit {} variable yLogit {} } # logistic-model -- # Fit 1/0 data to a logistic model # # Arguments: # xdata Independent variables (list of lists if there are more than one) # ydata Corresponding scores (0 or 1) # # Result: # Estimate of the parameters for a logistic model # # Note: # It is expected that the independent variables have roughly the same scale # proc ::math::statistics::logistic-model {xdata ydata} { variable xLogit variable yLogit set xLogit {} foreach coords $xdata { lappend xLogit [concat 1.0 $coords] } set yLogit $ydata # # Use a trivial starting point # set startx [lrepeat [llength [lindex $xLogit 0]] 0.0] set result [::math::optimize::nelderMead LogisticML_NM $startx] return [dict get $result x] } # LogisticML_NM -- # Calculate the (log) maximum likelihood for the given logistic model # using Nelder-Mead # # Arguments: # args Vector of the current regression coefficients # # Returns: # Log maximum likelihood # proc ::math::statistics::LogisticML_NM {args} { variable xLogit variable yLogit set loglike 0.0 foreach coords $xLogit score $yLogit { set sum 0.0 foreach c $coords v $args { set sum [expr {$sum + $v * $c}] } set exp [expr {exp(-$sum)}] if { $score == 1 } { set loglike [expr {$loglike - log(1.0 + $exp)}] } else { set loglike [expr {$loglike - $sum - log(1.0 + $exp)}] } } return [expr {-$loglike}] } # logistic-probability -- # Calculate the probability of a positive score (1) given the model # # Arguments: # coeffs Coefficients of the logistic model (for instance outcome of model fit) # values Values of the independent variables # # Returns: # Probability # proc ::math::statistics::logistic-probability {coeffs values} { set sum 0.0 foreach c $coeffs v [concat 1.0 $values] { set sum [expr {$sum + $c * $v}] } return [expr {1.0 / (1.0 + exp(-$sum))}] } # test case: from Wikipedia set xdata {0.50 0.75 1.00 1.25 1.50 1.75 1.75 2.00 2.25 2.50 2.75 3.00 3.25 3.50 4.00 4.25 4.50 4.75 5.00 5.50} set ydata {0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 1 1 1 1 } set coeffs [::math::statistics::logistic-model $xdata $ydata] puts "Model fit: $coeffs" puts "Probabilities:" foreach x {1 2 3 4 5} { puts "$x - [::math::statistics::logistic-probability $coeffs $x]" }