Version 1 of Logistic regression

Updated 2018-06-25 07:13:41 by arjen

Arjen Markus (25 june 2018) A straightforward implementation of logistic regression using Tcllib's Nelder-Mead optimisation proc.

# stat_logit.tcl --
#     Logistic regression functions - part of the statistics package
#
#     Note:
#     The implementation was derived from the Wikipedia page on logistic regression,
#     as is the test case.
#
#     TODO:
#     - Deviance to evaluate the goodness of fit
#

package require math::optimize

namespace eval ::math::statistics {
     variable xLogit {}
     variable yLogit {}
}

# logistic-model --
#     Fit 1/0 data to a logistic model
#
# Arguments:
#     xdata        Independent variables (list of lists if there are more than one)
#     ydata        Corresponding scores (0 or 1)
#
# Result:
#     Estimate of the parameters for a logistic model
#
# Note:
#     It is expected that the independent variables have roughly the same scale
#
proc ::math::statistics::logistic-model {xdata ydata} {
    variable xLogit
    variable yLogit

    set xLogit {}
    foreach coords $xdata {
        lappend xLogit [concat 1.0 $coords]
    }
    set yLogit $ydata

    #
    # Use a trivial starting point
    #
    set startx [lrepeat [llength [lindex $xLogit 0]] 0.0]

    set result [::math::optimize::nelderMead LogisticML_NM $startx]

    return [dict get $result x]
}


# LogisticML_NM --
#     Calculate the (log) maximum likelihood for the given logistic model
#     using Nelder-Mead
#
# Arguments:
#     args        Vector of the current regression coefficients
#
# Returns:
#     Log maximum likelihood
#
proc ::math::statistics::LogisticML_NM {args} {
    variable xLogit
    variable yLogit

    set loglike 0.0
    foreach coords $xLogit score $yLogit {
        set sum 0.0

        foreach c $coords v $args {
            set sum [expr {$sum + $v * $c}]
        }
        set exp [expr {exp(-$sum)}]

        if { $score == 1 } {
            set loglike [expr {$loglike - log(1.0 + $exp)}]
        } else {
            set loglike [expr {$loglike - $sum - log(1.0 + $exp)}]
        }
    }
    return [expr {-$loglike}]
}

# logistic-probability --
#     Calculate the probability of a positive score (1) given the model
#
# Arguments:
#     coeffs      Coefficients of the logistic model (for instance outcome of model fit)
#     values      Values of the independent variables
#
# Returns:
#     Probability
#
proc ::math::statistics::logistic-probability {coeffs values} {
    set sum 0.0

    foreach c $coeffs v [concat 1.0 $values] {
        set sum [expr {$sum + $c * $v}]
    }

    return [expr {1.0 / (1.0 + exp(-$sum))}]
}

# test case: from Wikipedia

set xdata {0.50 0.75 1.00 1.25 1.50 1.75 1.75 2.00 2.25 2.50 2.75 3.00 3.25 3.50 4.00 4.25 4.50 4.75 5.00 5.50}
set ydata {0    0    0    0    0    0    1    0    1    0    1    0    1    0    1    1    1    1    1    1   }

set coeffs [::math::statistics::logistic-model $xdata $ydata]

puts "Model fit: $coeffs"

puts "Probabilities:"

foreach x {1 2 3 4 5} {
    puts "$x - [::math::statistics::logistic-probability $coeffs $x]"
}