## Lognormal Income Distributions

Incomes within countries typically follow a skewed distribution with a long high tail. It has been found that on average national income distributions fit a lognormal distribution pretty well. Since I sometimes develop scenarios of income distribution and poverty as part of my work, I've put together a library for calculating standard poverty measures for income distributions.

EKB This library is now being used for the Greenhouse Development Rights (GDRs) Calculator (http://gdrs.sourceforge.net/ ). The GDRs calculator is a supporting tool for the GDRs framework for allocating the burden for greenhouse gas mitigation and adaptation.

EKB I've submitted this to the tcllib math library. While doing that I've made some modifications, which I'm posting below. One change is to the license, which is now BSD to make it consistent with tcllib. Also, I've added some examples (above the code) and a test suite (below the code).

The input variables are:

• yave: mean per capita income
• yc: an income cutoff
• p: the proportion of people below the cutoff
• x: cumulative fraction of the population (for the Lorenz Curve)
• gini: The Gini coefficient

Some background:

• Lognormal distributions on Wikipedia [L1 ]
• Lorenz curves on Wikipedia [L2 ]
• Gini coefficients on Wikipedia [L3 ]
• A World Bank working paper justifying the use of lognormals for income distributions [L4 ]

Some examples:

# Calculate the poverty headcount and poverty gap for a
#  country with a mean income of 15000, a poverty line of
#  3000, and a Gini coefficient of 0.45
puts [math::lognorm::gap 3000 15000 0.45]

# Calculate the share of income held by the lowest-earning
#  50% in a country with a Gini coefficient of 0.35
puts [math::lognorm::L 0.50 0.35]

# Calculate the ratio of the highest-earning p percent to
#  the lowest-earning p percent in a country with the
#  given Gini coefficient
proc highlow {p Gini} {
set low [math::lognorm::L \$p \$Gini]
set high [expr {1.0 - [math::lognorm::L [expr {(1.0 - \$p)}] \$Gini]}]
return [expr {\$high/\$low}]
}
# Apply "highlow"
puts [highlow 0.20 0.35]
puts [highlow 0.20 0.55]

##
## Lognormal income distributions
##
# Released under the BSD license under any terms
# that allow it to be compatible with tcllib

package provide math::lognorm 0.1

package require math::statistics

namespace eval math::lognorm {
# Maximum iterations for finding inverse normal
variable maxiter 100
variable epsilon 1.0e-9

namespace export norminv gini2sdlog sdlog2gini \

}

proc math::lognorm::quicknorminv {p} {
variable epsilon

if {\$p < \$epsilon || \$p > 1-\$epsilon} {
error "Value out of bounds for inverse normal: \$p"
return 0
}

set sign -1
if {\$p > 0.5} {
set p [expr {1 - \$p}]
set sign 1
}

set t [expr {sqrt(-2.0 * log(\$p))}]

set a0 2.30753
set a1 0.27061
set b1 0.99229
set b2 0.04481

set num [expr {\$a0 + \$a1 * \$t}]
set denom [expr {1 + \$b1 * \$t + \$b2 * \$t * \$t}]

return [expr {\$sign * (\$t - \$num/\$denom)}]

}

proc math::lognorm::norminv {p} {
variable maxiter
variable epsilon

set deltax [expr {100 * \$epsilon}]

# Initial value for x
set x [quicknorminv \$p]

set niter 0
while {abs([::math::statistics::pnorm \$x] - \$p) > \$epsilon} {
set pstar [::math::statistics::pnorm \$x]
set pl [::math::statistics::pnorm [expr {\$x - \$deltax}]]
set ph [::math::statistics::pnorm [expr {\$x + \$deltax}]]

set x [expr {\$x + 2.0 * \$deltax * (\$p - \$pstar)/(\$ph - \$pl)}]

incr niter
if {\$niter == \$maxiter} {
error "Inverse normal distribution did not converge after \$niter iterations"
return {}
}
}

return \$x

}

# Gini must be 0..1
proc math::lognorm::gini2sdlog {gini} {
set p [expr {0.5 * (1.0 + \$gini)}]
return [expr {sqrt(2.0) * [norminv \$p]}]
}

proc math::lognorm::sdlog2gini {sdlog} {
set x [expr {\$sdlog/sqrt(2.0)}]
return [expr {2.0 * [::math::statistics::pnorm \$x] - 1.0}]
}

# Ratio of median to mean
proc math::lognorm::medmean_ratio {gini} {
set sdlog [gini2sdlog \$gini]
return [expr {exp(-0.5 * \$sdlog * \$sdlog)}]
}

# Lorenz curve
proc math::lognorm::L {x gini} {
variable epsilon

if {\$x < \$epsilon || \$x > 1.0 - \$epsilon} {
return \$x
}
set p [norminv \$x]
set sdlog [gini2sdlog \$gini]
return [::math::statistics::pnorm [expr {\$p - \$sdlog}]]
}

proc math::lognorm::headcount {yc yave gini} {
variable epsilon

set sdlog [gini2sdlog \$gini]
set z [expr {1.0 * \$yc / \$yave}]
if {\$z < \$epsilon} {
return 0.0
}
set x [expr {(1.0/\$sdlog) * log(\$z) + 0.5 * \$sdlog}]
return [::math::statistics::pnorm \$x]
}

# Analogous to poverty gap
proc math::lognorm::gap {yc yave gini} {
variable epsilon

if {\$yc < \$epsilon * \$yave} {
return 0.0
}
set hc [headcount \$yc \$yave \$gini]
set Lhc [L \$hc \$gini]
return [expr {\$hc - (1.0 * \$yave/\$yc) * \$Lhc}]
}

proc math::lognorm::cutoff {p yave gini} {
variable epsilon

if {\$p < \$epsilon} {
return 0.0
}
set sdlog [gini2sdlog \$gini]
set x [norminv \$p]
return [expr {\$yave * exp(\$sdlog * (\$x - 0.5 * \$sdlog))}]
}

Test Suite

package require math::lognorm 0.1

namespace import math::lognorm::*

puts "--- math::lognorm::norminv ---"
puts {[norminv 0.5] should give 0.0}
puts [norminv 0.5]
puts {[norminv 0.7] should give 0.5244}
puts [norminv 0.7]
puts {[norminv 0.05] should give -1.6448}
puts [norminv 0.05]

puts "\n"
puts "--- math::lognorm::gini2sdlog ---"
puts {[gini2sdlog 0.45] should give 0.84536}
puts [gini2sdlog 0.45]
puts {[gini2sdlog 0.23] should give 0.413481}
puts [gini2sdlog 0.23]
puts {[gini2sdlog 0.90] should give 2.326174}
puts [gini2sdlog 0.90]
puts {[gini2sdlog 0.05] should give 0.088681}
puts [gini2sdlog 0.05]

puts "\n"
puts "--- math::lognorm::sdlog2gini ---"
puts {[sdlog2gini [gini2sdlog 0.52]] should give 0.52}
puts [sdlog2gini [gini2sdlog 0.52]]
puts {[sdlog2gini [gini2sdlog 0.01]] should give 0.01}
puts [sdlog2gini [gini2sdlog 0.01]]
puts {[sdlog2gini [gini2sdlog 0.92]] should give 0.92}
puts [sdlog2gini [gini2sdlog 0.92]]
puts {[sdlog2gini [gini2sdlog 0.27]] should give 0.27}
puts [sdlog2gini [gini2sdlog 0.27]]

puts "\n"
puts "--- math::lognorm::medmean_ratio ---"
puts {[medmean_ratio 0.45] should give 0.699551}
puts [medmean_ratio 0.45]
puts {[medmean_ratio 0.70] should give 0.341573}
puts [medmean_ratio 0.70]
puts {[medmean_ratio 0.23] should give 0.918069}
puts [medmean_ratio 0.23]

puts "\n"
puts "--- math::lognorm::L ---"
puts {[L 0.20 0.55] should give 0.02807}
puts [L 0.20 0.55]
puts {[L 0.90 0.70] should give 0.426934}
puts [L 0.90 0.70]

puts "\n"
puts {[headcount 7000 23000 0.35] should give 0.062651}
puts {[headcount 365 5000 0.55] should give 0.027698}
puts {[headcount 10000 15000 0.55] should give 0.561441}

puts "\n"
puts "--- math::lognorm::gap ---"
puts {[gap 7000 23000 0.35] should give 0.013925}
puts [gap 7000 23000 0.35]
puts {[gap 365 5000 0.55] should give 0.008215}
puts [gap 365 5000 0.55]
puts {[gap 10000 15000 0.55] should give 0.290783}
puts [gap 10000 15000 0.55]

puts "\n"
puts "--- math::lognorm::cutoff ---"
puts {[cutoff 0.20 5000 0.55] should give 1149.892}
puts [cutoff 0.20 5000 0.55]
puts {[cutoff 0.05 20000 0.70] should give 613.0022}
puts [cutoff 0.05 20000 0.70]
puts {[cutoff 0.80 15000 0.25] should give 19801.82}
puts [cutoff 0.80 15000 0.25]

Category Statistics