Version 4 of url-encoding

Updated 2005-05-18 20:53:49

# init --
#
#   Builds the lookup table used for URL (form) encoding and runs once
#   when this file is loaded.
#
#   Sets the variable "alphanumeric" to the character-class body
#   a-zA-Z0-9 and fills the array "formMap" so that every character
#   with code 0..255 that is not an ASCII letter or digit maps to its
#   %HH escape (two lowercase hex digits).  Space and newline are then
#   overridden with "+" and "%0d%0a" respectively.
#
# Arguments:
#   none
#
# Results:
#   None of interest; the effect is the two variables described above.
proc init {} {
    variable formMap
    variable alphanumeric a-zA-Z0-9

    # Only codes 0..255 have a two-digit %HH form.  Including 256 would
    # add an entry whose value is the three-digit string "%100".
    for {set i 0} {$i < 256} {incr i} {
        set c [format %c $i]
        if {![string match \[$alphanumeric\] $c]} {
            set formMap($c) %[format %.2x $i]
        }
    }
    # These are handled specially
    array set formMap { " " + \n %0d%0a }
}
init


  # url-encode --
  #
  #   Encodes a string for use in a URL query or form body.  ASCII
  #   letters and digits are passed through unchanged; every other
  #   character is replaced by its entry in the "formMap" array, which
  #   is filled by "init".
  #
  # Arguments:
  #   string    the text to encode
  #
  # Results:
  #   The encoded text.  An error is raised if the input contains a
  #   non-alphanumeric character that has no entry in formMap.
  proc url-encode {string} {
    # The lookup table built by init is named formMap, so that is the
    # array that must be linked and referenced here.
    variable formMap
    variable alphanumeric

    # The spec says: "non-alphanumeric characters are replaced by '%HH'"
    # 1 leave alphanumerics characters alone
    # 2 Convert every other character to an array lookup
    # 3 Escape constructs that are "special" to the tcl parser
    # 4 "subst" the result, doing all the array substitutions

    regsub -all \[^$alphanumeric\] $string {$formMap(&)} string
    # Backslash-quote an index character that would otherwise confuse
    # the parser, e.g. $formMap([) becomes $formMap(\[)
    regsub -all {[][{})\\]\)} $string {\\&} string
    return [subst -nocommand $string]
  }



  # url-decode --
  #
  #   Decodes URL/form-encoded text: each "+" becomes a space and each
  #   %HH escape (two hex digits) becomes the character with that code.
  #   A "%" that is not followed by two hex digits is left as it is.
  #
  # Arguments:
  #   str    the encoded text
  #
  # Results:
  #   The decoded text.
  proc url-decode str {
    # Double every backslash so the final subst yields it literally,
    # and turn "+" back into a space.
    set protected [string map [list "\\" "\\\\" + { }] $str]

    # Rewrite each %HH escape as a \u00HH backslash sequence.
    regsub -all -- {%([A-Fa-f0-9][A-Fa-f0-9])} $protected {\\u00\1} escaped

    # Only backslash substitution is enabled, so dollar signs and
    # brackets in the data are never evaluated.
    set decoded [subst -nocommands -novariables $escaped]
    return $decoded
  }

This is almost exactly the source taken from the implementations of the http and ncgi packages.