Version 6 of Regexp Integer Validation

Updated 2010-07-02 20:48:47 by CJB

Problem Statement:

  [CJB]: To validate a string as a variety of integer types (binary, octal, decimal, hexadecimal) excluding 0nnn octals.
  Also uses upvar to return the valid types for the given string, and strips left padded 0's for strings without a type modifier (0x|0b|0o).

Code:

#Figures out if a string could be a variety of integers, and then compares a given type to the list of types.
#Ignores 0nnn octal numbers.
#Will upvar a non-0nnn octal number to 'num_var' (strips extra 0's)
#Will upvar the valid types for this string into 'types_var'
#Empty string or sign only is interpreted as 0, includes type 'none' in list
#Only 0's is interpreted as a single 0, sign and modifers are ignored.
#Returns 1 if the given type is a valid type for this string, else returns 0
proc regex_validate {string {type hexadecimal} {num_var ""} {types_var ""}} {
    if {$types_var ne ""} {
            upvar $types_var types
        }
        set types [list hexadecimal decimal octal binary none]
        regexp -nocase -all -line -- {^([-+])?(0x|0b|0o)?(0*)([01]*)([0-7]*)([0-9]*)([0-9a-f]*)(\w*)$} $string match sgn mod pad0 bin oct dec hex str
        #Eliminate invalid types
        if {![info exists str]} {
            set types [list]
                if {$num_var ne ""} {
                        upvar $num_var num
                        set num ""
                }
                return 0
        } elseif {$str ne ""} {
            set types [list]
        } elseif {$hex ne ""} {
            set types [lrange $types 0 0]
        } elseif {$dec ne ""} {
            set types [lrange $types 0 1]
        } elseif {$oct ne ""} {
            set types [lrange $types 0 2]
        } elseif {$bin ne ""} {
            set types [lrange $types 0 3]
        } elseif {$pad0 ne ""} {
                set types [lrange $types 0 3]
                lassign {"" "" 0} sgn mod bin
        } else {
            lassign {"" "" 0} sgn mod bin
        }
        #Modifier check and elimination
        switch -nocase -- $mod {
            0x  {set types [lsearch -inline $types h*]}
            0o  {set types [lsearch -inline $types o*]}
            0b  {set types [lsearch -inline $types b*]}
        }
        #Return number via upvar if needed.  Removes left padded 0's for numbers without a modifier
        if {$num_var ne ""} {
            upvar $num_var   num
            if {$mod eq ""} {
                set num ${sgn}${bin}${oct}${dec}${hex}${str}
            } else {
                set num $match
            }
        }
        return [expr {[lsearch -nocase $types $type*] >= 0}]
}

Example:
% after 0 {puts $n:$t}; regex_validate "01234" hexadecimal n t
1
1234:hexadecimal decimal octal

Demo:

 package require Tk
 label .number -textvariable ::n -bg lightgray
 label .types  -textvariable ::t -bg gray -width [string length " Hexadecimal Decimal Octal Binary None "]
 ttk::combobox .combo -justify center -textvariable ::selection -state readonly -values [list Hex Decimal Octal Binary]
 entry .entry         -justify center -textvariable ::e -validate all -vcmd [list apply {{w value validation combobox} {
    variable ::n
    variable ::t
    catch {
        if {[regex_validate $value [$combobox get] ::n ::t]} {
            $w configure -bg #c0ffc0
        } else {
            if {$validation eq "key"} {
                $w configure -bg #ffffc0
            } else {
                $w configure -bg #ffc0c0
            }
        }
    }
    return 1
 }} %W %P %V .combo]
 bind .entry <Return> [list .entry validate]
 trace add variable ::selection {write} ".entry validate;#"
 .combo current 0
 grid .combo  .entry  -sticky news
 grid .number -       -sticky news
 grid .types  -       -sticky news

Alternate Solutions:

 #Alternate prototype using [scan] instead of [regexp]
 proc scan_validate {string} {
        scan $string %x%s hex s
        if {![info exists s]} {
                scan $string %d%s dec s
                if {![info exists s]} {
                        scan $string %o%s oct s
                        if {![info exists s]} {
                                #binary check?
                                return hex|dec|oct
                        } else {
                                return hex|dec
                        }
                } else {
                        return hex
                }
        } else {
                return NaN
        }
 }

Discussion:

  [CJB]: I put this here instead of in [Advanced Regular Expression Examples] due to being more than a few lines like the examples found there.

See also: