[CMcC] 20040925 I've written a compiler from unix magic(5) files to pure tcl. The idea is you generate a script which should perform precisely the same function as the unix file command (it gives you information about a file) This code isn't complete, but handles most common cases. You'll find that a lot of the magic files out there are buggy, too. ----- magiccompile.tcl: # file to parse the magic file from magic(5) into a tcl program package require fileutil source magiclib.tcl namespace eval magic {} # parse an individual line proc magic::parseline {line {maxlevel 10000}} { set unlevel [string trimleft $line >] set level [expr [string length $line] - [string length $unlevel]] if {$level > $maxlevel} { return -code continue "Skip - too high a level" } set parse [regexp -expanded -inline {^(\S+)\s+(\S+)\s*((\S|(\B\s))*)\s*(.*)$} $unlevel] if {$parse == {}} { error "Can't parse: '$unlevel'" } set value "" set command "" foreach {junk offset type value junk1 junk2 command} $parse break if {[string index $value end] eq "\\"} { append value " " } if {[string index $command end] eq "\\"} { append command " " } #set value [subst -nocommands -novariables $value] #set command [subst -nocommands -novariables $command] if {$value == ""} { error "no value" } #puts "level:$level offset:$offset type:$type value:'$value' command:'$command'" return [list $level $offset $type $value $command] } # process a magic file proc magic::process {file {maxlevel 10000}} { variable level ;# level of line variable linenum ;# line number set level 0 set script {} set linenum 0 ::fileutil::foreachLine line $file { incr linenum set line [string trim $line " "] if {[string index $line 0] eq "#"} { continue ;# skip comments } elseif {$line == ""} { continue } else { # parse line if {[catch {parseline $line $maxlevel} parsed]} { continue } # got a valid line foreach {level offset type value message} $parsed break set compare [string index $value 0] switch -glob -- $value { [<>]=* { set compare [string range $value 0 1] set value [string range $value 2 end] } <* - >* - &* - ^* { set value [string range $value 1 end] } =* { set compare "==" set value [string range $value 1 end] } !* { set compare "!=" set value [string range $value 1 end] } x { set value "" } default { set compare "==" if {[string match {\\[=]*} $value]} { set value [string range $value 1 end] } } } set qual "" switch -glob -- $type { pstring* - string* { set type [split $type /] set qual [lindex $type 1] set type [lindex $type 0] if {$type eq "pstring"} { append qual "p" set type "string" } set value [string map [list "\#" "\\\#"] $value] if {($value eq "\\0") && ($compare eq ">")} { set value "" set compare x } elseif {$compare eq "!="} { set value !$value set compare "==" } if {$type ne "string"} { puts stderr "Reject String: ${file}:$linenum $type - $line" continue } } regex { # I am *not* going to handle regex puts stderr "Reject Regex: ${file}:$linenum $type - $line" continue } *byte* - *short* - *long* - *date* { # break up the type set type [split $type &] set qual [lindex $type 1] if {$qual ne ""} { set qual &$qual } else { set type [split $type -] set qual [lindex $type 1] if {$qual ne ""} { set qual -$qual } } set type [lindex $type 0] if {$compare ne "x"} { set value [string trimright $value L] ;# redundant Long if {[catch {set value [expr $value]} x eo]} { puts stderr "Reject Value Error: ${file}:$linenum '$value' '$line' - $eo" continue; } set value [format "0x%x" $value] } } default { puts stderr "Reject Unknown Type: ${file}:$linenum $type - $line" continue } } } # collect some summaries variable types set types($type) $type variable quals set quals($qual) $qual #puts "$linenum level:$level offset:$offset type:$type qual:$qual compare:$compare value:'$value' message:'$message'" set message [string map [list "\#" "\\\#"] $message] set record [list $type $qual $compare $offset $value $message] if {$script == {}} { set level 0 } if {$level == 0} { lappend script $record } else { # find the growing edge set depth [lrepeat [expr $level] end] while {[catch { set insertion [lindex $script {expand}$depth] }]} { set depth [lreplace $depth end end] } #puts stderr "PRE: $depth - $script - $insertion" lappend insertion $record lset script {expand}$depth $insertion #puts stderr "POST: $script" } } #puts "Script: $script" return $script } # generate code for the alternative tests proc magic::run_else {type qual comp offset val message args} { set result "" if {[string match "(*)" $offset]} { set offset "\[magic::offset $offset\]" } switch -glob -- $type { *byte* - *short* - *long* - *date* { set type [list Numeric $type] } *string { set type String } } append result "if \{\[magic::$type $offset $comp [list $val] $qual\]\} \{" \n append result "\tmagic::emit [list $message]" \n if {[llength $args]} { append result [run_c {expand}$args] \n } append result "\} else" return $result } # generate code for the next level tests proc magic::run_c {args} { set result "" foreach el $args { append result [run_else {expand}$el] } if {[llength $args] != 0} { append result " \{\}" \n } return $result } # generate code for each of a series of tests proc magic::run_el {type qual comp offset val message args} { set result "" if {[string match "(*)" $offset]} { set offset "\[magic::offset $offset\]" } switch -glob -- $type { *byte* - *short* - *long* - *date* { set type [list numeric $type] } } append result "if \{\[magic::$type $offset $comp [list $val] $qual\]\} \{" \n append result "\tmagic::emit [list $message]" \n append result [run_c {expand}$args] append result "return \[magic::result\]" \n append result "\}" \n return $result } # generate code for a magic file's tests proc magic::run {script} { set result "" foreach el $script { append result [run_el {expand}$el] } return $result } # compile up magic files or directories of magic files proc magic::compile {args} { set tcl "" foreach arg $args { if {[file type $arg] == "directory"} { foreach file [glob [file join $arg *]] { set script1 [process $file] append tcl "magic::file_start $file" \n append tcl [run $script1] \n } } else { set script1 [process $arg] append tcl "magic::file_start $arg" \n append tcl [run $script1] \n } } return $tcl } proc magic::install {args} { foreach arg $args { proc magic::/[file tail $arg] {} [magic::compile $arg] } } proc magic::generate {args} { foreach arg $args { set out [::open [file tail $arg].tcl w] puts $out "proc magic::/[file tail $arg] {} \{" puts $out [magic::compile $arg] puts $out "return {}" puts $out \} close $out } } #puts [magic::compile {expand}$argv] magic::generate {expand}$argv ----- magiclib.tcl namespace eval magic {} set magic::debug 0 # open the file to be scanned proc magic::open {file} { variable fd set fd [::open $file] fconfigure $fd -translation binary # clear the fetch cache variable cache unset cache } # mark the start of a magic file proc magic::file_start {name} { variable debug if {$debug} { puts stderr "File: $name" } } # return the emitted result proc magic::result {} { variable result return $result } # emit a message proc magic::emit {msg} { variable string variable numeric set msg [::string map [list \\b "" %s $string %d $numeric] $msg] variable result append result $msg return 0 } # handle complex offsets - TODO proc magic::offset {where} { variable debug #if {$debug} { puts stderr "OFFSET: $where" #} return 0 } # proc magic::fetch {where what} { variable cache if {![info exists cache($where,$what)]} { variable fd seek $fd $where set cache($where,$what) [read $fd $what] } return $cache($where,$what) } # maps magic typenames to field characteristics array set magic::typemap { byte {1 -1 c} ubyte {1 0xff c} short {2 -1 S} ushort {2 0xffff S} beshort {2 -1 S} leshort {2 -1 s} ubeshort {2 0xffff S} uleshort {2 0xffff s} long {4 -1 I} belong {4 -1 I} lelong {4 -1 i} ubelong {4 0xffffffff I} ulelong {4 0xffffffff i} } proc magic::Numeric {type offset comp val {qual ""}} { variable typemap variable numeric foreach {size mask scan} $typemap($type) break set numeric [fetch $offset $size] if {[::string bytelength $numeric] == 0} { return 0 } if {$comp == "x"} { # anything matches - don't care binary scan $numeric $scan numeric return 1 } set val [binary format $scan $val] binary scan $numeric $scan numeric binary scan $val $scan val if {$qual != ""} { set numeric [expr $numeric $qual] } set c [expr $val $comp $numeric] variable debug if {$debug} { puts stderr "numeric $type: $val $comp $numeric / $qual - $c" } return $c } proc magic::String {offset comp val {qual ""}} { variable fd variable string set val [subst -nocommands -novariables $val] seek $fd $offset if {$comp eq "x"} { # anything matches - don't care set string "" while {([::string length $string] < 100) && ![::string is space [set c [read $fd 1]]]} { append string $c } return 1 } set string [read $fd [::string length $val]] set cmp [::string compare $val $string] set c [expr $cmp $comp 0] variable debug if {$debug} { puts "String '$val' $comp '$string' - $c" if {$c} { puts "offset $offset - $string" } } return $c }