Resource Usage Tracker

DKF, 2005-Feb-17

Here is a Tcl script I wrote to track resource usage. It is a wrapper around a program that observes how much time that program took to execute, and then it writes that information out somewhere relevant, together with other collected data (e.g. on the number of processors assigned to the program, who was running it, etc.) The output format is as an XML document, according to the spec found originally at http://www.gridforum.org/2003/ur-wg/urwg-schema.09.02.xsd though I think this document has moved subsequently. (Ah, the hazards of tracking a moving target!) In general Tcl terms, the interesting bit is probably the code to generate the XML, which is really quite neat and guarantees that everything is quoted correctly. The use of TclX features might also be interesting.

License is BSD. Have fun with this code!


package require Tcl  8.4
package require Tclx 8.4
 
# Generate a "unique" number that is also an XML id; the following
# code produces a value that is highly unlikely to be duplicated by
# accident, and so is good enough :^)
set id [string map {- {}} [id host][id userid][pid][clock seconds][clock clicks]]
 
###--- Everything below here should be OK as it is ---###
# ----------------------------------------------------------------------
 
set DATE_FMT "%a %b %e %H:%M:%S %Y"     ;#Tue Mar  2 11:56:34 2004
set UR_DATE_FMT "%Y-%m-%dT%H:%M:%SZ"    ;#2004-03-02T11:56:34Z
 
proc xmlEncode {string} {
    #put quote in quotes to keep the wiki syntax highlighter happy
    string map {< &lt; > &gt; & &amp; "\"" &quot; ' &apos;} $string
}
proc element {name -> content args} {
    upvar 1 chan chan inset inset
    if {[set ->] ne "->"} {
       error "missing ->"
    }
    if {![info exist inset]} {set inset ""}
    set start $name
    foreach {key val} $args {
       append start " $key=\"" [xmlEncode $val] "\""
    }
    puts $chan "$inset<$start>[xmlEncode $content]</$name>"
}
proc structElement {name {opts {}} {body {}}} {
    upvar 1 chan chan inset inset
    if {[string length $opts] && ![string length $body]} {
       set body $opts
       set opts {}
    }
    set start $name
    foreach {key val} $opts {
       append start " $key=\"" [xmlEncode $val] "\""
    }
    if {![info exist inset]} {set inset ""}
    puts $chan "$inset<$start>"
    set oldInset $inset
    append inset "    "
    uplevel 1 $body
    set inset $oldInset
    puts $chan "$inset</$name>"
}
 
# ----------------------------------------------------------------------
# The next two procedures are retained mostly for historical interest...
 
proc writeStartRecord {chan} {
    global env id argv
 
    set args [lassign $argv program]
 
    structElement start_record "id $id" {
       element  program    -> $program
       element  arguments  -> $args
       element  jobdir     -> [file normalize [pwd]]
       element  userid     -> [id user]
       element  host       -> [id host]
       if {[info exist env(UNICORE_IDENTITY)]} {
          element  unicore_identity -> $env(UNICORE_IDENTITY)
       }
    }
    flush $chan
}
 
proc writeFullRecord {chan exitCode} {
    global DATE_FMT id env argv execPid startInfo endInfo caeflag
 
    set args [lassign $argv program]
 
    set start [clock format $startInfo(date) -format $DATE_FMT]
    set end [clock format $endInfo(date) -format $DATE_FMT]
    set user [expr {([lindex $endInfo(times) 2] - [lindex $startInfo(times) 2]) * 0.001}]
    set sys [expr {([lindex $endInfo(times) 3] - [lindex $startInfo(times) 3]) * 0.001}]
 
    structElement license_record "id $id" {
       element  program    -> [auto_execok $program]
       element  caeflag    -> $caeflag
       element  arguments  -> $args
       element  jobdir     -> [file normalize [pwd]]
       if {[info exist env(UNICORE_IDENTITY)]} {
          element  unicore_identity -> $env(UNICORE_IDENTITY)
       }
       element  userid     -> [id user]
       element  gecos      -> $env(NAME)
       element  host       -> [id host]
       element  pid        -> $execPid
       element  startdate  -> $start
       element  enddate    -> $end
       element  walltime   -> [expr {$endInfo(date)-$startInfo(date)}]s
       element  usertime   -> [format %.2fs $user]
       element  systemtime -> [format %.2fs $sys]
    }
    flush $chan
}
 
# ----------------------------------------------------------------------
 
proc ur:clock {time} {
    global UR_DATE_FMT
    clock format $time -format $UR_DATE_FMT -gmt true
}
proc writeUsageRecord {chan exitCode} {
    global id env argv execPid startInfo endInfo caeflag
 
    set jobID [lindex [split [file tail [pwd]] _] 1]
    set args [lassign $argv program]
    set wall [expr {$endInfo(date) - $startInfo(date)}]
    set user [format %.3f [expr {
       ([lindex $endInfo(times) 2] - [lindex $startInfo(times) 2]) * 0.001
    }]]
    set sys [format %.3f [expr {
       ([lindex $endInfo(times) 3] - [lindex $startInfo(times) 3]) * 0.001
    }]]
 
    # Need to look up the right stuff!
    structElement UsageRecord {
       xmlns      http://www.gridforum.org/2003/ur-wg
       xmlns:urwg http://www.gridforum.org/2003/ur-wg
       xmlns:xsi  http://www.w3.org/2001/XMLSchema-instance
       xmlns:ds   http://www.w3.org/2000/09/xmldsig#
       xmlns:ucrl http://www.unicore.org/ajo/resources/log
       xmlns:egb  http://www.eurogrid.org/billing
       xsi:schemaLocation \
               http://www.gridforum.org/2003/ur-wg/urwg-schema.09.02.xsd
    } {
       element  RecordIdentity -> "" \
                urwg:createDate [ur:clock [clock seconds]] \
                urwg:recordId "urn:eurogrid:billing:$id"
       structElement JobIdentity {
          element  GlobalJobId -> unicore_job://[info hostname]/$jobID
          element  LocalJobId  -> $id
          element  ProcessId   -> $execPid
       }
       structElement UserIdentity {
          element  LocalUserId -> [id user]
       }
       element  WallDuration  -> PT${wall}S
       element  CpuDuration   -> PT${user}S       usageType "user"
       element  CpuDuration   -> PT${sys}S        usageType "system"
       element  EndTime       -> [ur:clock $endInfo(date)]
       element  StartTime     -> [ur:clock $startInfo(date)]
       element  Status        -> $exitCode        urwg:description "exit code"
       element  Host          -> [info hostname]  primary "true"
       if {[info exist env(UNICORE_JOB)] && $env(UNICORE_JOB)} {
          element NodeCount  -> $env(UC_NODES)
          element Processors -> $env(UC_PROCESSORS)
       }
       element  Resource      -> [auto_execok $program] \
                urwg:description "ucrl:executableName"
       element  Resource      -> [file normalize [pwd]] \
                urwg:description "ucrl:workingDirectory"
       element  Resource      -> $args     urwg:description "ucrl:arguments"
       # This field is really not useful to anyone!
       #element Resource      -> $caeflag  urwg:description "egb:caeflag"
    }
    flush $chan
}
 
# ----------------------------------------------------------------------
 
proc main {} {
    global serverHost serverPort startInfo execPid endInfo argv caeflag
 
    # Record that we are about to start executing...
    set startInfo(date) [clock seconds]
    set startInfo(times) [times]
 
    set executable [lindex [auto_execok [lindex $argv 0]] 0]
    if {![file executable $executable]} {
       puts stderr "cannot find executable: [lindex $argv 0]"
       exit 2
    }
 
    # Start the execution of the monitored program
    set myArgv0 [lindex $argv 0]
    set myArgs [lrange $argv 1 end]
    set program [auto_execok $myArgv0]
    if {[set execPid [fork]] == 0} {
       # Run the program
       execl -argv0 $myArgv0 $program $myArgs
       puts stderr "failed to execute $myArgv0: $msg"
       exit 2
    }
 
    # Wait for the monitored program to finish
    set code [lindex [wait $execPid] 2]
 
    # Record when we finished
    set endInfo(date) [clock seconds]
    set endInfo(times) [times]
 
    # Write the usage record to stderr
    writeUsageRecord stderr $code

    # Pass out the error code
    exit $code
}
 
catch { main } msg
puts stderr "usageTracker error: $msg"
puts stderr $errorInfo
exit 2