mailbreak

EMJ An email is a tree of parts. Feed a file containing a raw email to the script below, it will build a tree, then use it to save all the parts separately as well as giving you the tree structure.

I used this to learn about Tcllib MIME, struct::tree, and sha1.

The tree you get is a file that looks like this:

    leaf 69a2faa59d {params {format flowed charset ISO-8859-1} content text/plain encoding 7bit}
    leaf 3210ba9e34 {params {charset ISO-8859-1} content text/html encoding 7bit}
    wrapper d0a17db3c7 {content multipart/alternative} 69a2faa59d 3210ba9e34
    leaf ce9a16e7d3 {params {name {Globe.pdf}} content application/pdf encoding base64}
    wrapper 12102a8c68 {content multipart/mixed} d0a17db3c7 ce9a16e7d3

This is the tree of parts, with the root at the bottom. Actually the structure of this example is:

       +--leaf 69a2faa59d
       +--leaf 3210ba9e34
    +--wrapper d0a17db3c7
    +--leaf ce9a16e7d3
    wrapper 12102a8c68

The root is always a "wrapper", with the main headers saved to a file. Other "wrappers" also have headers saved to a file (which may be empty). A "leaf" has headers (saved to a file) and a body (decoded and saved to a file). The name of a "leaf" (and its two files) is the 40-character (shortened to 10 above) of the data. The name of a wrapper and its header file (and the tree file for the root wrapper) is the hash of the list of hashes of its immediate children.

This is 7 or 8 years old, and I'm afraid there aren't many comments.


    #!/bin/sh
    # the next line restarts using tclsh \
    exec tclsh "$0" ${1+"$@"}

    package require mime
    package require sha1
    package require struct::tree

    proc buildmimetree {token node} {
        set props [mime::getproperty $token -names]
        if {[lsearch $props parts] >= 0} {
            # this one has parts
            set partlist [mime::getproperty $token parts]
            foreach pt $partlist {
                # create a tree node for the part
                stree insert $node end $pt
                # process the part in its turn
                buildmimetree $pt $pt
            }
        }
    }

    proc mimeleaf {node} {
        set sha1tok [sha1::SHA1Init]
        set df [open $node.data w]
        # must be in binary mode
        fconfigure $df -translation binary
        set hash [mime::getbody $node -command [list bodypart $df $sha1tok]]
        close $df
        if [file exists $hash.data] {
            file delete $hash.data
        }
        file rename $node.data $hash.data
        stree set $node hash $hash
        savemimedata $hash $node
    }

    proc mimewrapper {node} {
        set childlist [list]
        foreach child [stree children $node] {
            lappend childlist [stree get $child hash]
        }
        set hash [sha1::sha1 $childlist]
        stree set $node hash $hash
        savemimedata $hash $node
    }

    proc savemimedata {hash node} {
        # write out headers
        set hf [open $hash.hdr w]
        fconfigure $hf -translation crlf
        writeheaders $hf $node
        close $hf
    }

    proc fixprops {node {type l}} {
        array set props [mime::getproperty $node]
        array unset props parts
        array unset props size
        if {[string length $props(encoding)] == 0} {
            array unset props encoding
        }
        if {[string equal $type w]} {
            array unset props encoding
        }
        array set pars $props(params)
        array unset pars boundary
        if {[llength [array names pars]] > 0} {
            set props(params) [array get pars]
        } else {
            array unset props params
        }
        return [list [array get props]]
    }

    proc writeheaders {channel node} {
        foreach header [::mime::getheader $node -names] {
            if {[string first "From " $header] == 0} {
                continue
            }
            # puts $channel "$header: [join [mime::getheader $node $header]]"
            # Keep repeats separate (good for Received:)
            foreach occ [mime::getheader $node $header] {
                puts $channel "$header: $occ"
            }
        }
    }

    proc bodypart {channel sha1token type {data ""}} {
        switch -exact -- $type {
            data {
                puts -nonewline $channel $data
                sha1::SHA1Update $sha1token $data
            }
            end {
                return [sha1::Hex [sha1::SHA1Final $sha1token]]
            }
        }
    }

    # -------- start of mainline code
    if {[llength $argv] != 1} {
        puts "Usage : mailbreak filename"
        exit 1
    }

    set fname [lindex $argv 0]
    if {![file readable $fname]} {
        puts "Can't open $fname"
        exit 1
    }

    # tokenize our file
    set token [mime::initialize -file $fname]

    # create a tree to use
    struct::tree stree
    # and rename the root after the main token
    stree rename root $token

    buildmimetree $token $token

    # walk the tree bottom-up collecting the pieces
    set toplevel [stree rootname]
    stree walk $toplevel -order post node {
        if [stree isleaf $node] {
            mimeleaf $node
        } else {
            mimewrapper $node
        }
    }

    # write something that could be a script to recreate the whole thing
    set fname [stree get $toplevel hash]
    set tf [open $fname.tree w]
    stree walk $toplevel -order post node {
        if [stree isleaf $node] {
            puts $tf "leaf [stree get $node hash] [fixprops $node l]"
        } else {
            set childlist [list]
            foreach child [stree children $node] {
                lappend childlist [stree get $child hash]
            }
            puts $tf "wrapper [stree get $node hash] [fixprops $node w] $childlist"
        }
    }
    close $tf

    exit 0