Ogg tools

EG I have a portable music player which supports Ogg files. While any modern unix/linux workstation comes with the tools to convert and otherwise handle this format, I want to do it with tcl.

So, after a while digging on the somewhat confusing documentation, I've come with this code to read the Ogg/Vorbis metadata and write comments to a new file (as the 'vorbiscomment' utility do). The readMetaData procedure takes a file name and returns a dictionary with the relevant information about the file (I've used the official documentation names as dictionary keys). The writeComments procedure takes a input filename, an output filename and a dict with key-value pairs of tags. They will be applied to the output file stripping any existing comments the input file already have.

For a list of standard key names see [L1 ]


 package require Tcl 8.5

 namespace eval ogg {
        # Relevant documents :
        # http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html
        # http://www.xiph.org/ogg/doc/framing.html
        variable reflected 0
        variable crc_table
 }

 proc ogg::writeComments { oggsource oggdest comments } {
        variable crc_table
        variable reflected

        # as this is needed for only for this proc, put it here
        package require crc16
        if {![info exists crc_table]} {
                set crc_table [::crc::Crc_table 32 0x04c11db7 $reflected]
        }

        set fdin [open $oggsource]
        chan configure $fdin -translation binary -encoding binary

        # verify that's a Ogg/Vorbis file
        if {![IsOgg $fdin]} {
                chan close $fdin
                return -code error "$oggsource is not an Ogg/Vorbis file"
        }
        
        # read the first Ogg page. it's fixed size, and contains the Ogg
        # header plus the Vorbis identification packet
        set oggpage1 [chan read $fdin 58]
        
        # read the number of page segments and the segments table
        set oggp2    [chan read $fdin 22]
        # read 4 bytes of the crc and replace them with zeros
        chan read $fdin 4; append oggp2 \x00\x00\x00\x00
        binary scan  [chan read $fdin 1] cu page_segments
        binary scan  [chan read $fdin $page_segments] cu* segment_table

        # determine the 2nd packet size and read it. read the rest of the data
        # until the end of the second ogg page
        set i        [lsearch -not -integer $segment_table 255]
        set vp2_lv   [lrange $segment_table 0 $i]
        set vp3_lv   [lrange $segment_table $i+1 end]

        set vp2_data [chan read $fdin [expr [join $vp2_lv +]]]
        set vp3_data [chan read $fdin [expr [join $vp3_lv +]]]

        # reuse the vendor string of the file
        binary scan [string range $vp2_data 7 10] iu vs_size
        binary scan [string range $vp2_data 11 10+$vs_size] A* vendor_string

        # build a new 2nd vorbis packet
        set vorbisp2 \x03vorbis
        # add the vendor string
        append vorbisp2 [binary format iuA* $vs_size $vendor_string]
        # hoy many fields of comments we have
        append vorbisp2 [binary format iu [dict size $comments]]
        # now, add the comments
        dict for {key value} $comments {
                set field  "${key}=[encoding convertto utf-8 $value]"
                set size   [string length $field]
                append vorbisp2 [binary format iuA* $size $field]
        }
        # finalize the second Vorbis packet
        append vorbisp2 [binary format c 1]
        # compute its size
        set vp2_size [string length $vorbisp2]
        # determine the new lacing values
        set lvalues [lrepeat [expr {$vp2_size / 255}] 255 ]
        lappend lvalues [expr {$vp2_size % 255}]
        lappend lvalues {*}$vp3_lv

        # reconstruct the second Ogg page. We have already the first 26 bytes
        append oggp2 [binary format cucu* [llength $lvalues] $lvalues]
        append oggp2 $vorbisp2
        append oggp2 $vp3_data

        # we are almost done. calculate the crc32 value as instructed in the
        # specification and put it on the bytes 22-25
        set crc_new  [::crc::Crc $oggp2 32 [namespace current]::crc_table 0 0 $reflected]
        set oggpage2 [string replace $oggp2 22 25 [binary format iu $crc_new]]

        # write the results to the output file
        set fdout [open $oggdest w]
        chan configure $fdout -encoding binary -translation binary
        chan puts -nonewline $fdout $oggpage1
        chan puts -nonewline $fdout $oggpage2

        # the rest of the file is unchanged. simply copy it to the new file
        chan copy $fdin $fdout
        chan close $fdin
        chan close $fdout
 }

 proc ogg::readMetaData { oggfile } {

        set fd [open $oggfile]
        chan configure $fd -translation binary -encoding binary

        # verify that's a Ogg/Vorbis file
        if {![IsOgg $fd]} {
                chan close $fd
                return -code error "$oggfile is not an Ogg/Vorbis file"
        }

        #this will be our returning value: a dict
        set result [dict create]

        # here comes the data of the identification packet
        # extract only the interesting information
            chan seek $fd 35
        binary scan [chan read $fd 21] iucuiuiii \
                version channels sample_r br_max br_nom br_min

        dict set result version $version
        dict set result audio_channels $channels
        dict set result audio_sample_rate $sample_r
        dict set result bitrate_maximum $br_max
        dict set result bitrate_nominal $br_nom
        dict set result bitrate_minimum $br_min
        
        # towards the end of the second Ogg header, this
        # byte counts how many bytes left are in the header
        chan seek $fd 84
        binary scan [chan read $fd 1] c bytesleft
        chan seek $fd [expr {85 + $bytesleft + 7}]

        # follows a vendor string
        binary scan [chan read $fd 4] iu size
        binary scan [chan read $fd $size] A* vendor

        dict set result vendor_string $vendor

        # and now a list of attributes in the form "key=value"
        binary scan [chan read $fd 4] iu numfields
        for {set i 0} {$i < $numfields} {incr i} {
                binary scan [chan read $fd 4] iu size
                binary scan [chan read $fd $size] a* field
                set field [encoding convertfrom utf-8 $field]
                lassign [split $field =] key value
                dict set result user_comment $key $value
        }

        chan close $fd
        return $result
 }

 proc ogg::IsOgg { fd } {
        set pos [chan tell $fd]
        chan seek $fd 0
        set data [chan read $fd 35]
        chan seek $fd $pos
        return [expr {[string range $data 0 3] eq "OggS" \
                && [string range $data 28 34] eq "\x01vorbis" ? 1 : 0}]
 }

 # simple test
 if 0 {
        # ogg::writeComments
        set f [lindex $argv 0]
        set nf [file rootname $f].new.ogg

        puts "Input File:  $f"
        puts "Output File: $nf"
        puts "Tags to apply: title: \"My tune\" artist: \"My favourite artist\""

        ogg::writeComments $f $nf {title "My tune" artist "My favourite artist"}
 }

 if 0 {
        # ogg::readMetaData
        set f [lindex $argv 0]
        puts "File:  $f"
        puts [ogg::readMetaData $f]
 }