Reading MP3 file info

by Martin Lemburg aka male


I searched the internet for a way to read properties of MP3 files, such as the sampling rate, bit rate, duration and so on.

That led me to the page http://www.dv.co.yu/mpgscript/mpeghdr.htm , which gave me a first idea of how to start.

Then I downloaded the sources published by the author of that page, as well as the sources of Snack, and used them to build this MP3 file information reader in pure Tcl.

I tried it with several MP3 files and got satisfying results, which doesn't mean that there are no errors!

Please feel free to test it and to send comments, suggestions, ...! Thanks!


usage: mp3InfoReader fileName arrayVarName ?debugFlag?

fileName
path to the MP3 file
arrayVarName
name of the array variable to be used as container for the MP3 information; it is erased before it is filled
debugFlag
boolean - if true, the information about the first 2 frame headers is not deleted but returned in the array as well

 namespace eval ::mp3InfoReader {
    variable bitRateTable;
    variable sampleRateTable;
    variable channelModeTable;
 
    # Bit rates in kbit/s, keyed by "<version>.<layer bits>.<bit rate index>".
    #   version    : 3 = MPEG 1, 0 = MPEG 2 and MPEG 2.5 (they share one set)
    #   layer bits : raw header value, 3 = Layer I, 2 = Layer II, 1 = Layer III
    #   index      : raw 4 bit bit rate index; 0 stands for "free format"
    #                and 15 is not allowed (stored as -1)
    # In the MPEG 2/2.5 set Layer II and Layer III use the same rates, so the
    # 0.2.* rows repeat the 0.1.* rows. That keeps a direct lookup of an
    # MPEG 2.5 Layer II header from failing or yielding a negative rate.
    array set bitRateTable {
       0.1.0      0   0.1.1      8   0.1.2     16   0.1.3     24
       0.1.4     32   0.1.5     40   0.1.6     48   0.1.7     56
       0.1.8     64   0.1.9     80   0.1.10    96   0.1.11   112
       0.1.12   128   0.1.13   144   0.1.14   160   0.1.15    -1
       0.2.0      0   0.2.1      8   0.2.2     16   0.2.3     24
       0.2.4     32   0.2.5     40   0.2.6     48   0.2.7     56
       0.2.8     64   0.2.9     80   0.2.10    96   0.2.11   112
       0.2.12   128   0.2.13   144   0.2.14   160   0.2.15    -1
       0.3.0      0   0.3.1     32   0.3.2     48   0.3.3     56
       0.3.4     64   0.3.5     80   0.3.6     96   0.3.7    112
       0.3.8    128   0.3.9    144   0.3.10   160   0.3.11   176
       0.3.12   192   0.3.13   224   0.3.14   256   0.3.15    -1
       3.1.0      0   3.1.1     32   3.1.2     40   3.1.3     48
       3.1.4     56   3.1.5     64   3.1.6     80   3.1.7     96
       3.1.8    112   3.1.9    128   3.1.10   160   3.1.11   192
       3.1.12   224   3.1.13   256   3.1.14   320   3.1.15    -1
       3.2.0      0   3.2.1     32   3.2.2     48   3.2.3     56
       3.2.4     64   3.2.5     80   3.2.6     96   3.2.7    112
       3.2.8    128   3.2.9    160   3.2.10   192   3.2.11   224
       3.2.12   256   3.2.13   320   3.2.14   384   3.2.15    -1
       3.3.0      0   3.3.1     32   3.3.2     64   3.3.3     96
       3.3.4    128   3.3.5    160   3.3.6    192   3.3.7    224
       3.3.8    256   3.3.9    288   3.3.10   320   3.3.11   352
       3.3.12   384   3.3.13   416   3.3.14   448   3.3.15    -1
    }
 
    # Sampling rates in Hz, keyed by "<raw MPEG version bits>.<raw sampling
    # rate index>". Version bits as decoded by proc mpeg: 3 = MPEG 1,
    # 2 = MPEG 2, 0 = MPEG 2.5.
    array set sampleRateTable {
       3.0   44100
       3.1   48000
       3.2   32000
       2.0   22050
       2.1   24000
       2.2   16000
       0.0   11025
       0.1   12000
       0.2    8000
    }
    
    # Details of the joint stereo mode, keyed by "<layer key>.<2 bit mode
    # extension>". Proc channelMode uses layer key 3 for the raw layer bits
    # 3 and 2 and layer key 1 for the raw layer bits 1.
    # The 3.* values look like a first/last band pair and the 1.* values list
    # the stereo codings in use - NOTE(review): meaning taken from the MPEG
    # header layout, confirm.
    array set channelModeTable {
       1.0   {}
       1.1   {"intensity stereo"}
       1.2   {"ms stereo"}
       1.3   {"intensity stereo" "ms stereo"}
       3.0   {4 31}
       3.1   {8 31}
       3.2   {12 31}
       3.3   {16 31}
    };
 
    # Reads the next four bytes of a channel as one big-endian 32 bit integer.
    #
    # fileId - channel to read from
    #
    # Returns the value as converted by the binary scan format "I".
    # Raises an error if fewer than four bytes could be read.
    proc readI4 {fileId} {
       if {[binary scan [read $fileId 4] I value] != 1} {
          error "couldn't read a 4byte big-endian integer value from \"$fileId\"";
       }
       return $value;
    }
 
    # Genre names of the ID3v1 tag; the list index is the genre byte of the
    # tag (0 = Blues ... 125 = Dance Hall). "Psychadelic" and "Bebob" are kept
    # as they are commonly published in this list.
    variable genres {
        Blues                   {Classic Rock}          Country                 Dance
        Disco                   Funk                    Grunge                  Hip-Hop
        Jazz                    Metal                   {New Age}               Oldies
        Other                   Pop                     R&B                     Rap
        Reggae                  Rock                    Techno                  Industrial
        Alternative             Ska                     {Death Metal}           Pranks
        Soundtrack              Euro-Techno             Ambient                 Trip-Hop
        Vocal                   Jazz+Funk               Fusion                  Trance
        Classical               Instrumental            Acid                    House
        Game                    {Sound Clip}            Gospel                  Noise
        AlternRock              Bass                    Soul                    Punk
        Space                   Meditative              {Instrumental Pop}      {Instrumental Rock}
        Ethnic                  Gothic                  Darkwave                Techno-Industrial
        Electronic              Pop-Folk                Eurodance               Dream
        {Southern Rock}         Comedy                  Cult                    Gangsta
        {Top 40}                {Christian Rap}         Pop/Funk                Jungle
        {Native American}       Cabaret                 {New Wave}              Psychadelic
        Rave                    Showtunes               Trailer                 Lo-Fi
        Tribal                  {Acid Punk}             {Acid Jazz}             Polka
        Retro                   Musical                 {Rock & Roll}           {Hard Rock}
        Folk                    Folk-Rock               {National Folk}         Swing
        {Fast Fusion}           Bebob                   Latin                   Revival
        Celtic                  Bluegrass               Avantgarde              {Gothic Rock}
        {Progressive Rock}      {Psychedelic Rock}      {Symphonic Rock}        {Slow Rock}
        {Big Band}              Chorus                  {Easy Listening}        Acoustic
        Humour                  Speech                  Chanson                 Opera
        {Chamber Music}         Sonata                  Symphony                {Booty Bass}
        Primus                  {Porn Groove}           Satire                  {Slow Jam}
        Club                    Tango                   Samba                   Folklore
        Ballad                  {Power Ballad}          {Rhythmic Soul}         Freestyle
        Duet                    {Punk Rock}             {Drum Solo}             {A Cappella}
        Euro-House              {Dance Hall}
    }


    # Extracts the raw 2 bit MPEG version field (bits 4 and 3) from the second
    # byte of a frame header.
    #
    # byte2 - second header byte as integer
    #
    # Returns a value from 0 to 3.
    proc mpegBin {byte2} {
       set versionBits [expr {($byte2 & 0x18) >> 3}];
       return $versionBits;
    }
 
    # Translates the MPEG version field of the second header byte into the
    # version number.
    #
    # byte2 - second header byte as integer
    #
    # Returns 1, 2 or 2.5, or 0 for the field value 1, which has no version
    # assigned.
    proc mpeg {byte2} {
       set versionBits [mpegBin $byte2];
       if {$versionBits == 3} {
          return 1;
       }
       if {$versionBits == 2} {
          return 2;
       }
       if {$versionBits == 0} {
          return 2.5;
       }
       return 0;
    } 
 
    # Extracts the raw 2 bit layer field (bits 2 and 1) from the second byte
    # of a frame header.
    #
    # byte2 - second header byte as integer
    #
    # Returns a value from 0 to 3.
    proc layerBin {byte2} {
       set layerBits [expr {($byte2 & 0x06) >> 1}];
       return $layerBits;
    }
 
    # Translates the layer field of the second header byte into the layer
    # number.
    #
    # byte2 - second header byte as integer
    #
    # Returns 1, 2 or 3 (field values 3, 2 and 1), or 0 for the field value 0.
    proc layer {byte2} {
       set layerBits [layerBin $byte2];
       if {$layerBits == 0} {
          return 0;
       }
       return [expr {4 - $layerBits}];
    } 
 
    # Tells whether the protection bit (lowest bit of the second header byte)
    # is cleared.
    #
    # byte2 - second header byte as integer
    #
    # Returns 1 if the bit is 0, otherwise 0.
    proc protection {byte2} {
       return [expr {!($byte2 & 0x1)}];
    }
 
    # Extracts the raw 4 bit bit rate index (upper half) from the third byte
    # of a frame header.
    #
    # byte3 - third header byte as integer
    #
    # Returns a value from 0 to 15.
    proc bitRateBin {byte3} {
       set rateIndex [expr {($byte3 & 0xF0) >> 4}];
       return $rateIndex;
    }
 
    # Looks up the bit rate of a frame.
    #
    # byte2 - second header byte as integer (MPEG version and layer)
    # byte3 - third header byte as integer (bit rate index)
    #
    # Returns the bit rate in kbit/s as stored in bitRateTable, 0 for the
    # bit rate index 0, or -1 if the bit rate index is 15, the layer field
    # is 0 or the version field is 1 - none of these has a table entry.
    proc bitRate {byte2 byte3} {
       variable bitRateTable;

       set bitRateIdx [bitRateBin $byte3];
       set layerIdx   [layerBin $byte2];
       set mpegIdx    [mpegBin $byte2];

       if {($bitRateIdx == 0xF) || ($layerIdx == 0) || ($mpegIdx == 1)} {
          return -1;
       }

       if {$mpegIdx != 3} {
          # MPEG 2 (field value 2) and MPEG 2.5 (field value 0) share the
          # rows stored under the version key 0, in which Layer II
          # (layer bits 2) uses the same row as Layer III (layer bits 1)
          #
          if {$layerIdx == 2} {
             set layerIdx 1;
          }

          set mpegIdx 0;
       }

       return $bitRateTable($mpegIdx.$layerIdx.$bitRateIdx);
    }
 
    # Extracts the raw 2 bit sampling rate index (bits 3 and 2) from the third
    # byte of a frame header.
    #
    # byte3 - third header byte as integer
    #
    # Returns a value from 0 to 3.
    proc sampleRateBin {byte3} {
       set rateIndex [expr {($byte3 & 0x0C) >> 2}];
       return $rateIndex;
    }
 
    # Looks up the sampling rate of a frame.
    #
    # byte2 - second header byte as integer (MPEG version)
    # byte3 - third header byte as integer (sampling rate index)
    #
    # Returns the sampling rate as stored in sampleRateTable, or 0 if the
    # sampling rate index is 3 or the version field is 1.
    proc sampleRate {byte2 byte3} {
       variable sampleRateTable;

       set rateBits [sampleRateBin $byte3];
       if {$rateBits == 0x3} {
          return 0;
       }

       set versionBits [mpegBin $byte2];
       if {$versionBits == 1} {
          return 0;
       }

       return $sampleRateTable($versionBits.$rateBits);
    }
 
    # Tells whether the lowest bit of the third header byte (private bit)
    # is set.
    #
    # byte3 - third header byte as integer
    #
    # Returns 1 if the bit is set, otherwise 0.
    proc private {byte3} {
       return [expr {($byte3 & 0x1) != 0}];
    }
 
    # Tells whether bit 1 of the third header byte (padding bit) is set.
    #
    # byte3 - third header byte as integer
    #
    # Returns 1 if the bit is set, otherwise 0.
    proc padding {byte3} {
       return [expr {($byte3 & 0x2) != 0}];
    }
 
    # Extracts the raw 2 bit channel mode field (the two highest bits) from
    # the fourth byte of a frame header.
    #
    # byte4 - fourth header byte as integer
    #
    # Returns a value from 0 to 3.
    proc channelModeBin {byte4} {
       set modeBits [expr {($byte4 & 0xC0) >> 6}];
       return $modeBits;
    }
 
    # Describes the channel mode of a frame.
    #
    # byte2 - second header byte as integer (layer)
    # byte4 - fourth header byte as integer (channel mode, mode extension)
    #
    # Returns "stereo", "dual" or "single" for the channel mode values 0, 2
    # and 3. For the value 1 the result is "joint" if the layer field is 0,
    # otherwise a two element list of "joint" and the channelModeTable entry
    # that belongs to the layer and the mode extension (bits 5 and 4).
    proc channelMode {byte2 byte4} {
       variable channelModeTable;

       set modeBits [channelModeBin $byte4];

       if {$modeBits == 0} {
          return "stereo";
       }
       if {$modeBits == 2} {
          return "dual";
       }
       if {$modeBits == 3} {
          return "single";
       }

       set layerIdx [layerBin $byte2];

       if {$layerIdx == 0} {
          return "joint";
       }
       if {$layerIdx == 2} {
          # layer bits 2 use the table rows of layer bits 3
          #
          set layerIdx 3;
       }

       set extensionBits [expr {($byte4 >> 4) & 0x3}];

       return [list "joint" $channelModeTable($layerIdx.$extensionBits)];
    } 
 
    # Tells whether bit 3 of the fourth header byte (copyright bit) is set.
    #
    # byte4 - fourth header byte as integer
    #
    # Returns 1 if the bit is set, otherwise 0.
    proc copyright {byte4} {
       return [expr {($byte4 & 0x08) != 0}];
    }
 
    # Tells whether bit 2 of the fourth header byte (original bit) is set.
    #
    # byte4 - fourth header byte as integer
    #
    # Returns 1 if the bit is set, otherwise 0.
    proc original {byte4} {
       return [expr {($byte4 & 0x04) != 0}];
    }
 
    # Extracts the raw 2 bit emphasis field (the two lowest bits) from the
    # fourth byte of a frame header.
    #
    # byte4 - fourth header byte as integer
    #
    # Returns a value from 0 to 3.
    proc emphasisBin {byte4} {
       set emphasisBits [expr {$byte4 & 0x03}];
       return $emphasisBits;
    }
 
    # Describes the emphasis field of the fourth header byte.
    #
    # byte4 - fourth header byte as integer
    #
    # Returns "none", "50/15 ms", "reserved" or "CCIT J.17" for the field
    # values 0, 1, 2 and 3.
    proc emphasisDesc {byte4} {
       set descriptions {none {50/15 ms} reserved {CCIT J.17}};

       return [lindex $descriptions [emphasisBin $byte4]];
    } 
 
    # Calculates the length of a frame in bytes, counted from the first byte
    # of its header (header and checksum bytes are part of that length).
    #
    # byte1 - first header byte, not needed for the calculation
    # byte2 - second header byte as integer (MPEG version, layer)
    # byte3 - third header byte as integer (bit rate, sampling rate, padding)
    #
    # Returns the frame length, or 1 if it can't be calculated (bit rate 0,
    # that is "free format", or no valid bit rate or sampling rate), so that
    # a scanner just moves one step forward.
    proc frameLength {byte1 byte2 byte3} {
       set mpeg       [mpegBin $byte2];
       set layer      [layerBin $byte2];
       set padding    [padding $byte3];

       set bitRate    [bitRate $byte2 $byte3];
       set sampleRate [sampleRate $byte2 $byte3];

       if {($bitRate <= 0) || ($sampleRate <= 0)} {
          return 1;
       }

       set bitsPerSecond [expr {$bitRate * 1000}];

       if {$layer == 3} {
          # Layer I: the frame consists of slots of 4 bytes, a padded frame
          # has one slot more
          #
          return [expr {((12 * $bitsPerSecond / $sampleRate) + $padding) * 4}];
       }

       if {($layer == 1) && ($mpeg != 3)} {
          # Layer III of MPEG 2 and MPEG 2.5: half as many samples per frame
          # as in MPEG 1, so half the length
          #
          return [expr {(72 * $bitsPerSecond / $sampleRate) + $padding}];
       }

       return [expr {(144 * $bitsPerSecond / $sampleRate) + $padding}];
    }
 
    # Checks the fields of a frame header for values that are not allowed.
    #
    # byte1 - first header byte, not looked at
    # byte2 - second header byte as integer
    # byte3 - third header byte as integer
    # byte4 - fourth header byte, not looked at
    #
    # Returns 1 if all checks pass, otherwise 0.
    proc isValidFrameHeader {byte1 byte2 byte3 byte4} {
       # 1. MPEG version not unknown
       #
       if {[mpegBin $byte2] == 0x1} {
          return 0;
       }

       # 2. layer not reserved
       #
       if {[layer $byte2] == 0x0} {
          return 0;
       }

       # 3. sample rate index, 3 not allowed
       #
       if {(($byte3 >> 2) & 0x3) == 0x3} {
          return 0;
       }

       # 4. bitrate, 15 not allowed
       #
       if {(($byte3 >> 4) & 0xF) == 0xf} {
          return 0;
       }

       return 1;
    } 
 
    # Reads the properties of an MP3 file into an array of the caller.
    #
    # fileName - path to the MP3 file
    # arrayVar - name of an array variable in the scope of the caller; it is
    #            erased first and then filled with the elements
    #              mpeg, layer, sampleRate, private, channelMode, copyright,
    #              original, emphasis, samplesPerFrame (from the first frame),
    #              vbr, bitRate, bitRate.calc, duration,
    #              hastag, title, artist, album, year, comment, track,
    #              genreid, genre (from the ID3v1 tag, if there is one)
    # debug    - if true, the data of the single frame headers stays in the
    #            array as well (elements "1.*" and "2.*")
    #
    # Returns an empty string or a warning message. Raises an error if the
    # file can't be opened or read or if no valid frame header is found in it.
    # The channel is closed in any case.
    proc mp3InfoReader {fileName arrayVar {debug 0}} {
       upvar 1 $arrayVar info;
       catch {unset info;};

       # an error of open reaches the caller unchanged
       #
       set fd [open $fileName r];

       if {[catch {readFileInfo $fd $fileName info $debug} result]} {
          set savedInfo $::errorInfo;
          set savedCode $::errorCode;

          catch {close $fd;};
          error $result $savedInfo $savedCode;
       }

       close $fd;

       return $result;
    }

    # Helper of mp3InfoReader: does the parsing on the already opened channel.
    # Takes the name of the array in the scope of the caller and returns the
    # result of mp3InfoReader. Doesn't close the channel.
    proc readFileInfo {fd fileName arrayVar debug} {
       upvar 1 $arrayVar info;
       variable genres;

       # -translation binary switches the encoding to binary as well
       #
       fconfigure $fd -translation binary -buffering full -buffersize 1000000;
       skipId3v2Tag $fd;

       set result             "";
       set frames             0;
       set info(vbr)          0;
       set info(bitRate.calc) 0;

       for {set idx 1} {$idx <= 2} {incr idx} {
          # read until frame header is complete and valid
          #
          set header [findFrameHeader $fd];

          if {[llength $header] == 0} {
             # end of file reached, nothing left to look at
             #
             break;
          }

          foreach {frameStart byte1 byte2 byte3 byte4} $header {break;}
          set frames $idx;

          # recognize all (normally) frame independent header data
          #
          set info($idx.mpeg)        [mpeg $byte2];
          set info($idx.layer)       [layer $byte2];
          set info($idx.protection)  [protection $byte2];
          set info($idx.sampleRate)  [sampleRate $byte2 $byte3];
          set info($idx.bitRate)     [bitRate $byte2 $byte3];
          set info($idx.private)     [private $byte3];
          set info($idx.channelMode) [channelMode $byte2 $byte4];
          set info($idx.copyright)   [copyright $byte4];
          set info($idx.original)    [original $byte4];
          set info($idx.emphasis)    [emphasisDesc $byte4];
          set info($idx.padding)     [padding $byte3];

          if {$info($idx.layer) == 1} {
             set info($idx.samplesPerFrame) 384;
          } elseif {($info($idx.layer) == 3) && ($info($idx.mpeg) != 1)} {
             set info($idx.samplesPerFrame) 576;
          } else {
             set info($idx.samplesPerFrame) 1152;
          }

          if {$idx == 1} {
             # looking for a Xing VBR header (variable bitrate), which
             # follows the side information of the first frame
             #
             if {$info($idx.mpeg) == 1} {
                set sideInfoSize [expr {$info($idx.channelMode) != "single" ? 32 : 17}];
             } else {
                set sideInfoSize [expr {$info($idx.channelMode) != "single" ? 17 : 9}];
             }

             seek $fd [expr {$frameStart + 4 + $sideInfoSize}] start;

             # first recognized bit rate is assumed to be the global one,
             # unless a Xing header tells the average bit rate
             #
             set info(bitRate) $info($idx.bitRate);

             if {[string equal [read $fd 4] "Xing"]} {
                set info(vbr)  1;
                set xingFrames 0;
                set xingBytes  0;

                set xingHeadFlags [readI4 $fd];

                if {$xingHeadFlags & 0x0001} {
                   set xingFrames [readI4 $fd];
                }

                if {$xingHeadFlags & 0x0002} {
                   set xingBytes  [readI4 $fd];
                }

                if {($xingFrames > 0) && ($xingBytes > 0)} {
                   set info(bitRate)   [expr {(($xingBytes / $xingFrames) * $info($idx.sampleRate)) / ($info($idx.mpeg) == 1 ? 144000 : 72000)}];
                }
             }
          } elseif {!$info(vbr) && (!$info($idx.bitRate) || ($info(bitRate) != $info($idx.bitRate)))} {
             # another bit rate is not identical to the global (first) one
             # => calculate bit rate later on!
             #
             set info(bitRate.calc) 1;
             set info(bitRate.list) [list $info(bitRate) $info($idx.bitRate)];
          }

          # jump to the start of the next frame; the frame length counts
          # from the first header byte
          #
          set length [frameLength $byte1 $byte2 $byte3];

          if {$length < 1} {
             set length 1;
          }

          seek $fd [expr {$frameStart + $length}] start;
       }

       if {$frames == 0} {
          error "no valid MPEG audio frame header found in \"$fileName\"";
       }

       # Now reread the last 128 bytes to decode the MP3 Tag
       #
       if {[catch {seek $fd -128 end;}]} {
          set tag "";
       } else {
          set tag [read $fd 128];
       }

       # calculating the mean bit rate, if the frames differ
       #
       if {!$info(vbr) && $info(bitRate.calc)} {
          set result            "warning: variable bit rate - published approximated duration and average bit rate!";
          set info(bitRate.list) [lsort -unique $info(bitRate.list)];
          set sum                0;

          foreach value $info(bitRate.list) {
             incr sum $value;
          }

          set info(bitRate) [expr {int($sum / double($frames))}];
       }

       # calculating the duration using the bit rate and the file size
       #
       if {$info(bitRate) > 0} {
          set info(duration) [expr {int([file size $fileName]*8 / double(1000*$info(bitRate)))}];
       } else {
          # free format or unknown bit rate: nothing to divide by
          #
          set info(duration) 0;

          if {[string equal $result ""]} {
             set result "warning: unknown bit rate - duration could not be calculated!";
          }
       }

       # copy all frame independent data into the return array
       #
       foreach {name value} [array get info 1.*] {
          set name   [join [lrange [split $name "."] 1 end] "."];

          if {[string match "bitRate*" $name] ||
              ($name == "crc")                ||
              ($name == "duration")           ||
              ($name == "frameLength")        ||
              ($name == "padding")            ||
              ($name == "protection")} {
             continue;
          }

          set info($name) $value;
       }

       if {!$debug} {
          array unset info {[0-9]*};
       }

       # decoding the ID3v1 tag: 128 bytes starting with "TAG"
       #
       array set info {
          hastag  0
          title   ""
          artist  ""
          album   ""
          year    ""
          comment ""
          track   0
          genreid -1
       }

       if {([string length $tag] == 128) && [string equal [string range $tag 0 2] "TAG"]} {
          set info(hastag) 1;

          binary scan $tag a3a30a30a30a4a30c id info(title) info(artist) info(album) info(year) info(comment) info(genreid);
          binary scan $tag x125cc zero trackByte;

          if {($zero == 0) && ($trackByte != 0)} {
             # ID3v1.1: a zero byte in front of the last comment byte turns
             # that byte into the track number
             #
             set info(track)   [expr {$trackByte & 0xFF}];
             set info(comment) [string range $info(comment) 0 27];
          }

          # the text fields are padded with zero bytes or blanks
          #
          foreach field {title artist album year comment} {
             set end [string first "\x00" $info($field)];

             if {$end >= 0} {
                set info($field) [string range $info($field) 0 [expr {$end - 1}]];
             }

             set info($field) [string trimright $info($field)];
          }
       }

       set info(genre) [lindex $genres $info(genreid)];

       return $result;
    }

    # Helper of mp3InfoReader: puts the channel behind an ID3v2 tag at the
    # start of the file, or at the start of the file if there is no such tag
    # or if its size points behind the end of the file.
    proc skipId3v2Tag {fd} {
       seek $fd 0 end;
       set fileEnd [tell $fd];
       seek $fd 0 start;

       set head [read $fd 10];

       if {[string equal [string range $head 0 2] "ID3"] &&
           ([binary scan $head x5cc4 flags sizeBytes] == 2)} {
          # the size is stored with 7 bits per byte and counts neither the
          # 10 header bytes nor the 10 footer bytes (flag 0x10)
          #
          set size 0;

          foreach sizeByte $sizeBytes {
             set size [expr {($size << 7) | ($sizeByte & 0x7F)}];
          }

          set audioStart [expr {10 + $size + (($flags & 0x10) ? 10 : 0)}];

          if {$audioStart < $fileEnd} {
             seek $fd $audioStart start;
             return;
          }
       }

       seek $fd 0 start;
    }

    # Helper of mp3InfoReader: reads byte by byte until the last 4 bytes read
    # are a valid frame header. Returns the list of the file offset of the
    # header and its 4 bytes as integers, or an empty list at end of file.
    # The channel is left behind the header.
    proc findFrameHeader {fd} {
       set window "";

       while {1} {
          set next [read $fd 1];

          if {[string length $next] != 1} {
             return [list];
          }

          # keep the last 4 bytes only
          #
          set window [string range $window$next end-3 end];

          if {([string length $window] < 4) ||
              ![string equal [string index $window 0] "\xFF"]} {
             continue;
          }

          binary scan $window xccc byte2 byte3 byte4;

          set byte2 [expr {$byte2 & 0xFF}];
          set byte3 [expr {$byte3 & 0xFF}];
          set byte4 [expr {$byte4 & 0xFF}];

          if {(($byte2 & 0xE0) == 0xE0) && [isValidFrameHeader 255 $byte2 $byte3 $byte4]} {
             return [list [expr {[tell $fd] - 4}] 255 $byte2 $byte3 $byte4];
          }
       }
    }
 
    namespace export -clear mp3InfoReader;
 }

ps 23April2004

Tagging MP3 files is a reader/writer for MP3 tags, which contain the artist name, album info, etc. I have integrated the reading of that info into the code above. Have fun.


MGS [2004/11/24] - Two minor changes above: sampleRatesTable entry should be 3.2, not 3.3, and binary scan format in proc mp3InfoReader should be a3a30a30a30a4a28ccc instead of A3A30A30A30A4A28ccc (tag fields are null-padded, not space-padded).

PWQ 26 Nov 04, added missing variable command to mp3InfoReader proc.


LV Has anyone taken a look at what it would take to access CD-TEXT information (see for example http://www.ncf.carleton.ca/~aa571/cdtext.htm for details) from a CD to get information to add into the MP3 files?


nihaopaul [2005/09/10] Added the 0.2.x entries to the bitRate array, but couldn't find any information on them, so I used -1. I'm indexing ~4000 mp3s with this to a mysql database, via.. eggdrop :D


bll [2012/06/15] Here are some code fragments that will handle id3v2. The following is untested/unimplemented: unsynchronized, extended header, footer, SEEK. Also note there is a disconnect between the ID3v2 frame names and the ones built in to mp3info.

    # Turns a 32 bit value of which only the lower 7 bits of each byte carry
    # data into a plain integer, by closing the gaps of the 4 highest bits.
    #
    # len - the value as read from the file
    #
    # Returns the resulting 28 bit value.
    proc mungeSize { len } {
      set nlen 0
      for { set p 0 } { $p < 4 } { incr p } {
        # braced expressions are compiled and substituted only once
        set t [expr {($len >> ($p * 8)) & 0x7f}]
        set nlen [expr {$nlen | ($t << ($p * 7))}]
      }
      return $nlen
    }

    # Reads the frames of an ID3v2 tag and stores their values in an array,
    # using the 4 character frame ids as element names.
    #
    # infov - name of the array variable in the scope of the caller
    # fdv   - name of the variable in the scope of the caller that holds
    #         the channel
    # tsz   - absolute file offset the channel is moved to as soon as two
    #         zero bytes are found where a frame id is expected
    #
    # Returns nothing. The loop ends at two zero bytes, at a byte pair that
    # looks like the start of an MPEG frame header (the channel is put back
    # in front of it) or at end of file.
    proc readID3v2Info { infov fdv tsz } {
      upvar $infov info
      upvar $fdv fd

      set info(hastag) 1
      while {![eof $fd]} {
        # NOTE(review): if fewer than 2 bytes are left, scan doesn't set both
        # variables and they are unset or hold the values of the previous
        # round - confirm
        scan [read $fd 2] %c%c byte1 byte2;
        if {($byte1 & 0xFF) == 0x00 && ($byte2 & 0xFF) == 0x00} {
          seek $fd $tsz start;
          return;
        }
        if {($byte1 & 0xFF) == 0xFF && ($byte2 & 0xE0) == 0xE0} {
          seek $fd -2 current;
          return;
        }
        # not the end of the frames: step back and read the frame header,
        # which is a 4 character id, a 4 byte size and 2 flag bytes
        seek $fd -2 current;
        scan [read $fd 4] %4s tag;

        # NOTE(review): the size is always passed through mungeSize;
        # presumably only ID3v2.4 stores frame sizes that way, while ID3v2.3
        # stores plain 32 bit sizes - confirm against the tag version
        set len [readI4 $fd]
        set len [mungeSize $len]
        scan [read $fd 2] %c%c flag1 flag2;
        set enc1 0
        if { $tag != "UFID" } {
          # every frame but UFID is treated as if it started with a text
          # encoding byte
          # NOTE(review): presumably only text frames carry such a byte - confirm
          scan [read $fd 1] %c enc1;
          incr len -1; # skipped encoding byte already
        }
        # NOTE(review): assumes that every value ends with one zero byte,
        # which is left out here and skipped further down - confirm
        incr len -1; # don't read null
        set val [read $fd $len]
        if { $enc1 == 0x00 } {
          set val [encoding convertfrom iso8859-1 $val]
        } elseif { $enc1 == 0x01 } {
          # the first 2 bytes are dropped, presumably the byte order mark
          # NOTE(review): the "unicode" encoding doesn't look at that mark,
          # so the byte order of the data is taken for granted - confirm
          set val [encoding convertfrom unicode [string range $val 2 end]]
        } elseif { $enc1 == 0x02 } {
          set val [encoding convertfrom unicode $val]
        } elseif { $enc1 == 0x03 } {
          set val [encoding convertfrom utf-8 $val]
        } elseif { $enc1 != 0x00 } {
          puts "Unknown encoding value"
        }
        read $fd 1; # null byte
        # attached pictures (APIC) and empty values are not stored
        if { $tag != "APIC" && $val != "" } {
          set info($tag) $val
        }
      }
    }

    # this code goes before the '# read until frame header is complete and valid' loop.

         # Done once per file: if an ID3v2 tag starts at the current position
         # (the start of the file), its frames are read into the info array;
         # otherwise the channel is put back to where it was.
         # Expects fd, info and headerChecked to exist in the surrounding code.
         if { ! $headerChecked } {
           # preset everything that is tested further down, so that a file
           # without a tag or a tag without the respective flag doesn't
           # raise a "no such variable" error
           set hd          ""
           set isid3v2     false;
           set id3v2unsync false;
           set id3v2ext    false;
           set id3v2footer false;
           set tagStart    [tell $fd]
           binary scan [read $fd 3] A3 hd

           if { $hd == "ID3" } {
             set isid3v2 true;
             # ID3(1-3) version(4-5) flags(6) size(7-10)
             scan [read $fd 3] %c%c%c byte4 byte5 flags;
             set tsz [readI4 $fd]
             set tsz [mungeSize $tsz]
             # the stored size doesn't count the 10 header bytes, so this
             # makes tsz the file offset of the first byte behind the tag data
             incr tsz 10
             if { ($flags & 0x80) == 0x80 } {
               set id3v2unsync true;
               puts "mp3: unsynchronized"
             }
             if { ($flags & 0x40) == 0x40 } {
               set id3v2ext true;
               puts "mp3: has extended header"
             }
             if { ($flags & 0x10) == 0x10 } {
               set id3v2footer true;
               puts "mp3: has footer"
               # the footer follows the tag data and isn't counted either
               incr tsz 10
             }
             seek $fd 10 start;

             if { $id3v2ext } {
               set sz [readI4 $fd]
               if { $byte4 >= 4 } {
                 # ID3v2.4: the size is stored with 7 bits per byte and
                 # counts the 4 size bytes, which have been read already
                 seek $fd [expr {[mungeSize $sz] - 4}] current;
               } else {
                 # ID3v2.3: plain size that doesn't count the size bytes
                 seek $fd $sz current;
               }
             }
             readID3v2Info info fd $tsz
           } else {
             seek $fd $tagStart start;
           }
           set headerChecked true
         }

See also: