Version 1 of BibTeX parser

Updated 2005-03-03 18:13:49 by lwv

NEM 3Mar2005: Here's a very simple parser for the BibTeX bibliography database format. It's rather braindead at the moment, and is really not much more than a tokeniser. To use it, call bibtex::parse with the contents of a database, and you'll get back a list of entries. Each entry is itself a list of three elements: type (e.g. article, thesis, etc.), key (whatever key it was given in the database), and a final list of key/value pairs (in dict form) representing the record. It doesn't fully handle BibTeX's (or TeX's) insane file format, and doesn't handle @string macros etc. There may well be further restrictions on what it can cope with. It suffices for what I need it for, though -- a quick and dirty hack to dump a BibTeX db to a website. Anyway, enjoy!

 # bibtex.tcl --
 #
 #        A basic parser for BibTeX bibliography databases.
 #
 # Copyright (c) 2005 Neil Madden.
 # License: Tcl/BSD style.

 # Require Tcl 8.4 or a later 8.x release.
 package require Tcl 8.4
 # Announce this script as version 0.1 of the "bibtex" package.
 package provide bibtex 0.1

 namespace eval bibtex {

    # parse --
    #
    #        Split the text of a BibTeX database into its entries.
    #
    # Arguments:
    #        bibtex        The database contents as a single string.
    #
    # Results:
    #        A list with one element per recognised entry. Each element is
    #        a three-element list: the lower-cased entry type, the trimmed
    #        citation key, and a flat name/value list of the entry fields
    #        as produced by ParseBlock.
    #
    # Side effects:
    #        Chunks that do not look like "type, opening brace, key, comma,
    #        fields, closing brace" are reported on stderr and skipped.
    #
    # Limitations:
    #        The input is cut at every at-sign, so an at-sign inside a field
    #        value truncates that entry. Text before the first at-sign is
    #        ignored.
    proc parse {bibtex} {
        set ret [list]
        foreach block [lrange [split $bibtex @] 1 end] {
            if {[regexp {([^\{]+)\{([^,]+),(.*)\}[^\}]*} $block -> type key rest]} {
                lappend ret [list [Tidy $type] [string trim $key] [ParseBlock $rest]]
            } else {
                puts stderr "Skipping: $block"
            }
        }
        return $ret
    }

    # Tidy --
    #
    #        Normalise an identifier: strip surrounding whitespace and
    #        convert it to lower case.
    proc Tidy {str} {
        string tolower [string trim $str]
    }

    # ParseBlock --
    #
    #        Parse the field section of one entry (everything between the
    #        comma after the citation key and the closing brace).
    #
    # Arguments:
    #        block        The "name = value, name = value ..." text.
    #
    # Results:
    #        A flat list alternating lower-cased field names and cleaned
    #        field values, in the order they appear.
    proc ParseBlock {block} {
        set ret [list]
        set index 0
        # The field name must not contain an equals sign. Otherwise, for
        # input written without spaces around the equals sign, the greedy
        # name pattern swallows the whole first field and stops at the
        # equals sign of the following one.
        while {[regexp -start $index -indices -- {([^\s=]+)[^=]*=(.*)} \
                    $block -> key rest]} {
            foreach {ks ke} $key { break }
            set k [Tidy [string range $block $ks $ke]]
            foreach {rs re} $rest { break }
            # ParseBibString returns the value together with the offset in
            # block at which the search for the next field resumes.
            foreach {v index} \
                [ParseBibString $rs [string range $block $rs $re]] \
                { break }
            lappend ret $k $v
        }
        return $ret
    }

    # ParseBibString --
    #
    #        Read one field value from the start of str.
    #
    # Arguments:
    #        index        Offset of the first character of str within the
    #                     enclosing block.
    #        str          Text starting right after the equals sign of a field.
    #
    # Results:
    #        A two-element list: the value, with braces and double quotes
    #        removed and runs of whitespace collapsed to single spaces,
    #        and the block offset just past the consumed text (after the
    #        terminating comma, or at an unbalanced closing brace).
    #
    # Notes:
    #        A comma ends the value only when it is outside all braces and
    #        outside a double-quoted string, so values such as
    #        "Last, First" stay intact. A backslash and the character that
    #        follows it are both discarded.
    proc ParseBibString {index str} {
        set depth 0
        set inQuote 0
        set escape 0
        set retstr ""
        foreach char [split $str ""] {
            incr index
            if {$escape} {
                # Drop the character that follows a backslash.
                set escape 0
                continue
            }
            if {$char eq "\\"} {
                set escape 1
                continue
            } elseif {$char eq "\{"} {
                incr depth
                continue
            } elseif {$char eq "\}"} {
                incr depth -1
                # An unmatched closing brace ends the value; step back so
                # the returned offset points at that brace.
                if {$depth < 0} { incr index -1; break }
                continue
            } elseif {$char eq "\""} {
                # Quotes delimit a string only at brace depth zero; they
                # are stripped from the output either way.
                if {$depth == 0} { set inQuote [expr {!$inQuote}] }
                continue
            } elseif {$char eq "," && $depth == 0 && !$inQuote} {
                break
            }
            append retstr $char
        }
        regsub -all {\s+} $retstr { } retstr
        return [list [string trim $retstr] $index]
    }
 }

Category Application Category Word and Text Processing