Chemical arithmetics

Summary

Richard Suchenwirth 2005-01-18: Here's a little fun project that does "arithmetic" on chemical formulae (see the example under "Testing" below).

Description

# chem -- add and subtract the atoms of several compounds and return the
# net formula.
#
# Arguments (any number, processed left to right):
#   +  or  -    add / subtract the compounds that follow; also resets the
#               coefficient to 1
#   <digits>    coefficient for the compounds that follow, up to the next
#               + or -
#   other       a compound formula, expanded into atoms by chem'split
#
# Returns the atoms whose net count is positive, formatted by chem'Hill.
proc chem args {
    set sign   1
    set factor 1
    foreach arg $args {
        switch -regexp -- $arg {
            {^[+]$} {set sign  1; set factor 1}
            {^[-]$} {set sign -1; set factor 1}
            {^[0-9]+$} {
                # Read as decimal so that "010" means ten rather than
                # being taken as (possibly invalid) octal by incr.
                scan $arg %d factor
            }
            default {
                # Only a bare + or - is an operator; every other token,
                # even one containing a sign such as "OH-", is a compound.
                set delta [expr {$sign * $factor}]
                foreach e [chem'split $arg] {
                    inc a($e) $delta
                }
            }
        }
    }
    chem'Hill [array get a]
}
# chem'split -- expand a compound formula into a flat list of its atoms.
#
#   compound   formula built from element symbols (a capital letter plus an
#              optional lowercase letter), each optionally followed by a
#              count. Groups written in (...) or [...] may carry a count
#              as well.
#
# Returns a list with one entry per atom, e.g.
#   % chem'split H2SO4
#   H H S O O O O
#   % chem'split Ca(OH)2
#   Ca O H O H
# Characters that fit none of these shapes are skipped. A closing bracket
# without a matching opener is ignored, and a group that is never closed
# is kept as if its opening bracket were absent.
proc chem'split compound {
    # Alternatives: element+count | opening bracket | closing bracket+count
    set re {([A-Z][a-z]?)([0-9]*)|(\(|\[)|(?:\)|\])([0-9]*)}
    set outer {}   ;# atom lists of the groups enclosing the current one
    set res {}     ;# atoms collected for the group currently being read
    foreach {- el n open m} [regexp -all -inline $re $compound] {
        if {$el ne ""} {
            # scan reads the count as decimal, so "08" is eight and not
            # an invalid octal number.
            if {$n eq ""} {set n 1} else {scan $n %d n}
            for {set i 0} {$i < $n} {incr i} {lappend res $el}
        } elseif {$open ne ""} {
            lappend outer $res
            set res {}
        } elseif {[llength $outer]} {
            # Closing bracket: repeat the finished group and append it to
            # the atoms of the enclosing group.
            if {$m eq ""} {set m 1} else {scan $m %d m}
            set group $res
            set res [lindex $outer end]
            set outer [lrange $outer 0 end-1]
            for {set i 0} {$i < $m} {incr i} {
                set res [concat $res $group]
            }
        }
    }
    if {[llength $outer]} {
        # Unclosed groups: keep their atoms in the order they were written.
        set flat {}
        foreach level $outer {set flat [concat $flat $level]}
        set res [concat $flat $res]
    }
    return $res
}
#-- A generic incrementor that creates a variable if it doesn't exist:
#   varName   name of a variable (scalar or array element) in the caller's
#             scope
#   amount    integer to add; defaults to 1
# Returns the new value of the variable.
proc inc {varName {amount 1}} {
     upvar 1 $varName var
     # Braced condition: evaluated once by expr, not substituted twice.
     if {![info exists var]} {set var 0}
     incr var $amount
}

Testing - still wrong (what are the rules of ordering elements in compounds?), but the element sum is right at least:

% chem NaOH + HCl - H2O
ClNa

Tcling chemists, take it with a grain of salt - and please contribute to this page ! :^)

DKF: Rules for ionic substances are that you start from most electropositive ion and go to most electronegative ion. I think. ;^)

TS: If the formula is computer-generated, it's best to use Hill order.

RS: Thanks for the hint! Here's my Tcl implementation of Hill order:

# chem'Hill -- format element counts as a formula string.
#
#   chemdict   list of alternating element symbol and count
#
# When carbon is present with a positive count it is written first, then
# hydrogen, then the remaining elements in lsort order. Otherwise all
# elements are written in lsort order. Elements whose count is not greater
# than zero are left out, and a count of 1 is not printed.
proc chem'Hill chemdict {
    array set count $chemdict
    set order [lsort [array names count]]
    if {[info exists count(C)] && $count(C) > 0} {
        # Move C (and H, when listed) to the front of the sorted names.
        set front [list C]
        if {[info exists count(H)]} {lappend front H}
        set others {}
        foreach symbol $order {
            if {$symbol ne "C" && $symbol ne "H"} {lappend others $symbol}
        }
        set order [concat $front $others]
    }
    set formula ""
    foreach symbol $order {
        if {$count($symbol) > 0} {
            append formula $symbol \
                [expr {$count($symbol)>1? "$count($symbol)":""}]
        }
    }
    return $formula
}

Testing (samples from the link above):

% chem'Hill {Al 6 Ca 5 O 14}
Al6Ca5O14
% chem'Hill {Na 1 H 1 C 1 O 3}
CHNaO3
% chem'Hill {Se 2 S 2 N 2 H 2 C 4}
C4H2N2S2Se2