Taxes: csv to txf converter

Facing the daunting task of hand entering close to 100 1099-B transactions, I investigated alternative import methods.

TurboTax can import tax data using the TXF file format. All the available convertors on the web are Windows programs (yuck!), so I decided to write one in Tcl.

I did a quick search, but somehow missed Bezoar's converter written in TclOO: Taxes and TXF file import. But, since mine is done as well, I thought it wouldn't hurt to have an alternative :-)

So here it is:

#!/opt/local/bin/tclsh
#
# csv2txf - Convert a csv file to a txf file
# Usage:  csv2txf <csv-file> <txf-file>
#
# The csv file must have a header that has column names containing:
#   Description   (*desc*)    : the name of the stock or fund
#   Date Sold     (*sold*)    : the date the asset was sold
#   Date Acquired (*acquire*) : the purchase date of the asset, or "various"
#   Proceeds      (*proceed*) : the $ amount of the sale
#   Basis         (*basis*)   : the original $ cost of the asset
#   Quantity      (*quant*)   : the number of shares sold
#   Type: "Long Term" or "Short Term" sale, or "LT" or "ST"
#
# The order of the columns does not matter.  
# Other columns are ignored.  
# The Type column is only needed sales with "various" data acquired.
#
# Technical details about the TXF file format comes from:
#    http://turbotax.intuit.com/txf/TXF041.jsp
#
# Author: Brian Griffin
# Date: April, 2011 

#
# Process command arguments
#
if {[llength $argv] != 2} {
    puts stderr "Usage: csv2txf <csv-file> <txf-file>"
    exit
}

set infilename [lindex $argv 0]
set outfilename [lindex $argv 1]

if {![file exists $infilename]} {
    puts stderr "$infilename not found"
    exit
}


#
# CSV file parser
#   Processes csv file into 2 Tcl lists:
#     header list: list of column names in order
#     data list: list of data rows.  Each row is a list in column order
#
# Supporting functions: gettok, getrec
#

#set keymap {" " _ / _ - _ \( _ \) _ : _}
set keymap {}

proc gettok {tokvar bufvar} {
    upvar $tokvar tok
    upvar $bufvar buf

    set cur ""
    set tok ""
    set state comma
    set i 0
    set TOK TOK_WORD
    foreach char $buf {
        switch -- $state {
            comma {
                switch -- $char {
                    , -
                    \n {
                        if {[string length $cur] == 0} {
                            append cur $char
                            incr i
                            if {$char eq ","} {
                                set TOK TOK_COL
                            } else {
                                set TOK TOK_EOR
                            }
                        }
                        set state done
                        break
                    }
                    \" {
                        set state quote0
                    }
                    default {
                        append cur $char
                    }
                }
            }
            quote0 {
                if {$char eq "\""} {
                    set state comma
                } elseif {$char eq "," || $char eq "\n"} {
                    if {[string length $cur] == 0} {
                        append cur $char
                        incr i
                        if {$char eq ","} {
                            set TOK TOK_COL
                        } else {
                            set TOK TOK_EOR
                        }
                    }
                    set state done
                    break
                } else {
                    set state quote1
                }
                append cur $char
            }
            quote1 {
                if {$char eq "\""} {
                    set state quote2
                } else {
                    append cur $char
                }
            }
            quote2 {
                if {$char eq "\""} {
                    append cur $char
                    set state quote1
                } elseif {$char eq "," || $char eq "\n"} {
                    if {[string length $cur] == 0} {
                        append cur $char
                        incr i
                        if {$char eq ","} {
                            set TOK TOK_COL
                        } else {
                            set TOK TOK_EOR
                        }
                    }
                    set state done
                    break
                } else {
                    append cur $char
                    set state comma
                }
            }
        }
        if {$state eq "done"} {
            break
        }
        incr i
    }
    set buf [lrange $buf $i end]
    set tok $cur
    if {[string length $tok] == 0 && [llength $buf] == 0} {
        set TOK TOK_END
    }
    return $TOK
}

proc getrec {bufvar} {
    upvar $bufvar buf
    set tok ""
    set rec [list]
    set word ""
    for {set done 0} {!$done} {} {
        set t [gettok tok buf]
        switch -- $t {
            TOK_EOR -
            TOK_END {
                lappend rec $word
                set done 1
            }
            TOK_WORD {
                set word $tok
            }
            TOK_COL {
                lappend rec $word
                set word ""
            }
        }
    }
    return $rec
}

proc read_csv {filename headervar listvar} {
    upvar $headervar cl
    upvar $listvar j
    global keymap
    set f [open $filename]
    set buf [read $f]
    close $f
    set buflist [split $buf {}]
    
    set cmd ""
    set cl0 [getrec buflist]
    set cl [list]
    foreach col $cl0 {
        set col [string trim $col]
        set col [string map $keymap $col]
        lappend cl $col
    }
    set ncol [llength $cl0]    
    set cnt 0
    set j [list]
    while {[llength $buflist] > 0} {
        set rec [getrec buflist]
        incr cnt
        set reclen [llength $rec]
        for {set i $reclen} {$i < $ncol} {incr i} {
            lappend rec "-"
        }
        lappend j $rec
    }
}

#
# Supporting functions for converter
#

# MDY: take any date string and format in MM/DD/YYYY format
#      allows "various" as a valid date
proc MDY {date} {
    set date [string trim $date]
    if {[string match -nocase various $date]} {
        return various
    }
    return [clock format [clock scan $date] -format %D]
}

# S: Cleans up dollar values.
proc S {value} {
    set value [string map {( {} ) {} { } {} , {} {$} {}} $value]
    if {[string length $value] == 0} {
        set value 0
    }
    set value [expr {$value}]
    return "\$$value"
}

#
# InitTXFHeader
#
#  writes out the header portion of the TXF file.
proc InitTXFHeader {outputfile} {
    set ProgramName csv2txf.tcl
    puts $outputfile "V041\nA${ProgramName}\nD [clock format [clock seconds] -format %D]\n^"
}

#
# Data access convience function
# 
proc LX {r x} {
    return [string trim [lindex $r $x]]
}

#
# ProcessData
#
#  produces this format:
#    Record Format 4
#      P security
#      D date acquired
#      D date sold
#      $ cost basis
#      $ sales net (net of commission)
#
proc ProcessData {outputfile header data} {
    #{Description text} {Date_Sold text} {Net_Proceeds text} {Date_Acquired text} {Cost_Basis text}
    set dx [lsearch -nocase $header "*desc*"]
    set sx [lsearch -nocase $header "*sold*"]
    set ax [lsearch -nocase $header "*acquire*"]
    set px [lsearch -nocase $header "*proceed*"]
    set bx [lsearch -nocase $header "*basis*"]
    set qx [lsearch -nocase $header "*quant*"]
    set tx [lsearch -nocase $header "*type*"]
    foreach record $data {
        set buydate [LX $record $ax]
        set selldate [LX $record $sx]
        set description [LX $record $dx]
        set quantity [LX $record $qx]
        set cost [LX $record $bx]
        set proceeds [LX $record $px]
        set cgtype [LX $record $tx]
        if {$description eq "" || $buydate eq "" || $selldate eq ""} {
            continue
        }
        if {[TXFEntryHeader $outputfile $buydate $selldate $cgtype]} {
            puts stderr "$quantity $description with \"Various\" Buy date, reported as Long Term"
        }
        puts $outputfile "P$quantity $description"
        puts $outputfile "D[MDY $buydate]"
        puts $outputfile "D[MDY $selldate]"
        puts $outputfile [S $cost]
        puts $outputfile [S $proceeds]
        TXFEntryEnding $outputfile
    }
}

#
# TXFENtryHeader
#  write the start for an entry
# 
#  The type of entry is determined by the type of sale:
#    L RNum Name                           Cpy Srt Sgn Frm          Line
#    -----------------------------------------------------------------------------
#    0 320  "Schedule D"                   Y   N   I   1            2004:D
#>   2 321  "ST gain/loss - security"      Y   N   I   4            2004:D:1
#>   2 323  "LT gain/loss - security"      Y   N   I   4            2004:D:8
#    2 673  "Short/Long gain or loss"      Y   N   I   4            2004:D
#    2 682  "Wash Sale - security"         Y   N   I   4            2004:D
#    2 644  "28% cap gain"                 Y   A   I   3            2004:D:20
#    2 645  "Unrec sec 1250"               Y   A   I   3            2004:D:19
#    2 646  "Sec 1202 gain"                Y   A   I   3            2004:D:20
#    2 677  "Qualified 5-year gain"        Y   A   I   3            2004:D:35
#
# This code only handles security sales of long or short term.
#
proc TXFEntryHeader {outputfile BuyDate SellDate CGType} {
    set warning 0
    set LongTerm 0
    if {$CGType ne "" &&
        ([string match -nocase "*LT*" $CGType] ||
         [string match -nocase "*Long*" $CGType])} {
        set LongTerm 1
    } elseif {$CGType ne "" &&
              ([string match -nocase "*ST*" $CGType] ||
               [string match -nocase "*Short*" $CGType])} {
        set LongTerm 0
    } else {
        set BuyDate [string trim $BuyDate]
        if {[string match -nocase various $BuyDate]} {
            set LongTerm 1
            set warning 1
        } else {
            set bd [clock scan $BuyDate]
            set sd [clock scan $SellDate]
            set delta [expr {$sd - $bd}]
            set years [expr {[clock format $delta -gmt 1 -format %Y] - 1970}]
            set LongTerm [expr {$years >= 1}]
        }
    }
    if {$LongTerm} {
        puts $outputfile "TD\nN323\nC1\nL1"
    } else {
        puts $outputfile "TD\nN321\nC1\nL1"
    }
    return $warning
}

#
# TXFEntryEnding
#  writes ending for record entry
#
proc TXFEntryEnding {outputfile} {
    puts $outputfile "\n^"
}

#
# Begin processing
#
read_csv $infilename head data

if {[catch {open $outfilename w} outfile]} {
    puts stderr "Cannot open $outfilename: $outfile"
    exit
}

InitTXFHeader $outfile
ProcessData $outfile $head $data

close $outfile

#EOF