Facing the daunting task of hand entering close to 100 1099-B transactions, I investigated alternative import methods. TurboTax can import tax data using the TXF file format. All the available convertors on the web are Windows programs (yuck!), so I decided to write one in Tcl. I did a quick search, but somehow missed [LV]'s converter written in TCLOO: [Taxes and TXF file import]. But, since mine is done as well, I thought it wouldn't hurt to have an alternative :-) So here it is: ====== #!/opt/local/bin/tclsh # # csv2txf - Convert a csv file to a txf file # Usage: csv2txf # # The csv file must have a header that has column names containing: # Description (*desc*) : the name of the stock or fund # Date Sold (*sold*) : the date the asset was sold # Date Acquired (*acquire*) : the purchase date of the asset, or "various" # Proceeds (*proceed*) : the $ amount of the sale # Basis (*basis*) : the original $ cost of the asset # Quantity (*quant*) : the number of shares sold # Type: "Long Term" or "Short Term" sale, or "LT" or "ST" # # The order of the columns does not matter. # Other columns are ignored. # The Type column is only needed sales with "various" data acquired. # # Technical details about the TXF file format comes from: # http://turbotax.intuit.com/txf/TXF041.jsp # # Author: Brian Griffin # Date: April, 2011 # # Process command arguments # if {[llength $argv] != 2} { puts stderr "Usage: csv2txf " exit } set infilename [lindex $argv 0] set outfilename [lindex $argv 1] if {![file exists $infilename]} { puts stderr "$infilename not found" exit } # # CSV file parser # Processes csv file into 2 Tcl lists: # header list: list of column names in order # data list: list of data rows. Each row is a list in column order # # Supporting functions: gettok, getrec # #set keymap {" " _ / _ - _ \( _ \) _ : _} set keymap {} proc gettok {tokvar bufvar} { upvar $tokvar tok upvar $bufvar buf set cur "" set tok "" set state comma set i 0 set TOK TOK_WORD foreach char $buf { switch -- $state { comma { switch -- $char { , - \n { if {[string length $cur] == 0} { append cur $char incr i if {$char eq ","} { set TOK TOK_COL } else { set TOK TOK_EOR } } set state done break } \" { set state quote0 } default { append cur $char } } } quote0 { if {$char eq "\""} { set state comma } elseif {$char eq "," || $char eq "\n"} { if {[string length $cur] == 0} { append cur $char incr i if {$char eq ","} { set TOK TOK_COL } else { set TOK TOK_EOR } } set state done break } else { set state quote1 } append cur $char } quote1 { if {$char eq "\""} { set state quote2 } else { append cur $char } } quote2 { if {$char eq "\""} { append cur $char set state quote1 } elseif {$char eq "," || $char eq "\n"} { if {[string length $cur] == 0} { append cur $char incr i if {$char eq ","} { set TOK TOK_COL } else { set TOK TOK_EOR } } set state done break } else { append cur $char set state comma } } } if {$state eq "done"} { break } incr i } set buf [lrange $buf $i end] set tok $cur if {[string length $tok] == 0 && [llength $buf] == 0} { set TOK TOK_END } return $TOK } proc getrec {bufvar} { upvar $bufvar buf set tok "" set rec [list] set word "" for {set done 0} {!$done} {} { set t [gettok tok buf] switch -- $t { TOK_EOR - TOK_END { lappend rec $word set done 1 } TOK_WORD { set word $tok } TOK_COL { lappend rec $word set word "" } } } return $rec } proc read_csv {filename headervar listvar} { upvar $headervar cl upvar $listvar j global keymap set f [open $filename] set buf [read $f] close $f set buflist [split $buf {}] set cmd "" set cl0 [getrec buflist] set cl [list] foreach col $cl0 { set col [string trim $col] set col [string map $keymap $col] lappend cl $col } set ncol [llength $cl0] set cnt 0 set j [list] while {[llength $buflist] > 0} { set rec [getrec buflist] incr cnt set reclen [llength $rec] for {set i $reclen} {$i < $ncol} {incr i} { lappend rec "-" } lappend j $rec } } # # Supporting functions for converter # # MDY: take any date string and format in MM/DD/YYYY format # allows "various" as a valid date proc MDY {date} { set date [string trim $date] if {[string match -nocase various $date]} { return various } return [clock format [clock scan $date] -format %D] } # S: Cleans up dollar values. proc S {value} { set value [string map {( {} ) {} { } {} , {} {$} {}} $value] if {[string length $value] == 0} { set value 0 } set value [expr {$value}] return "\$$value" } # # InitTXFHeader # # writes out the header portion of the TXF file. proc InitTXFHeader {outputfile} { set ProgramName csv2txf.tcl puts $outputfile "V041\nA${ProgramName}\nD [clock format [clock seconds] -format %D]\n^" } # # Data access convience function # proc LX {r x} { return [string trim [lindex $r $x]] } # # ProcessData # # produces this format: # Record Format 4 # P security # D date acquired # D date sold # $ cost basis # $ sales net (net of commission) # proc ProcessData {outputfile header data} { #{Description text} {Date_Sold text} {Net_Proceeds text} {Date_Acquired text} {Cost_Basis text} set dx [lsearch -nocase $header "*desc*"] set sx [lsearch -nocase $header "*sold*"] set ax [lsearch -nocase $header "*acquire*"] set px [lsearch -nocase $header "*proceed*"] set bx [lsearch -nocase $header "*basis*"] set qx [lsearch -nocase $header "*quant*"] set tx [lsearch -nocase $header "*type*"] foreach record $data { set buydate [LX $record $ax] set selldate [LX $record $sx] set description [LX $record $dx] set quantity [LX $record $qx] set cost [LX $record $bx] set proceeds [LX $record $px] set cgtype [LX $record $tx] if {$description eq "" || $buydate eq "" || $selldate eq ""} { continue } if {[TXFEntryHeader $outputfile $buydate $selldate $cgtype]} { puts stderr "$quantity $description with \"Various\" Buy date, reported as Long Term" } puts $outputfile "P$quantity $description" puts $outputfile "D[MDY $buydate]" puts $outputfile "D[MDY $selldate]" puts $outputfile [S $cost] puts $outputfile [S $proceeds] TXFEntryEnding $outputfile } } # # TXFENtryHeader # write the start for an entry # # The type of entry is determined by the type of sale: # L RNum Name Cpy Srt Sgn Frm Line # ----------------------------------------------------------------------------- # 0 320 "Schedule D" Y N I 1 2004:D #> 2 321 "ST gain/loss - security" Y N I 4 2004:D:1 #> 2 323 "LT gain/loss - security" Y N I 4 2004:D:8 # 2 673 "Short/Long gain or loss" Y N I 4 2004:D # 2 682 "Wash Sale - security" Y N I 4 2004:D # 2 644 "28% cap gain" Y A I 3 2004:D:20 # 2 645 "Unrec sec 1250" Y A I 3 2004:D:19 # 2 646 "Sec 1202 gain" Y A I 3 2004:D:20 # 2 677 "Qualified 5-year gain" Y A I 3 2004:D:35 # # This code only handles security sales of long or short term. # proc TXFEntryHeader {outputfile BuyDate SellDate CGType} { set warning 0 set LongTerm 0 if {$CGType ne "" && ([string match -nocase "*LT*" $CGType] || [string match -nocase "*Long*" $CGType])} { set LongTerm 1 } elseif {$CGType ne "" && ([string match -nocase "*ST*" $CGType] || [string match -nocase "*Short*" $CGType])} { set LongTerm 0 } else { set BuyDate [string trim $BuyDate] if {[string match -nocase various $BuyDate]} { set LongTerm 1 set warning 1 } else { set bd [clock scan $BuyDate] set sd [clock scan $SellDate] set delta [expr {$sd - $bd}] set years [expr {[clock format $delta -gmt 1 -format %Y] - 1970}] set LongTerm [expr {$years >= 1}] } } if {$LongTerm} { puts $outputfile "TD\nN323\nC1\nL1" } else { puts $outputfile "TD\nN321\nC1\nL1" } return $warning } # # TXFEntryEnding # writes ending for record entry # proc TXFEntryEnding {outputfile} { puts $outputfile "\n^" } # # Begin processing # read_csv $infilename head data if {[catch {open $outfilename w} outfile]} { puts stderr "Cannot open $outfilename: $outfile" exit } InitTXFHeader $outfile ProcessData $outfile $head $data close $outfile #EOF ====== <>Utilities|PIM|Currency and Finance