Version 6 of from utf8 to ascii (from french to us)

Updated 2012-06-06 15:45:15 by AK

The following script transforms a UTF-8 encoded file containing French special characters into a plain US-ASCII text file, removing the accents on a, e, i, o, u, and c.

proc remove_accent { line } {
        # Return a copy of line in which accented Latin letters used in
        # French text are replaced by their unaccented ASCII equivalents.
        #
        # Arguments:
        #   line - the text to convert (any string, may be empty)
        # Result:
        #   the converted string; characters that are not in the table
        #   below are returned unchanged.
        #
        # Both lower-case and upper-case letters are handled, and the
        # ligatures oe/ae are expanded to two ASCII letters.
        #
        # The keys are written as \uXXXX escapes so the table does not
        # depend on the encoding used to read this script file.
        # A single string map pass replaces one regsub pass per vowel.
        return [string map [list \
                \u00e0 a  \u00e2 a  \u00e4 a \
                \u00e9 e  \u00e8 e  \u00ea e  \u00eb e \
                \u00ee i  \u00ef i \
                \u00f4 o  \u00f6 o \
                \u00f9 u  \u00fb u  \u00fc u \
                \u00e7 c \
                \u00ff y \
                \u0153 oe \u00e6 ae \
                \u00c0 A  \u00c2 A  \u00c4 A \
                \u00c9 E  \u00c8 E  \u00ca E  \u00cb E \
                \u00ce I  \u00cf I \
                \u00d4 O  \u00d6 O \
                \u00d9 U  \u00db U  \u00dc U \
                \u00c7 C \
                \u0178 Y \
                \u0152 OE \u00c6 AE \
        ] $line]
}

        
# Convert the UTF-8 file named by the first command-line argument into
# <inputfile>.ascii, passing every line through remove_accent.
if {[llength $argv] < 1} {
        puts stderr "usage: [info script] inputfile"
        exit 1
}
set inputfile [lindex $argv 0]

set fin [open $inputfile "r"]
set fout [open $inputfile.ascii "w"]
fconfigure $fin -encoding utf-8
fconfigure $fout -encoding ascii

# The two-argument form of gets returns -1 only when no more data could
# be read. Testing eof after a one-argument gets would drop the last
# line of a file that does not end with a newline, because eof is
# already true when that final line is returned.
while {[gets $fin line] >= 0} {
        puts $fout [remove_accent $line]
}
close $fin
close $fout