German language un-genderized

wdb Currently, German offices tend to genderize German language. Thatʼs a pestilence. German language is my secret beloved.

Gegenwärtig gibt es in Behörden die Tendenz, die Deutsche Sprache zu gendern. Das ist eine Seuche. Die Deutsche Sprache ist meine heimliche Geliebte.

The current application transforms deformed language to what was intended. See http://wolf-dieter-busch.de/html/Tagebuch/2019/Text_EntGendern.htm

#!/usr/bin/tclsh
package require Tcl 8.6.1

proc findGenderSternSingular txt {
  regexp -inline -all\
    {\m[[:alpha:]]+[*_/]in\M}\
    $txt
}
proc findGenderStern txt {
  regexp -inline -all\
    {\m[[:alpha:]]+[*_/]innen[[:alpha:]]*\M}\
    $txt
}
proc findKreativStern txt {
  set result {}
  foreach hit [regexp -inline -all\
    {\m[[:alpha:]]+[*_][[:alpha:]*_]*?\M}\
    $txt] {
    if {![string match *innen* $hit]} then {
      lappend result $hit
    }
  }
  set result
}
proc findGenderBinnenMajuskel txt {
  regexp -inline -all\
    {\m[[:upper:]][[:lower:]]+I[[:lower:]]+\M}\
    $txt
}

proc findGenderWeiblichFolgt txt {
  regexp -inline -all\
    {\m([[:alpha:]]+)\s+(?:und|&|\+)\s+\1innen\M}\
    $txt
}
proc findGenderWeiblichFuehrt txt {
  regexp -inline -all\
    {\m([[:alpha:]]+)innen\s+(?:und|&|\+)\s+\1\M}\
    $txt
}

proc findGenderWeiblichFolgtMitE txt {
  set result {}
  foreach {a b} [regexp -inline -all\
    {\m([[:alpha:]]+)e\s+(?:und|&|\+)\s+\1innen\M}\
    $txt] {
    lappend result $a [append b e]
  }
  set result
}
proc findGenderWeiblichFuehrtMitE txt {
  set result {}
  foreach {a b} [regexp -inline -all\
    {\m([[:alpha:]]+)innen\s+(?:und|&|\+)\s+\1e\M}\
    $txt] {
    lappend result $a [append b e]
  }
  set result
}

proc findGenderWeiblichFolgtMitN txt {
  set result {}
  foreach {a b} [regexp -inline -all\
    {\m([[:alpha:]]+)n\s+(?:und|&|\+)\s+\1innen\M}\
    $txt] {
    lappend result $a [append b n]
  }
  set result
}
proc findGenderWeiblichFuehrtMitN txt {
  set result {}
  foreach {a b} [regexp -inline -all\
    {\m([[:alpha:]]+)innen\s+(?:und|&|\+)\s+\1n\M}\
    $txt] {
    lappend result $a [append b n]
  }
  set result
}

proc findGenderWeiblichFolgtMitEn txt {
  set result {}
  foreach {a b} [regexp -inline -all\
    {\m([[:alpha:]]+)en\s+(?:und|&|\+)\s+\1innen\M}\
    $txt] {
    lappend result $a [append b en]
  }
  set result
}
proc findGenderWeiblichFuehrtMitEn txt {
  set result {}
  foreach {a b} [regexp -inline -all\
    {\m([[:alpha:]]+)innen\s+(?:und|&|\+)\s+\1en\M}\
    $txt] {
    lappend result $a [append b en]
  }
  set result
}

proc GenderStringIsShorter {a b} {
  expr {[string length [lindex $a 0]] < [string length [lindex $b 0]]}
}

proc setMapList txt {
  lappend map\
    {*}[lmap {a b} [findGenderWeiblichFolgt $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFuehrt $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFolgtMitE $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFuehrtMitE $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFolgtMitN $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFuehrtMitN $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFolgtMitEn $txt] {list $a $b}]\
    {*}[lmap {a b} [findGenderWeiblichFuehrtMitEn $txt] {list $a $b}]\
    {*}[lmap x [findKreativStern $txt] {list $x [regsub -all {[*_]} $x ""]}]\
    {*}[lmap x [findGenderSternSingular $txt] {
    list $x [string range $x 0 end-3]
  }]\
    {*}[lmap x [findGenderStern $txt] {
    set n [regsub {[*_/].*$} $x ""]
    if {![string match *er $n]} then {
      append n en
    }
    if {[regexp {[*_/]innen(.*)$} $x - appendix]} then {
      append n $appendix
    }
    list $x $n
  }]\
    {*}[lmap x [findGenderBinnenMajuskel $txt] {
    set n [regsub {[[:upper:]][[:lower:]]+$} $x ""]
    if {![string match *er $n]} then {
      append n en
    }
    list $x $n
  }]
  foreach pair {
    {Studierende Studenten}
    {Studierenden Studenten}
    {"zu Fuß gehende" Fußgänger}
  } {
    lassign $pair genderForm normalForm
    if {[regexp \\m$genderForm $txt]} then {
      lappend map $pair
    }
    if {[regexp [string tolower $genderForm] $txt]} then {
      lappend map [string tolower $pair]
    }
  }
  set map [lsort -index 0 -unique $map]
  set map [lsort -command GenderStringIsShorter $map]
  concat {*}$map
}

proc escapeText {word {back ""}} {
  set map [list \\ \\\\ \u007b \\\u007b \u007d \\\u007d]
  if {$back eq ""} then {
    string map $map $word
  } else {
    string map [lreverse $map] $word
  }
}

proc txtToList txt {
  set txt1 "{[escapeText $txt]} {} {}"
  set map [setMapList $txt1]
  if {[llength $map] > 0} then {
    foreach {a b} $map {
      set repl \}
      append repl " {$a} {$b} "
      append repl \{
      lappend map1 $a $repl
    }
    lmap x [string map $map1 $txt1] {
      escapeText $x back
    }
  } else {
    list $txt {} {}
  }
}

#
# text window
#

package require Tk
bind [winfo class .] <Destroy> exit

wm title . {Nie wieder Gender-Texte!}
wm geometry . 500x350

proc -- args #

text .t -font {Times 14} -wrap word -yscrollcommand {.s set}\
  -highlightthickness 0
scrollbar .s -orient vertical -command {.t yview}

place .t -anchor nw -relheight 1.0 -relwidth 1.0 -width -[winfo reqwidth .s]
place .s -anchor ne -relheight 1 -relx 1.0

-- .t insert 1.0 {Pünktlich zum 1. April\
                    ist die internationale Gemeinschaft\
                    der Gravitationswellenforscher*innen\
                    mit der nächsten Messperiode O3 gestartet.}

bind . <FocusIn> {focus .t}

bind .t <Control-plus> [list apply {
  text {
    set font [$text cget -font]
    lassign $font family size
    if {$size < 24} then {
      $text configure -font [list $family [incr size 2]]
      $text tag configure normal -font [list $family $size bold]
    }
  }
} %W]
bind .t <Control-minus> [list apply {
  text {
    set font [$text cget -font]
    lassign $font family size
    if {$size > 8} then {
      $text configure -font [list $family [incr size -2]]
      $text tag configure normal -font [list $family $size bold]
    }
  }
} %W]
bind .t <Control-0> {.t configure -font {Times 14}}

bind .t <Key> {
  if {[string is print -strict %A] &&
      ("normal" in [%W tag names insert-1chars] ||
       "normal" in [%W tag names insert])} then {
    %W insert insert %A normal
    break
  }
}

.t tag configure gender -foreground red -underline yes
.t tag configure normal -foreground green\
  -font [concat [.t cget -font] bold]
.t tag configure hidden -elide yes

proc genderTextToWin {txt win} {
  foreach {norm gender repl} [txtToList $txt] {
    $win insert insert\
      $norm {}\
      $gender {gender hidden}\
      $repl normal
  }
}

proc showGender {text index} {
  set hiddenRange [$text tag prevrange hidden $index+1chars]
  set normalRange [$text tag prevrange normal $index+1chars]
  set genderRange [$text tag prevrange gender $index+1chars]
  $text tag remove hidden {*}$hiddenRange
  $text tag add hidden {*}$normalRange
}

proc hideGender {text index} {
  set hiddenRange [$text tag nextrange hidden $index-1chars]
  set normalRange [$text tag nextrange normal $index-1chars]
  set genderRange [$text tag prevrange gender $index+1chars]
  $text tag remove hidden {*}$hiddenRange
  $text tag add hidden {*}$genderRange
}

proc visibleText {text {from 1.0} {to end}} {
  lappend ranges [$text index $from]
  if true {
    set range\
      [$text tag nextrange hidden [lindex $ranges end] $to]
    if {$range ne ""} then {
      lappend ranges {*}$range
    } else break
  }
  lappend ranges [$text index $to]
  set result ""
  foreach {f t} $ranges {
    append result [$text get $f $t]
  }
  set result
}

.t tag bind normal <Control-1> {
  showGender %W @%x,%y
  update
  %W mark set insert @%x,%y
}
.t tag bind gender <Control-1> {
  hideGender %W @%x,%y
  update
  %W mark set insert @%x,%y
}

bind .t <<Copy>> [list apply {
  text {
    clipboard clear
    clipboard append\
      [visibleText $text {*}[$text tag ranges sel]]
  }
} %W]
bind .t <<Copy>> +break

bind .t <<Paste>> {
  if {
    ![catch {
      genderTextToWin\
        [string trim\
        [subst -nocommand -novariable\
        [regsub -all {([^\n])\n([^\n])}\
        [subst -nocommand -novariable [clipboard get]]\
        {\1\n\n\2}]]] %W
    }]
  } break
}

after 100 "event generate .t <<Paste>>"

Perhaps not that elegance, but that is simply impossible for processing human language. Contenance, mon cher.