Adding text widget tags using regular expressions

Bryan Oakley 28-Dec-2008

The code on this page implements a command that allows you to add text widget tags using regular expressions.This code is "smart", in that it makes use of the event loop to schedule units of work so that it may be used in real time to highlight text without affecting performance.

Using this code it becomes fairly easy to implement a minimalist wiki-like markup language or to implement progressive searching. An example included on this page colorizes Tcl comments and variables, and provides a progressive search box.

package require Tk 8.5

# usage: 
#    tag::tag widget options pattern start end body
#    tag::cancel id
#
#    options may be one of -regexp, -elide, -nocase, -nolinestop, -exact
#    as defined for the search command of text widgets
#
#    start and end are text widget indices which define the range
#    of text in which to search.
#
#    In the script, the following substitutions will be made:
#    %W  path to the text widget
#    %s  the name of a tag that represents the beginning of a match
#    %e  the name of a tag that represents the end of a mach
#
#    tag::tag returns an id which may be passed to the cancel command
#    to cancel any pending actions.
# 
namespace eval tag {
    variable tagid 0
    variable afterid {}
}
    
proc tag::example {} {
    global search
    set search ""

    frame .toolbar
    label .l -text "Search string:"
    entry .e -textvariable ::search
    pack .l -in .toolbar -side left
    pack .e -in .toolbar -side left -fill both -expand true
    bind .e <Any-KeyRelease> [namespace code search]
    text .t -wrap none -background white
    pack .toolbar -side top -fill x
    pack .t -side top -fill both -expand true
    .t insert end "proc tag::tag {[info args tag]} {[info body tag]}"
    .t tag configure comment -foreground firebrick
    .t tag configure string -foreground \#007300
    .t tag configure variable -foreground blue

    tag .t -regexp {(^|;)\s*\#.*$} 1.0 end {
        %W tag add comment %s %e
    }
    tag .t -regexp {".*?"} 1.0 end {
        %W tag add string %s %e
    }
    tag .t -regexp {\${.*?}} 1.0 end {
        %W tag add variable %s %e
    }
    ::tag::tag .t -regexp {\$\w+} 1.0 end {
        %W tag add variable %s %e
    }

    proc search {} {
        variable id 

        if {![info exists id]} {set id ""}
        .t tag configure search -background blue -foreground white
        .t tag remove search 1.0 end
        tag::cancel $id
        if {$::search ne ""} {
            set id [tag::tag .t -exact -- $::search 1.0 end {
                %W tag add search %s %e
            }]
        }
    }
}

proc tag::tag {w args} {

    variable tagid

    incr tagid

    # the goal is to build up a text widget search command based on
    # the passed in arguments along with additional arguments needed
    # by this proc.
    set searchCommand [list $w search -count count]

    set supportedOptions [list -regexp -elide -nocase -nolinestop -exact --]
    while {[string match {-*} [lindex $args 0]]} {
        set arg [lindex $args 0]
        set args [lrange $args 1 end]
        if {$arg eq "--"} break
        set ::supportedOptions $supportedOptions
        if {[lsearch -glob $supportedOptions  $arg*] >= 0} {
            lappend searchCommand $arg
        } else {
            return -code error "bad switch \"$arg\""
        }
    }

    lassign $args pattern start end script
    set startMark _start$tagid
    set endMark _end$tagid

    set script [string map [list %W $w %s $startMark %e $endMark] $script]

    # These need to be in canonical form; if they are a mark they may
    # move as text is inserted or delete.
    set start [$w index $start]
    set end [$w index $end]

    # $startMark - mark that indentifies start of the matched text
    $w mark set $startMark $start
    $w mark gravity $startMark left

    # $endMark - mark that indentifies end of the matched text
    $w mark set $endMark $start
    $w mark gravity $endMark right

    # searchLimit - mark that identifies the end of the search region
    $w mark set searchLimit $end
    $w mark gravity searchLimit right

    lappend searchCommand -- $pattern $endMark searchLimit
            
    _doOneIteration $tagid $w $searchCommand $startMark $endMark $script 10

    return $tagid
}

proc ::tag::cancel {id} {
    variable afterid
    if {$id eq "all"} {
        foreach id [dict values $afterid] {
            after cancel $id
        }
        set afterid {}
    } else {
        if {[dict exists $afterid $id]} {
            after cancel [dict get $afterid $id]
            dict unset afterid $id
        }
    }
}

proc ::tag::_doOneIteration {id w searchCommand startMark endMark script limit} {
    variable afterid

    for {set i 0} {$limit == 0 || $i < $limit} {incr i} {
        dict unset afterid $id
        set index [eval $searchCommand]
        if {[string length $index] == 0} {
            return false
        }
        $w mark set $startMark $index
        $w mark set $endMark [$w index "$index + $count c"]

        uplevel \#0 $script
    }

    # If we get to here the above code did not exit prematurely,
    # meaning there's potentially more search results. Schedule
    # a job to run shortly after becoming idle to do more searching.
    dict set afterid $id \
        [after idle [namespace code [list _reschedule $id [info level 0]]]]
}

proc ::tag::_reschedule {id command} {
    variable afterid
    # it's interesting to change the delay to a large number so
    # you can watch the search progress. Use a small number to get
    # better performance. 
    set delay 10
    dict set afterid $id [after $delay [namespace code $command]]
}