[dbohdan] 2018-03-26: The following module lets you find the first ''N'' words two strings have in common, with ''words'' defined as sequences of non-separator characters. The separators may be any characters understood by `[regexp]`. ** See also ** * `sepsplit` in [Sqawk] * `[struct::list] longestCommonSubsequence` * `[textutil]::longestCommonPrefix` ** Code ** Download with [wiki-reaper]: `wiki-reaper -x 55229 0 | tee common-word-prefix.tcl` ====== #! /usr/bin/env tclsh package require Tcl 8.5 namespace eval ::cwp { variable version 0.2.0 } proc ::cwp::common-word-prefix {s1 s2 {sep {\s+}} {includeTailSep 0}} { if {[string length $s2] > [string length $s1]} { set t $s2 set s2 $s1 set s1 $t unset t } set offset 0 set tailSepLength 0 while 1 { lassign [read-word $s1 $offset $sep] label1 \ matchedFramement1 \ matchedSep1 lassign [read-word $s2 $offset $sep] label2 \ matchedFramement2 \ matchedSep2 # Handle fragments. if {$matchedFramement1 ne $matchedFramement2} { break } set fragmentLength [string length $matchedFramement1] incr offset $fragmentLength if {$fragmentLength > 0} { set tailSepLength 0 } # Handle separators. if {$matchedSep1 ne $matchedSep2} { break } incr tailSepLength [string length $matchedSep1] incr offset [string length $matchedSep1] # Handle string end. if {$label1 eq {END} || $label2 eq {END}} { break } } if {!$includeTailSep} { incr offset -$tailSepLength } return [string range $s1 0 $offset-1] } proc ::cwp::read-word {s offset sep} { if {[regexp -indices -start $offset -- $sep $s match]} { lassign $match start end set matchedFramement [string range $s $offset $start-1] set matchedSep [string range $s $start $end] set label MORE } else { set matchedFramement [string range $s $offset end] set matchedSep {} set label END } return [list $label $matchedFramement $matchedSep] } proc ::cwp::replace-prefix {prefix s {replacement { }}} { set prefixLen [string length $prefix] set replacementLen [string length $replacement] set repeats [expr { $replacementLen > 0 ? $prefixLen / $replacementLen + 1 : 0 }] set newPrefix [string range [string repeat $replacement $repeats] \ 0 \ $prefixLen-1] return $newPrefix[string range $s $prefixLen end] } # If this is the main script, run the tests. if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} { package require tcltest namespace path ::cwp if {$argv ne {}} { tcltest::configure -match $argv } tcltest::test common-word-prefix-1.1 {simple case} -body { common-word-prefix {hello world 1} {hello world 2} } -result {hello world} tcltest::test common-word-prefix-1.2 {all the same words} -body { list [common-word-prefix foo foo] \ [common-word-prefix {hello world} {hello world}] } -result {foo {hello world}} tcltest::test common-word-prefix-1.3 {all different words} -body { list [common-word-prefix foo bar] \ [common-word-prefix {foo bar} {baz qux}] } -result {{} {}} tcltest::test common-word-prefix-1.4 {words sharing a prefix} -body { list [common-word-prefix foo food] \ [common-word-prefix fool food] \ [common-word-prefix {hello world alpha} {hello world aleph}] } -result {{} {} {hello world}} tcltest::test common-word-prefix-1.5 {different length} -body { list [common-word-prefix {foo bar baz} foo] \ [common-word-prefix {foo bar baz} {foo bar}] \ [common-word-prefix {foo bar baz} {foo bar }] \ [common-word-prefix {foo bar } {foo bar baz}] \ [common-word-prefix {foo bar} {foo bar baz}] \ [common-word-prefix foo {foo bar baz}] } -result {foo {foo bar} {foo bar} {foo bar} {foo bar} foo} tcltest::test common-word-prefix-1.6 includeTailSep -body { list [common-word-prefix {hello world 1} {hello world 2} { } 0] \ [common-word-prefix {hello world 1} {hello world 2} { } 1] \ [common-word-prefix hello-world-1 hello-world-2 - 0] \ [common-word-prefix hello-world-1 hello-world-2 - 1] } -result {{hello world} {hello world } hello-world hello-world-} tcltest::test common-word-prefix-1.7 whitespace-1 -body { list [common-word-prefix {foo bar 1} {foo bar } { } 0] \ [common-word-prefix {foo bar 1} {foo bar } { } 1] \ [common-word-prefix { foo bar 1} { foo bar } { } 0] \ [common-word-prefix { foo bar 1} { foo bar } { } 1] } -result {{foo bar} {foo bar } { foo bar} { foo bar }} tcltest::test replace-prefix-1.1 {default use} -body { list [replace-prefix {} {}] \ [replace-prefix foo foo] \ [replace-prefix {foo bar} {foo bar baz}] } -result {{} { } { baz}} tcltest::test replace-prefix-1.2 pattern -body { list [replace-prefix {} {} 12345] \ [replace-prefix foo foo 12345] \ [replace-prefix {foo bar } {foo bar baz} 12345] } -result {{} 123 12345123baz} # Exit with a nonzero status if there are failed tests. set failed [expr {$tcltest::numTests(Failed) > 0}] tcltest::cleanupTests if {$failed} { exit 1 } } ====== ** Use example ** *** Code *** ====== source common-word-prefix.tcl proc log text { set message "[clock format [clock seconds] -gmt 1] $text" set cwp [::cwp::common-word-prefix $::prevLogMessage $message {\s} 1] puts stderr [::cwp::replace-prefix $cwp $message $::logPlaceholder] set ::prevLogMessage $message } foreach logPlaceholder {{ } -=} { set prevLogMessage {} log "frobnicating file /foo/bar" log "frobnicating file /foo/baz" log "frobnicating file /foo/qux" } ====== *** Output *** ======none Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar /foo/baz /foo/qux Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/baz -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/qux ====== <>Package | String Processing | Word and Text Processing