[dbohdan] 2018-03-26: The following module lets you find the first ''N'' words two strings have in common, with ''words'' defined as sequences of non-separator characters. The separators may be any characters understood by `[regexp]`. ** See also ** * `sepsplit` in [Sqawk] * `[struct::list] longestCommonSubsequence` * `[textutil]::longestCommonPrefix` ** Code ** Download with [wiki-reaper]: `wiki-reaper -x 55229 0 | tee common-word-prefix.tcl` ====== #! /usr/bin/env tclsh # Copyright (c) 2018 dbohdan # License: MIT package require Tcl 8.5 namespace eval ::cwp { variable version 0.1.0 } proc ::cwp::common-word-prefix {s1 s2 {sep {\s}} {includeLastSep 0}} { if {[string length $s2] > [string length $s1]} { set t $s2 set s2 $s1 set s1 $t unset t } set commonUpTo -1 set offset 0 set reWord [format {[^%s]+} $sep] set reSep [format {%s+} $sep] while 1 { if {!([regexp -indices -start $offset -- $reWord $s1 wordMatch] && [string range $s1 {*}$wordMatch] eq [string range $s2 {*}$wordMatch])} { break } lassign $wordMatch _ offset if {!([regexp -indices -start $offset -- $reSep $s1 sepMatch] && [string range $s1 {*}$sepMatch] eq [string range $s2 {*}$sepMatch])} { if {$offset == [string length $s2] - 1} { set commonUpTo $offset } break } lassign $sepMatch _ offset set commonUpTo [lindex [expr { $includeLastSep ? $sepMatch : $wordMatch }] 1] } if {$commonUpTo == -1} { return {} } else { return [string range $s1 0 $commonUpTo] } } proc ::cwp::replace-prefix {prefix s {replacement { }}} { set prefixLen [string length $prefix] set replacementLen [string length $replacement] set repeats [expr { $replacementLen > 0 ? $prefixLen / $replacementLen + 1: 0 }] set newPrefix [string range [string repeat $replacement $repeats] \ 0 \ $prefixLen-1] return $newPrefix[string range $s $prefixLen end] } # If this is the main script, run the tests. if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} { package require tcltest namespace path ::cwp tcltest::test common-word-prefix-1.1 {simple case} \ -body { common-word-prefix {hello world 1} {hello world 2} } -result {hello world} tcltest::test common-word-prefix-1.2 {all the same words} \ -body { list [common-word-prefix foo foo] \ [common-word-prefix {hello world} {hello world}] } -result {foo {hello world}} tcltest::test common-word-prefix-1.3 {all different words} \ -body { list [common-word-prefix foo bar] \ [common-word-prefix {foo bar} {baz qux}] } -result {{} {}} tcltest::test common-word-prefix-1.4 {words sharing a prefix} \ -body { list [common-word-prefix foo food] \ [common-word-prefix fool food] \ [common-word-prefix {hello world alpha} {hello world aleph}] } -result {{} {} {hello world}} tcltest::test common-word-prefix-1.5 {different length} \ -body { list [common-word-prefix {foo bar baz} foo] \ [common-word-prefix {foo bar baz} {foo bar}] \ [common-word-prefix {foo bar baz} {foo bar }] \ [common-word-prefix {foo bar } {foo bar baz}] \ [common-word-prefix {foo bar} {foo bar baz}] \ [common-word-prefix foo {foo bar baz}] } -result {foo {foo bar} {foo bar} {foo bar} {foo bar} foo} tcltest::test common-word-prefix-1.6 includeLastSep \ -body { list [common-word-prefix {hello world 1} {hello world 2} { } 0] \ [common-word-prefix {hello world 1} {hello world 2} { } 1] \ [common-word-prefix hello-world-1 hello-world-2 - 0] \ [common-word-prefix hello-world-1 hello-world-2 - 1] } -result {{hello world} {hello world } hello-world hello-world-} tcltest::test replace-prefix-1.1 {default use} \ -body { list [replace-prefix {} {}] \ [replace-prefix foo foo] \ [replace-prefix {foo bar} {foo bar baz}] } -result {{} { } { baz}} tcltest::test replace-prefix-1.2 pattern \ -body { list [replace-prefix {} {} 12345] \ [replace-prefix foo foo 12345] \ [replace-prefix {foo bar } {foo bar baz} 12345] } -result {{} 123 12345123baz} # Exit with a nonzero status if there are failed tests. set failed [expr {$tcltest::numTests(Failed) > 0}] tcltest::cleanupTests if {$failed} { exit 1 } } ====== ** Use example ** *** Code *** ====== source common-word-prefix.tcl proc log text { set message "[clock format [clock seconds] -gmt 1] $text" set cwp [::cwp::common-word-prefix $::prevLogMessage $message {\s} 1] puts stderr [::cwp::replace-prefix $cwp $message $::logPlaceholder] set ::prevLogMessage $message } foreach logPlaceholder {{ } -=} { set prevLogMessage {} log "frobnicating file /foo/bar" log "frobnicating file /foo/baz" log "frobnicating file /foo/qux" } ====== *** Output *** ======none Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar /foo/baz /foo/qux Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/baz -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/qux ====== <>Package | String Processing | Word and Text Processing