dbohdan 2018-03-26: The following module lets you find the longest common word prefix of two strings, which is to say, the first N words the strings have in common. For our purposes words are defined as string fragments separated by separators. A separator is a string that matches a given regular expression understood by regexp.
Download with wiki-reaper: wiki-reaper -x 55230 0 | tee lcwp.tcl
#! /usr/bin/env tclsh # Copyright (c) 2018, 2021, 2024 D. Bohdan # License: MIT package require Tcl 8.5 9 namespace eval ::lcwp { variable version 0.3.0 interp alias {} lcwp {} longest-common-word-prefix } proc ::lcwp::longest-common-word-prefix { s1 s2 {sep {\s+}} {includeTailSep 0} } { if {[string length $s2] > [string length $s1]} { set t $s2 set s2 $s1 set s1 $t unset t } set offset 0 set tailSepLength 0 while 1 { lassign [read-word $s1 $offset $sep] label1 \ matchedFramement1 \ matchedSep1 lassign [read-word $s2 $offset $sep] label2 \ matchedFramement2 \ matchedSep2 # Handle fragments. if {$matchedFramement1 ne $matchedFramement2} { break } set fragmentLength [string length $matchedFramement1] incr offset $fragmentLength if {$fragmentLength > 0} { set tailSepLength 0 } # Handle separators. if {$matchedSep1 ne $matchedSep2} { break } incr tailSepLength [string length $matchedSep1] incr offset [string length $matchedSep1] # Handle string end. if {$label1 eq {END} || $label2 eq {END}} { break } } if {!$includeTailSep} { incr offset -$tailSepLength } return [string range $s1 0 $offset-1] } proc ::lcwp::read-word {s offset sep} { if {[regexp -indices -start $offset -- $sep $s match]} { lassign $match start end set matchedFramement [string range $s $offset $start-1] set matchedSep [string range $s $start $end] set label MORE } else { set matchedFramement [string range $s $offset end] set matchedSep {} set label END } return [list $label $matchedFramement $matchedSep] } proc ::lcwp::replace-prefix {prefix s {replacement { }}} { set prefixLen [string length $prefix] set replacementLen [string length $replacement] set repeats [expr { $replacementLen > 0 ? $prefixLen / $replacementLen + 1 : 0 }] set newPrefix [string range [string repeat $replacement $repeats] \ 0 \ $prefixLen-1] return $newPrefix[string range $s $prefixLen end] } # If this is the main script, run the tests. if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} { package require tcltest namespace path ::lcwp tcltest::test common-word-prefix-1.1 {simple case} -body { lcwp {hello world 1} {hello world 2} } -result {hello world} tcltest::test common-word-prefix-1.2 {all the same words} -body { list [lcwp foo foo] \ [lcwp {hello world} {hello world}] } -result {foo {hello world}} tcltest::test common-word-prefix-1.3 {all different words} -body { list [lcwp foo bar] \ [lcwp {foo bar} {baz qux}] } -result {{} {}} tcltest::test common-word-prefix-1.4 {words sharing a prefix} -body { list [lcwp foo food] \ [lcwp fool food] \ [lcwp {hello world alpha} {hello world aleph}] } -result {{} {} {hello world}} tcltest::test common-word-prefix-1.5 {different length} -body { list [lcwp {foo bar baz} foo] \ [lcwp {foo bar baz} {foo bar}] \ [lcwp {foo bar baz} {foo bar }] \ [lcwp {foo bar } {foo bar baz}] \ [lcwp {foo bar} {foo bar baz}] \ [lcwp foo {foo bar baz}] } -result {foo {foo bar} {foo bar} {foo bar} {foo bar} foo} tcltest::test common-word-prefix-1.6 includeTailSep -body { list [lcwp {hello world 1} {hello world 2} { } 0] \ [lcwp {hello world 1} {hello world 2} { } 1] \ [lcwp hello-world-1 hello-world-2 - 0] \ [lcwp hello-world-1 hello-world-2 - 1] } -result {{hello world} {hello world } hello-world hello-world-} tcltest::test common-word-prefix-1.7 whitespace-1 -body { list [lcwp {foo bar 1} {foo bar } { } 0] \ [lcwp {foo bar 1} {foo bar } { } 1] \ [lcwp { foo bar 1} { foo bar } { } 0] \ [lcwp { foo bar 1} { foo bar } { } 1] } -result {{foo bar} {foo bar } { foo bar} { foo bar }} tcltest::test replace-prefix-1.1 {default use} -body { list [replace-prefix {} {}] \ [replace-prefix foo foo] \ [replace-prefix {foo bar} {foo bar baz}] } -result {{} { } { baz}} tcltest::test replace-prefix-1.2 pattern -body { list [replace-prefix {} {} 12345] \ [replace-prefix foo foo 12345] \ [replace-prefix {foo bar } {foo bar baz} 12345] } -result {{} 123 12345123baz} # Exit with a nonzero status if there are failed tests. set failed [expr {$tcltest::numTests(Failed) > 0}] tcltest::cleanupTests if {$failed} { exit 1 } }
source lcwp.tcl proc log text { set message "[clock format [clock seconds] -gmt 1] $text" set prefix [::lcwp::longest-common-word-prefix $::prevLogMessage \ $message \ {\s} \ 1] puts stderr [::lcwp::replace-prefix $prefix $message $::logPlaceholder] set ::prevLogMessage $message } foreach logPlaceholder {{ } -=} { set prevLogMessage {} log "frobnicating file /foo/bar" log "frobnicating file /foo/baz" log "frobnicating file /foo/qux" }
Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar /foo/baz /foo/qux Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/baz -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/qux