[dbohdan] 2018-03-26: The following module lets you find the first ''N'' words two strings have in common, with ''words'' defined as sequences of non-separator characters. The separators may be any characters understood by `[regexp]`.


** See also **

   * `sepsplit` in [Sqawk]
   * `[struct::list] longestCommonSubsequence`
   * `[textutil]::longestCommonPrefix`


** Code **

Download with [wiki-reaper]: `wiki-reaper -x 55229 0 | tee common-word-prefix.tcl`

======
#! /usr/bin/env tclsh
package require Tcl 8.5

namespace eval ::cwp {
    variable version 0.2.0
}

proc ::cwp::common-word-prefix {s1 s2 {sep {\s+}} {includeTailSep 0}} {
    if {[string length $s2] > [string length $s1]} {
        set t $s2
        set s2 $s1
        set s1 $t
        unset t
    }

    set offset 0
    set tailSepLength 0
    while 1 {
        lassign [read-word $s1 $offset $sep] label1 \
                                             matchedFramement1 \
                                             matchedSep1
        lassign [read-word $s2 $offset $sep] label2 \
                                             matchedFramement2 \
                                             matchedSep2

        # Handle fragments.
        if {$matchedFramement1 ne $matchedFramement2} {
            break
        }
        set fragmentLength [string length $matchedFramement1]
        incr offset $fragmentLength
        if {$fragmentLength > 0} {
            set tailSepLength 0
        }

        # Handle separators.
        if {$matchedSep1 ne $matchedSep2} {
            break
        }
        incr tailSepLength [string length $matchedSep1]
        incr offset [string length $matchedSep1]

        # Handle string end.
        if {$label1 eq {END} || $label2 eq {END}} {
            break
        }
    }
    if {!$includeTailSep} {
        incr offset -$tailSepLength
    }
    return [string range $s1 0 $offset-1]
}

proc ::cwp::read-word {s offset sep} {
    if {[regexp -indices -start $offset -- $sep $s match]} {
        lassign $match start end
        set matchedFramement [string range $s $offset $start-1]
        set matchedSep [string range $s $start $end]
        set label MORE
    } else {
        set matchedFramement [string range $s $offset end]
        set matchedSep {}
        set label END
    }
    return [list $label $matchedFramement $matchedSep]
}

proc ::cwp::replace-prefix {prefix s {replacement { }}} {
    set prefixLen [string length $prefix]
    set replacementLen [string length $replacement]
    set repeats [expr {
        $replacementLen > 0 ?
        $prefixLen / $replacementLen + 1 :
        0
    }]
    set newPrefix [string range [string repeat $replacement $repeats] \
                                0 \
                                $prefixLen-1]
    return $newPrefix[string range $s $prefixLen end]
}

# If this is the main script, run the tests.
if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
    package require tcltest
    namespace path ::cwp

    if {$argv ne {}} {
        tcltest::configure -match $argv
    }

    tcltest::test common-word-prefix-1.1 {simple case} -body {
        common-word-prefix {hello world 1} {hello world 2}
    } -result {hello world}

    tcltest::test common-word-prefix-1.2 {all the same words} -body {
        list [common-word-prefix foo foo] \
             [common-word-prefix {hello world} {hello world}]
    } -result {foo {hello world}}

    tcltest::test common-word-prefix-1.3 {all different words} -body {
        list [common-word-prefix foo bar] \
             [common-word-prefix {foo bar} {baz qux}]
    } -result {{} {}}

    tcltest::test common-word-prefix-1.4 {words sharing a prefix} -body {
        list [common-word-prefix foo food] \
             [common-word-prefix fool food] \
             [common-word-prefix {hello world alpha} {hello world aleph}]
    } -result {{} {} {hello world}}

    tcltest::test common-word-prefix-1.5 {different length} -body {
        list [common-word-prefix {foo bar baz} foo] \
             [common-word-prefix {foo bar baz} {foo bar}] \
             [common-word-prefix {foo bar baz} {foo bar }] \
             [common-word-prefix {foo bar } {foo bar baz}] \
             [common-word-prefix {foo bar} {foo bar baz}] \
             [common-word-prefix foo {foo bar baz}]
    } -result {foo {foo bar} {foo bar} {foo bar} {foo bar} foo}

    tcltest::test common-word-prefix-1.6 includeTailSep -body {
        list [common-word-prefix {hello world 1} {hello world 2} { } 0] \
             [common-word-prefix {hello world 1} {hello world 2} { } 1] \
             [common-word-prefix hello-world-1 hello-world-2 - 0] \
             [common-word-prefix hello-world-1 hello-world-2 - 1]
    } -result {{hello world} {hello world } hello-world hello-world-}

    tcltest::test common-word-prefix-1.7 whitespace-1 -body {
        list [common-word-prefix {foo  bar   1} {foo  bar } { } 0] \
             [common-word-prefix {foo  bar   1} {foo  bar } { } 1] \
             [common-word-prefix {  foo  bar   1} {  foo  bar  } { } 0] \
             [common-word-prefix {  foo  bar   1} {  foo  bar  } { } 1]
    } -result {{foo  bar} {foo  bar } {  foo  bar} {  foo  bar  }}

    tcltest::test replace-prefix-1.1 {default use} -body {
        list [replace-prefix {} {}] \
             [replace-prefix foo foo] \
             [replace-prefix {foo bar} {foo bar baz}]
    } -result {{} {   } {        baz}}

    tcltest::test replace-prefix-1.2 pattern -body {
        list [replace-prefix {} {} 12345] \
             [replace-prefix foo foo 12345] \
             [replace-prefix {foo bar } {foo bar baz} 12345]
    } -result {{} 123 12345123baz}

    # Exit with a nonzero status if there are failed tests.
    set failed [expr {$tcltest::numTests(Failed) > 0}]

    tcltest::cleanupTests
    if {$failed} {
        exit 1
    }
}
======


** Use example **

*** Code ***

======
source common-word-prefix.tcl

proc log text {
    set message "[clock format [clock seconds] -gmt 1] $text"
    set cwp [::cwp::common-word-prefix $::prevLogMessage $message {\s} 1]
    puts stderr [::cwp::replace-prefix $cwp $message $::logPlaceholder]
    set ::prevLogMessage $message
}

foreach logPlaceholder {{ } -=} {
    set prevLogMessage {}
    log "frobnicating file /foo/bar"
    log "frobnicating file /foo/baz"
    log "frobnicating file /foo/qux"    
}
======

*** Output ***

======none
Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar
                                               /foo/baz
                                               /foo/qux
Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/baz
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/qux
======


<<categories>>Package | String Processing | Word and Text Processing