Version 0 of Common word prefix

Updated 2018-03-26 16:55:31 by dbohdan

dbohdan 2018-03-26: The following module lets you find the first N words two strings have in common, with words defined as sequences of non-separator characters.

See also

Code

Download with wiki-reaper: wiki-reaper -x 55229 0 | tee common-word-prefix.tcl

#! /usr/bin/env tclsh
# Copyright (c) 2018 dbohdan
# License: MIT
package require Tcl 8.5

namespace eval ::cwp {
    variable version 0.1.0
}

proc ::cwp::common-word-prefix {s1 s2 {sep {\s}} {includeLastSep 0}} {
    if {[string length $s2] > [string length $s1]} {
        set t $s2
        set s2 $s1
        set s1 $t
        unset t
    }
    set commonUpTo -1
    set offset 0

    set reWord [format {[^%s]+} $sep]
    set reSep [format {%s+} $sep]

    while 1 {
        if {!([regexp -indices -start $offset -- $reWord $s1 wordMatch] &&
              [string range $s1 {*}$wordMatch] eq
              [string range $s2 {*}$wordMatch])} {
            break
        }
        lassign $wordMatch _ offset
        if {!([regexp -indices -start $offset -- $reSep $s1 sepMatch] &&
              [string range $s1 {*}$sepMatch] eq
              [string range $s2 {*}$sepMatch])} {
            if {$offset == [string length $s2] - 1} {
                set commonUpTo $offset
            }
            break
        }
        lassign $sepMatch _ offset
        set commonUpTo [lindex [expr {
            $includeLastSep ? $sepMatch : $wordMatch
        }] 1]
    }
    if {$commonUpTo == -1} {
        return {}
    } else {
        return [string range $s1 0 $commonUpTo]   
    }
}

proc ::cwp::replace-prefix {prefix s {replacement { }}} {
    set prefixLen [string length $prefix]
    set replacementLen [string length $replacement]
    set repeats [expr {
        $replacementLen > 0 ?
        $prefixLen / $replacementLen + 1:
        0
    }]
    set newPrefix [string range [string repeat $replacement $repeats] \
                                0 \
                                $prefixLen-1]
    return $newPrefix[string range $s $prefixLen end]
}

# If this is the main script, run tests.
if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
    package require tcltest
    namespace path ::cwp

    tcltest::test common-word-prefix-1.1 {simple case} \
                -body {
        common-word-prefix {hello world 1} {hello world 2}
    } -result {hello world}

    tcltest::test common-word-prefix-1.2 {all the same words} \
                -body {
        list [common-word-prefix foo foo] \
             [common-word-prefix {hello world} {hello world}]
    } -result {foo {hello world}}

    tcltest::test common-word-prefix-1.3 {all different words} \
                -body {
        list [common-word-prefix foo bar] \
             [common-word-prefix {foo bar} {baz qux}]
    } -result {{} {}}

    tcltest::test common-word-prefix-1.4 {words sharing a prefix} \
                -body {
        list [common-word-prefix foo food] \
             [common-word-prefix fool food] \
             [common-word-prefix {hello world alpha} {hello world aleph}]
    } -result {{} {} {hello world}}

    tcltest::test common-word-prefix-1.5 {different length} \
                -body {
        list [common-word-prefix {foo bar baz} foo] \
             [common-word-prefix {foo bar baz} {foo bar}] \
             [common-word-prefix {foo bar baz} {foo bar }] \
             [common-word-prefix {foo bar } {foo bar baz}] \
             [common-word-prefix {foo bar} {foo bar baz}] \
             [common-word-prefix foo {foo bar baz}]
    } -result {foo {foo bar} {foo bar} {foo bar} {foo bar} foo}

    tcltest::test common-word-prefix-1.6 includeLastSep \
                -body {
        list [common-word-prefix {hello world 1} {hello world 2} { } 0] \
             [common-word-prefix {hello world 1} {hello world 2} { } 1] \
             [common-word-prefix hello-world-1 hello-world-2 - 0] \
             [common-word-prefix hello-world-1 hello-world-2 - 1]
    } -result {{hello world} {hello world } hello-world hello-world-}

    tcltest::test replace-prefix-1.1 {default use} \
                -body {
        list [replace-prefix {} {}] \
             [replace-prefix foo foo] \
             [replace-prefix {foo bar} {foo bar baz}]
    } -result {{} {   } {        baz}}

    tcltest::test replace-prefix-1.2 pattern \
                -body {
        list [replace-prefix {} {} 12345] \
             [replace-prefix foo foo 12345] \
             [replace-prefix {foo bar } {foo bar baz} 12345]
    } -result {{} 123 12345123baz}

    # Exit with a nonzero status if there are failed tests.
    set failed [expr {$tcltest::numTests(Failed) > 0}]

    tcltest::cleanupTests
    if {$failed} {
        exit 1
    }
}

Use example

Code

source common-word-prefix.tcl

proc log text {
    set message "[clock format [clock seconds] -gmt 1] $text"
    set cwp [::cwp::common-word-prefix $::prevLogMessage $message {\s} 1]
    puts stderr [::cwp::replace-prefix $cwp $message $::logPlaceholder]
    set ::prevLogMessage $message
}

foreach logPlaceholder {{ } -=} {
    set prevLogMessage {}
    log "frobnicating file /foo/bar"
    log "frobnicating file /foo/baz"
    log "frobnicating file /foo/qux"    
}

Output

Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar
                                               /foo/baz
                                               /foo/qux
Mon Mar 26 16:37:34 GMT 2018 frobnicating file /foo/bar
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/baz
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/foo/qux