Version 3 of Search Engine in Javascript

Updated 2006-09-17 09:00:04

if false {

A colleague of mine publishes technical sheets not only on our Intranet but also on CD-ROM.

Last Friday, she asked me how to implement the search page on the CD-ROM. I answered: the ASP pages may be there, but on a CD-ROM there is no little dwarf to execute the ASP, so no way ...

But the next day was Saturday ... maybe it was the weather ... I found a solution in JavaScript. It is a single file, search.js, to be included in the search page. Here is the source of the Tcl script that generates it:

}

 proc echo {args} {
     # Print all arguments, formatted as one Tcl list, on a single line
     # of standard output.
     puts $args
 }

 proc cat {file} {
     # Return the complete contents of $file as a string.
     #
     # file - path of the file to read
     #
     # Raises the usual Tcl error when the file cannot be opened or read.
     set port [open $file]
     # Close the channel even when reading fails, then re-raise the
     # original error together with its stack trace.
     if {[catch {read $port} contents]} {
         set savedInfo $::errorInfo
         set savedCode $::errorCode
         close $port
         error $contents $savedInfo $savedCode
     }
     close $port
     return $contents
 }

 # Determine the start page: the first command line argument when one
 # is given, otherwise the first *.htm* file of the current directory.
 if {[info exists argv] && [llength $argv] > 0} then {
     set startFile [lindex $argv 0]
 } else {
     # -nocomplain: report a missing start page with a message of our
     # own instead of glob's generic "no files matched" error
     set candidates [glob -nocomplain *.htm*]
     if {[llength $candidates] == 0} then {
         error "no start page given and no *.htm* file found in [pwd]"
     }
     set startFile [lindex $candidates 0]
 }

 proc relPathFromTo {fromDir toDir} {
     # Return the path that leads from directory $fromDir to $toDir.
     #
     # fromDir - starting point; assumed to be a directory (not a file)
     # toDir   - destination path
     #
     # Both paths are normalized first.  The result climbs with ".." out
     # of the part of $fromDir that is not shared with $toDir and then
     # descends into the remaining part of $toDir.  It is the empty
     # string when both paths are identical.
     set from [file normalize $fromDir]
     set to [file normalize $toDir]
     if {$::tcl_platform(platform) eq "windows"} {
         # Make sure both paths carry a drive prefix and write the drive
         # letter in upper case, so that c:/x and C:/x compare equal.
         set driveMap {
             a: A: b: B: c: C: d: D: e: E: f: F: g:
             G: h: H: i: I: j: J: k: K: l: L: m: M:
             n: N: o: O: p: P: q: Q: r: R: s: S: t:
             T: u: U: v: V: w: W: x: X: y: Y: z: Z:
         }
         # stays empty when the current directory has no drive letter
         set drive ""
         regexp {^[a-zA-Z]:} [pwd] drive
         if {![regexp {^[a-zA-Z]:} $from]} {
             set from $drive$from
         }
         set from [string map $driveMap $from]
         if {![regexp {^[a-zA-Z]:} $to]} {
             set to $drive$to
         }
         set to [string map $driveMap $to]
     }
     set fromList [file split $from]
     set toList [file split $to]
     # count the leading path elements shared by both paths; stop at
     # the first difference
     set commonLength 0
     foreach path1 $fromList path2 $toList {
         if {$path1 ne $path2} {
             break
         }
         incr commonLength
     }
     # one ".." for every remaining element of the start directory ...
     set resultList {}
     foreach i [lrange $fromList $commonLength end] {
         lappend resultList ..
     }
     # ... followed by the remaining elements of the destination
     foreach part [lrange $toList $commonLength end] {
         lappend resultList $part
     }
     if {[llength $resultList] == 0} {
         return ""
     }
     # build the command as a list, so that path elements containing
     # blanks or special characters are never parsed a second time
     return [eval [linsert $resultList 0 file join]]
 }

 # Global search data, initially empty:
 # titles is a list, database an array.
 set titles [list]
 array set database [list]

 proc parseFile {file {isRoot 1}} {
     # Index the HTML page $file and, recursively, every local page it
     # links to.
     #
     # file   - path of the page to index
     # isRoot - internal flag; the recursive calls made while following
     #          links pass 0, so that only a call from outside can reset
     #          the index.  Callers normally omit it.
     #
     # Reads and updates these variables of the current namespace:
     #   startFile - the start page; stored paths are relative to its
     #               directory
     #   links     - relative paths of all pages indexed so far
     #   titles    - titles of those pages, in the same order as links
     #   database  - array: lower-case word -> list of relative paths of
     #               the pages containing that word
     #
     # Returns the database as a key/value list when called from
     # outside; an empty string for a page that was indexed before.
     variable startFile
     variable links
     variable database
     variable titles
     if {$isRoot && $startFile eq $file} then {
         # fresh run: forget everything indexed before.  titles is
         # cleared together with links because both are addressed by
         # the same index.
         set links {}
         set titles {}
         array unset database
         array set database {}
     }
     if {![info exists links]} then {
         set links {}
     }
     set startDir [file normalize [file dirname $startFile]]
     set file [file normalize $file]
     set myPath [relPathFromTo $startDir $file]
     # -exact: a path may contain glob characters such as [ or *
     if {[lsearch -exact $links $myPath] >= 0} then {
         # indexed before; this also ends cycles of mutual links
         return
     }
     echo processing $myPath
     lappend links $myPath
     set contents [cat $file]
     # title: text of the title element with white space collapsed,
     # file name without extension as fallback
     set title ""
     if {[regexp -nocase {<title>([^<]+)</title>} $contents -> rawTitle]} then {
         set title [string trim [regsub -all {\s+} $rawTitle " "]]
     }
     if {$title eq ""} then {
         set title [file rootname [file tail $file]]
     }
     lappend titles $title
     # words: drop the tags, decode the German umlaut entities, turn
     # any other entity and every non-alphanumeric run into a blank
     set text [regsub -all -- {<.*?>} $contents ""]
     set text [string map [list \
         &auml;  \u00e4 \
         &ouml;  \u00f6 \
         &uuml;  \u00fc \
         &szlig; \u00df \
         &Auml;  \u00c4 \
         &Ouml;  \u00d6 \
         &Uuml;  \u00dc \
     ] $text]
     set text [regsub -all -- {&#?[[:alnum:]]+;} $text " "]
     set text [regsub -all -- {[^[:alnum:]]+} $text " "]
     array set database {}
     # Lower-case first, then deduplicate.  Every page is indexed only
     # once, so each distinct word adds this page exactly once.
     foreach word [split [string tolower $text]] {
         if {$word eq "" || [info exists seen($word)]} then {
             continue
         }
         set seen($word) 1
         lappend database($word) $myPath
     }
     # follow links; hrefs are resolved against the directory of the
     # page itself, independent of the current working directory
     set myDir [file dirname $file]
     foreach src [regexp -nocase -inline -all\
                      {<a [^>]*?href=['"][^:?]+["']>} $contents] {
         # take the attribute value from a capture group: one of the
         # two groups matches, the other one stays empty
         if {![regexp -nocase {href=(?:"([^"]*)"|'([^']*)')} $src -> dq sq]} then {
             continue
         }
         set href $dq$sq
         # a fragment addresses a position inside the page, not a file
         regsub {#.*$} $href "" href
         if {$href eq ""} then {
             continue
         }
         set target [file normalize [file join $myDir $href]]
         # isfile: a link to a directory cannot be read as a page
         if {[file isfile $target]} then {
             parseFile $target 0
         } else {
             puts stdout [list not found: $target]
         }
     }
     if {$isRoot} then {
         return [array get database]
     }
     return
 }

 proc jsString {text} {
     # Return $text as a double-quoted JavaScript string literal.
     # Quotes and backslashes are escaped; control and non-ASCII
     # characters are written as \uXXXX, which keeps the generated file
     # pure ASCII and thus independent of any character encoding.
     set result \"
     foreach char [split $text ""] {
         scan $char %c code
         if {$char eq "\"" || $char eq "\\"} {
             append result \\ $char
         } elseif {$code > 0xFFFF} {
             # outside the basic plane: JavaScript needs a surrogate pair
             set code [expr {$code - 0x10000}]
             append result [format {\u%04x\u%04x} \
                 [expr {0xD800 + ($code >> 10)}] \
                 [expr {0xDC00 + ($code & 0x3FF)}]]
         } elseif {$code < 32 || $code > 126} {
             append result [format {\u%04x} $code]
         } else {
             append result $char
         }
     }
     append result \"
     return $result
 }

 # crawl the site; fills links, titles and database
 parseFile $startFile

 # position of every page in links, for fast lookup
 unset -nocomplain linkIndex
 set position 0
 foreach path $links {
     set linkIndex($path) $position
     incr position
 }

 set fileLiterals {}
 foreach path $links {
     lappend fileLiterals [jsString $path]
 }
 set titleLiterals {}
 foreach title $titles {
     lappend titleLiterals [jsString $title]
 }
 # one entry per word: "word": [page index, page index, ...]
 set entries {}
 foreach key [array names database] {
     set indices {}
     foreach target $database($key) {
         if {[info exists linkIndex($target)]} {
             lappend indices $linkIndex($target)
         }
     }
     lappend entries "\t[jsString $key]: \[[join $indices {, }]\]"
 }

 # Joining the entries puts commas only between them, so an empty
 # database still yields a valid object literal.
 set separator ",\n\t"
 set src "var files = \[\n\t[join $fileLiterals $separator]\n\]\n\n"
 append src "var titles = \[\n\t[join $titleLiterals $separator]\n\]\n\n"
 append src "var database = \{\n[join $entries ",\n"]\n\}"

 # Write search.js: first the generated data (files, titles, database),
 # then the fixed search code.  The JavaScript below is a brace-quoted
 # Tcl string, so its braces must stay balanced.
 set out [open search.js w]
 puts $out $src

 puts $out {
 // from here on fixed javascript

 // arrayContainsElement (arr, el)
 // return true if el is element of arr

 function arrayContainsElement (arr, el)
 {
     for (var i = 0; i < arr .length; i++)
         if (arr[i] == el) return true;
     return false;
 }

 // commonElementsOf (arr1, arr2)
 // return new array containing the elements common to both arrays

 function commonElementsOf (arr1, arr2)
 {
     var result = [];
     for (var i = 0; i < arr1 .length; i++)
     {
         var el = arr1[i];
         if (arrayContainsElement (arr2, el)) result .push (el);
     }
     return result;
 }

 // pagesContaining (word)
 // return the array of page indices stored for word, or an empty array;
 // only own properties count, so that a query like "constructor" does
 // not pick up members inherited from Object

 function pagesContaining (word)
 {
     if (Object .prototype .hasOwnProperty .call (database, word))
         return database[word];
     return [];
 }

 // escapeHtml (text)
 // return text with the characters special to HTML replaced by entities

 function escapeHtml (text)
 {
     var s = String (text);
     s = s .replace (/&/g, "&amp;");
     s = s .replace (/</g, "&lt;");
     s = s .replace (/>/g, "&gt;");
     s = s .replace (/"/g, "&quot;");
     s = s .replace (/'/g, "&#39;");
     return s;
 }

 // decodeField (text)
 // return the decoded value of one form field taken from the URL

 function decodeField (text)
 {
     // every plus sign stands for a blank, not only the first one
     var spaced = text .replace (/\+/g, " ");
     try
     {
         return decodeURIComponent (spaced);
     }
     catch (e)
     {
         // not valid UTF-8, e.g. sent from a Latin-1 page
         return unescape (spaced);
     }
 }

 var formFields = location .search .slice (1) .split ("&");

 var queryList = [];
 var lang = "de";

 for (var i = 0; i < formFields .length; i++)
 {
     var keyVal = formFields[i] .split ("=");
     var key = decodeField (keyVal[0]);
     var val = decodeField (keyVal .length > 1 ? keyVal[1] : "");
     if (key == "query")
     {
         // the words of the query, without empty entries
         var queryWords = val .toLowerCase() .split (/\s+/);
         queryList = [];
         for (var j = 0; j < queryWords .length; j++)
             if (queryWords[j] != "") queryList .push (queryWords[j]);
         // if there should be the search form on position 0 ...
         if (document .forms .length && document .forms[0] .query)
             document .forms[0] .query .value = val;
     }
     if (key == "lang") lang = val;
 }

 // write some feedback to HTML; the query comes from the URL and is
 // therefore escaped before it is written into the page

 var queryText = escapeHtml (queryList .join (", "));

 if (lang == "de")
     document .write ("<p>Suchergebnis f&uuml;r ", queryText, "</p>");
 else
     document .write ("<p>Search results for ", queryText, "</p>");

 // a page is a hit when it contains every word of the query

 var resultList = [];

 for (var i = 0; i < queryList .length; i++)
 {
     var pages = pagesContaining (queryList[i]);
     if (i == 0)
         resultList = pages;
     else
         resultList = commonElementsOf (resultList, pages);
 }

 if (resultList .length)
 {
     document .write ("<ol>");
     for (var i = 0; i < resultList .length; i++)
     {
         var result = resultList[i];
         document .write ("\n<li><a href='", escapeHtml (files[result]), "'>",
                          titles[result], "</a></li>");
     }
     document .write ("</ol>");
 }
 else
 {
     if (lang == "de") document .write ("<p>Nichts gefunden</p>");
     else document .write ("<p>No match</p>");
 }
 }

 close $out

 if false {

Here are the contents of the search page:

    <form action="search.htm" method="get">
      <input type="text" name="query" />
      <input type="submit" name="suche" />
    </form>
    <script type="text/javascript" src="search.js"></script>


  1. cd to the directory where the start page of your site resides.
  2. start the script with the start page as argument, e.g. "tclsh makeSearch.tcl index.htm"
  3. Put the search page shown above, e.g. named search.htm, into the same directory.

From now on, you can "google" your private site. Funny.

 }