Version 0 of Search Engine in Javascript

Updated 2006-09-17 08:43:32

if false {

A colleage of mine publishes technical sheets not only on our Intranet but also on CD-ROM.

Last friday, she asked me how to realise the search page on CD-ROM. I answered: maybe there's the ASP, but no little dwarf to work out the ASP, so no way ...

But, next day was saturday ... maybe the wheather ... I found a solution in Javascript. It is a single file search.js to be sourced. Here the source:

 }

 proc echo args {puts $args}

 proc cat {file} {
     # return contents of $file
     set port [open $file]
     set contents [read $port]
     close $port
     set contents
 }

 if {$argv ne ""} then {
     set startFile [lindex $argv 0]
 } else {
     set startFile [lindex [glob *.htm*] 0]
 }

 proc relPathFromTo {fromDir toDir} {
     # return path string relative from $fromDir to $toDir.
     # $fromDir is assumed to be a directory (not a file).
     set from [file normalize $fromDir]
     set to [file normalize $toDir]
     if {$::tcl_platform(platform) eq "windows"} {
         set driveMap {
             a: A: b: B: c: C: d: D: e: E: f: F: g: 
             G: h: H: i: I: j: J: k: K: l: L: m: M:
             n: N: o: O: p: P: q: Q: r: R: s: S: t:
             T: u: U: v: V: w: W: x: X: y: Y: z: Z:
         }
         regexp {^[a-zA-Z]:} [pwd] drive
         if {![regexp {^[a-zA-Z]:} $from]} {
             set from $drive$from
         }
         set from [string map $driveMap $from]
         if {![regexp {^[a-zA-Z]:} $to]} {
             set to $drive$to
         }
         set to [string map $driveMap $to]
     }
     set fromList [file split $from]
     set fromLength [llength $fromList]
     set toList [file split $to]
     set toLength [llength $toList]
     set commonList {}
     foreach path1 $fromList path2 $toList {
         if {$path1 ne $path2} {
             break
         } else {
             lappend commonList $path1
         }
     }
     set commonLength [llength $commonList]
     set fromList1 [lrange $fromList $commonLength end]
     set toList1 [lrange $toList $commonLength end]
     set resultList {}
     foreach i $fromList1 {
         lappend resultList ..
     }
     eval lappend resultList $toList1
     if {$resultList ne {}} {
         eval file join $resultList
     }
 }

 array set database {}
 set titles {}

 proc parseFile file {
     variable startFile
     variable links
     variable database
     variable titles
     if {$startFile eq $file} then {
         set links {}
         array unset database
         array set database {}
     }
     set startDir [file dirname $startFile]
     set file [file normalize $file]
     set myPath [relPathFromTo [file normalize $startDir] $file]
     if {[lsearch $links $myPath] >= 0} then {
         return
     }
     echo processing $myPath
     lappend links $myPath
     set myDir [file dirname $myPath]
     set contents [cat $file]
     # title
     if {[regexp -nocase {<title>[^<]+</title>} $contents title]} then {
         set title [regsub -all { *</?title> *} $title ""]
     } else {
         set title [file rootname [file tail $file]]
     }
     lappend titles $title
     # words
     set contents1 [regsub -all -- {<.*?>} $contents ""]
     set contents2 [string map {
         &auml;  -bä -A
         &ouml;  -bö -A
         &uuml;  -bü -A
         &szlig;  -bß -A
         &Auml;  -bÄ -A
         &Ouml;  -bÖ -A
         &Uuml;  -bÜ -A
     } $contents1]
     set contents3 [regsub -all -- {[^[:alnum:]]+} $contents2 " "]
     set words {}
     foreach word [split $contents3] {
         if {$word ne ""} then {
             if {[lsearch $words $word] < 0} then {
                 lappend words [string tolower $word]
             }
         }
     }
     array set database {}
     foreach word $words {
         if {![info exists database($word)]
             ||
             [lsearch $database($word) $myPath] < 0} then {
             lappend database($word) $myPath
         }
     }
     # links verfolgen
     foreach src [regexp -nocase -inline -all\
                      {<a [^>]*?href=['"][^:?]+["']>} $contents] {
         set href [regexp -inline {(?:href=".*"|href='.*')} $src]
         set target\
             [file normalize\
                  [file join  $myDir\
                       [string trim [string range $href 7 end-1] '\"\\]]]
         if {[file exists $target]} then {
             parseFile $target
         } else {
             puts stdout [list not found: $target]
         }
     }
     array get database
 }

 parseFile $startFile

 set src {var files = }
 append src {[} \n\t\" [join $links \",\n\t\"] \"\n {]} \n\n\
     {var titles = }\
     {[} \n\t\" [join $titles \",\n\t\"] \"\n {]} \n\n\
     {var database = }  \{
 foreach key [array names database] {
     append src \n\t\" $key \": " " \[
     set indices {}
     foreach target $database($key) {
         lappend indices [lsearch $links $target]
     }
     append src [join $indices ", "]
     append src \],
 }
 # remove trailing comma ...
 set src [string range $src 0 end-1]
 append src \n \}

 set out [open search.js w]
 puts $out $src

 puts $out {
 // from here on fixed javascript

 // arrayContainsElement (arr, el)
 // return true if el is element of arr

 function arrayContainsElement (arr, el)
 {
     for (var i in arr) if (arr[i] == el) return true
     return false
 }

 // commonElementsOf (arr1, arr2)
 // return new array containing elements which are common in array

 function commonElementsOf (arr1, arr2)
 {
     var result=[]
     for (var i in arr1)
     {
         var el = arr1[i]
         if (arrayContainsElement (arr2, el)) result .push (el)
     }
     return result
 }

 var formFields = decodeURI (location .search) .slice (1) .split("&")

 var queryList = []
 var lang = "de"

 for (var i in formFields)
 {
     var keyVal = formFields[i] .split("=")
     var key = keyVal[0]
     var val = keyVal[1]
     if (key == "query") {
         queryList = val .toLowerCase() .split("+")
         // if there should be the search form on position 0 ...
         if (document .forms .length && document .forms[0] .query) 
         {
             document .forms [0] .query .value = val .replace ("+", " ")
         }
     }
     if (key == "lang") lang = val
 }

 // write some feedback to HTML

 if (lang == "de")
     document .write ("<p>Suchergebnis f&uuml;r ",
                      queryList .toString() .replace(",", ", "),
                      ":</p>")
 else
     document .write ("<p>Search results for ",
                      queryList .toString() .replace(",", ", "),
                      ":</p>")

 var resultList = []

 for (var i in queryList) {
     if (i == 0)
         resultList = database[queryList[i]]
     else if (queryList[i] != "")
         resultList = commonElementsOf(resultList, database[queryList[i]])
 }

 if (resultList && resultList .length)
 {
     document .write( "<ol>")
     for (var i in resultList)
         var result = resultList[i]
         document .write ("\n<li><a href='", files[result], "'>",
                          titles[result], "</a></li>")
 }
 else 
 {
     if (lang == "de") document .write ("<p>Nichts gefunden</p>")
     else document .write ("<p>No match</p>")
 }
 }

 close $out

 if false {

Here the contents of the search page:

 <html>
  <head>
    <title>Suchseite</title>
  </head>
  <body>
    <h1>Suchseite</h1>
    <form>
      <input type="text" name="query" />
      <input type="submit" name="suche" />
      <p>
      <textarea
        name="in"
        onchange="this.form.out.value=eval(value)"
        cols="25"
        rows="5"></textarea>
      <textarea
        name="out"
        cols="25"
        rows="5"></textarea>
      </p>
    </form>
    <script type="text/javascript" src="search.js"></script>
  </body>
 </html>

 }

 }