Version 39 of LemonTree branch

Updated 2011-08-14 23:31:47 by Jorge

12 Dec 2009 JM This LemonTree version takes advantage of:

  1. htmlparse::2tree functionality that puts html contents on a tree structure
  2. how well developed LemonTree is that is easy to add new item types (Thanks RS!)

This script could be used as a tool for your web scraping coding if you use htmlparse as the main parsing engine.

usage:
- copy & paste URL from your web browser to the entry, or...
- browse your file system with the "Browse..." button
Then load the html contents with the button "Get html file" and start navigating the tree structure.

WikiDbImage LemonTreeBranch.gif

 #!/bin/sh
 #console show
 package require struct
 package require csv
 package require report
 package require htmlparse
 package require textutil
 package require http
 package require BWidget
 namespace eval LemonTree {variable uniqueID 0}
 
 #http://www.cs.grinnell.edu/~walker/fluency-book/labs/sample-table.html
 
 if 0 {The Tree widget allows a -data item for each node, which I use for a dict-like
 list that contains the node's type and "real name" (as opposed to the "display name"
 - for instance, a dir node would display only its [file tail], but the real name is
 the full path). This routine adds a node to the LemonTree: }
 
  proc LemonTree::add {w parent type name {text ""}} {
     variable uniqueID; variable icon
     if {$parent != "root"} {
     set val1 [::t get $name data]
     set val2 [::t get $name type]
     set val3 [::t index $name]
     if {$text eq ""} {set text "$val3,$val2,[string range $val1 0 20]"}
     }
     set id n[incr uniqueID]
     set data [list type $type name $name]
     #tk_messageBox -message "$type,$name"
     set fill [expr {[string match (* $text]? "blue": "black"}]
     set drawcross [expr {[info proc ::LemonTree::kids($type)] eq ""?
          "never": "allways"}]
     $w insert end $parent $id -text $text -data $data -drawcross $drawcross -fill $fill
     if [info exists icon($type)] {
              $w itemconfigure $id -image $icon($type)
     }
  }
 if 0 {For speed, a Tree isn't drawn fully expanded at the beginning.
 Instead, nodes are opened on demand, when the user clicks on the [+] icon.
 I use the -drawcross "allways" mode (shudder - should be fixed to "always",
 but then older code might break) to indicate that the node hasn't been opened before
 - after the first opening, the mode is set to "auto", meaning to draw a cross only if the node has children. }
 
 proc LemonTree::open {w node} {
     if {[$w itemcget $node -drawcross] eq "allways"} {
         set data [$w itemcget $node -data]
         set type [dict'get $data type]
         foreach {ktype kids} [kids($type) $w $node] {
             foreach kid $kids {add $w $node $ktype $kid}
         }
         $w itemconfigure $node -drawcross auto
     }
  }
 
 if 0 {So far for the generic LemonTree - the rest is already customization for specific item types.
 The kids($type) call above looks like an array element 
 - in fact it's a way of dispatching the generic operation of providing the list of children
 of an entity of given type, which of course depends on the type. For instance, the children
 of a directory are its subdirectories, and then its files (with special-casing for Windows,
 so that drive letters are the children of "/"): }
 
 proc LemonTree::kids(dir) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     if {$::tcl_platform(platform) eq "windows" && $name eq "/"} {
         return [list dir [file volumes]]
     }
     set dirs  [lsort [glob -nocomplain -type d $name/*]]
     set files [lsort [glob -nocomplain -type f $name/*]]
     list dir $dirs file $files
  }
 
  proc LemonTree::kids(html) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     list html [::t children $name]
  }
 
 if 0 {Namespaces have a hierarchy, but contain collections of commands and variables as well.
 So I introduced an intermediate layer (parents around the display name make these "meta-children"
 come displayed in blue):}
 
 proc LemonTree::kids(namespace) {w node} {
     list ns-commands (Commands) ns-vars (Variables) ns-children (Children)
  }
  
  proc LemonTree::kids(ns-children) {w node} {
     set ns [dict'get [$w itemcget [$w parent $node] -data] name]
     list namespace [lsort [namespace children $ns]]
  }
  
  proc LemonTree::kids(ns-commands) {w node} {
     set ns [dict'get [$w itemcget [$w parent $node] -data] name]
     list command [lsort [info commands ${ns}::*]]
  }
  
  proc LemonTree::kids(ns-vars) {w node} {
     set ns [dict'get [$w itemcget [$w parent $node] -data] name]
     set res ""
     foreach var [lsort [info vars ${ns}::*]] {
         lappend res [expr {[array exists $var]? "array": "variable"}] $var
     }
     set res
  }
 #-- Arrays can also be seen as a one-level subtree: 
 proc LemonTree::kids(array) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     list variable [lsort [array names $name]]
  }
  
  proc LemonTree::kids(widget) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     list widget [winfo children $name]
  }
  
 if 0 {A Tree looks prettier if nodes have icons, so I'm using some of those that BWidget comes with:}
 set path $BWIDGET::LIBRARY/images
 
  foreach {type name} {dir folder file file array copy html folder} {
     set LemonTree::icon($type) [image create photo -file $path/$name.gif]
  }
 # Some more icons come from adavis's Icons package:
 set LemonTree::icon(widget) [image create photo -data {
    R0lGODlhEAAQAIUAAFxaXGRmZFRWVGQmhFwmfFxeXOTm5MTCxLyWzLySzKyC
    vKSCvJxyrJRmrIxipIxWpNze3AQCBMTGxJRWtJRatIxOrIRCpHw+pHw6nHQ2
    lGwulOzu7Pz+/Pz+9Ozu5Pz+7NzexPz+5Pz+hPz+3NzevPz+BMTCBNzetMTC
    DPz+xNze1NzezPwCBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAACwALAAAAAAQABAAAAaV
    QEBAMCAUj0aCYFkwHBAJhWLBYDQcD8ghIjhIJhRKxXLBZDQaiYQLABDe8PiS
    u+HY7/dOh+PhQvB4eh8fIH6Adh2DHyGFAn+BiQAiISMkhnmSIQAlI5KXHIkf
    AiUmpCUnhoKLISgpIikmAlwqtCArkiUlIhwiuSKyEcHCESausMEsycrJEaal
    y9ARIizN1NPQ0dfJfkEAIf5oQ3JlYXRlZCBieSBCTVBUb0dJRiBQcm8gdmVy
    c2lvbiAyLjUNCqkgRGV2ZWxDb3IgMTk5NywxOTk4LiBBbGwgcmlnaHRzIHJl
    c2VydmVkLg0KaHR0cDovL3d3dy5kZXZlbGNvci5jb20AOw==}]
  set LemonTree::icon(namespace) [image create photo -data {
    R0lGODlhEAAQAIIAAPwCBAQCBPz+xERCBMTCBISCBDQyNAAAACH5BAEAAAAA
    LAAAAAAQABAAAANPCLoR+7AJ0SALYkxd79za12FgOTlAQBDhRxUFqrKEG8Py
    OqwEfMeKwGDI8zVGul0vFsAFdaxB43ecKZfUKm1lZD6ERZgBZWn0OpYvGeJP
    AAAh/mhDcmVhdGVkIGJ5IEJNUFRvR0lGIFBybyB2ZXJzaW9uIDIuNQ0KqSBE
    ZXZlbENvciAxOTk3LDE5OTguIEFsbCByaWdodHMgcmVzZXJ2ZWQuDQpodHRw
    Oi8vd3d3LmRldmVsY29yLmNvbQA7}]
  set LemonTree::icon(command) [image create photo -data {
    R0lGODlhEAAQAIIAAPwCBAQCBISChMTCxDQyNFxaXKSipPz+/CH5BAEAAAAA
    LAAAAAAQABAAAANdCLobwbAFMciLwBFSihBEFHSG8QnmpQQEBX6loI5G5QTl
    cMgrZSmEmsGxKqRWNV3hMrFlBtDoA1eTEaKHJdMYhR6+gxkF++UMGbiDzvDV
    ioyHAJSHcchuGLQq4k8AACH+aENyZWF0ZWQgYnkgQk1QVG9HSUYgUHJvIHZl
    cnNpb24gMi41DQqpIERldmVsQ29yIDE5OTcsMTk5OC4gQWxsIHJpZ2h0cyBy
    ZXNlcnZlZC4NCmh0dHA6Ly93d3cuZGV2ZWxjb3IuY29tADs=}]
  set LemonTree::icon(variable) [image create photo -data {
    R0lGODlhEAAQAIYAAPwCBFxaVMR+RPzKjNze3AQCBMR6RPzGjPyODPz+/MzO
    zPyKDPyKBPz29OTWzPyGDPyGBOx6BOza1OR2BKROBNSOXKRKBBwOBOzu7PTW
    xPzizOySZPyCDFxaXOy2lNRyRMxmJCQOBPTm1OzStPTKrMR+XIRWLFxGNCQS
    BDQyNIRSNDQuJERGRLyqlNzSvIx6ZKRuVEw6LLSyrLymhKSShBwaFFROTJyW
    jMS+vNzW1OTazNzKrHRqXOzezOTOpPTq3OzWvOTStLyedMS+rLy2pMSynMSu
    lAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAAAAA
    LAAAAAAQABAAAAewgAAAAYSFhoQCA4IBBI2OjgUGBwiLBAmXlpcKkgsMlZcJ
    BA0JDpIPEBGVjwkSBgOnExSfmBIVBxAMExYXswkYGRobHLq8gh2PHhoeHyAW
    IYKzIiMkJSYnKCnQg5YNHtQqKywtK9qMBC4vMDEBMjIz2dCMDTQ1Njc4OToz
    5PEEOzw3ZPToMcLHO23HfogQ0QMIkCA+hPBbhAPHECJFjMyYIUQIvEUpUqwQ
    OXKkSEF+AgEAIf5oQ3JlYXRlZCBieSBCTVBUb0dJRiBQcm8gdmVyc2lvbiAy
    LjUNCqkgRGV2ZWxDb3IgMTk5NywxOTk4LiBBbGwgcmlnaHRzIHJlc2VydmVk
    Lg0KaHR0cDovL3d3dy5kZXZlbGNvci5jb20AOw==}]
 
 if 0 {This thing is more useful if you can get more information about an item by clicking on it
 - for a file, its size and date; for a variable, its value; for a proc, its full specification, etc.
 As a small first shot, I selected a "balloon" for that purpose. }
 
 proc LemonTree::Info {w node} {
     set type [dict'get [$w itemcget $node -data] type]
     if {[info proc ::LemonTree::info($type)] ne ""} {
         balloon $w [info($type) $w $node]
     }
  }
 #-- type-specific info providers:
  proc LemonTree::info(array) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     return "$name: array, [array size $name] elements"
  }
  proc LemonTree::info(command) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     if {[info procs $name] ne ""} {
         return [procinfo $name]
     } else {return "$name: compiled command"}
  }
  
  proc LemonTree::info(dir) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     set mtime [clock format [file mtime $name] -format %y-%m-%d,%H:%M:%S]
     set nfiles [llength [glob -nocomplain $name/*]]
     return "$name\n$nfiles files\nModified: $mtime"
  }
 
  proc LemonTree::info(html) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     set val1 [::t get $name data]
     set val2 [::t get $name type]
     set val3 [::t index $name]
     
     return "$val1"
  }
  
  proc LemonTree::info(file) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     set mtime [clock format [file mtime $name] -format %y-%m-%d,%H:%M:%S]
     return "$name\n[file size $name] bytes\nModified: $mtime"
  }
  proc LemonTree::info(namespace) {w node} {
     set ns [dict'get [$w itemcget $node -data] name]
     return "namespace $ns\n[llength [info commands ${ns}::*]] commands,\
         [llength [info vars ${ns}::*]] variables,\
         [llength [namespace children $ns]] child(ren)"
  }
  proc LemonTree::info(variable) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     if [info exists $name] {
         list $name = [set $name]
     } else { #-- array element
         set arr [dict'get [$w itemcget [$w parent $node] -data] name]
         list ${arr}($name) = [set ${arr}($name)]
     }
  }
  
  proc LemonTree::info(widget) {w node} {
     set name [dict'get [$w itemcget $node -data] name]
     return "[winfo class $name] $name [winfo geometry $name]"
  }
 #-- A simple ballon, modified from Bag of Tk algorithms:  
 proc balloon {w text} {
     set top .balloon
     catch {destroy $top}
     toplevel $top -bd 1
     pack [message $top.txt -aspect 10000 -bg lightyellow \
         -borderwidth 0 -text $text -font {Helvetica 9}]
     wm overrideredirect $top 1
     wm geometry $top +[winfo pointerx $w]+[winfo pointery $w]
     bind  $top <1> [list destroy $top]
     raise $top
  }
 
 if 0 {From Tcl 8.5, one would use a real dict, but it's easy to make a replacement
 that works roughly the same in 8.4 (it returns "" for non- existing keys instead of throwing an error),
 and might be slower, but I won't notice on dicts with two elements ;-}
 
 proc dict'get {dict key} {
     foreach {k value} $dict {if {$k eq $key} {return $value}}
  }
 #-- reconstruct a proc's definition as a string:
 proc procinfo name {
     set args ""
     foreach arg [info args $name] {
         if [info default $name $arg def] {lappend arg $def}
         lappend args $arg
     }
     return "proc $name {$args} {[info body $name]}"
  }
  
 #
 #
 #
 proc main {} {
   set url [.txt get]
   catch {t destroy}
   .t delete [.t nodes root]

   if {$url == ""} {
   tk_messageBox -message "specify html location"
   return
   }
   ::struct::tree t
   if {[string range $url 0 3] == "http"} {
     set http  [::http::geturl $url]
     set html  [::http::data $http]
   } else {
     set html [read [set fh [open $url]]]
     close $fh
   }
   
   #puts $url
   htmlparse::2tree $html t
   htmlparse::removeVisualFluff t
   htmlparse::removeFormDefs t
   LemonTree::add .t root html    root  "(html)"
   
 return
 }
  
 # -------------------------------------------
 label .lbl -text "URL or path:"
 entry .txt -width 60
 button .btnBrowser -text Browse... -command {.txt insert end [tk_getOpenFile]}
 button .btnGet -text "Get html file" -command main
 button .btnSalir -text Exit -command {
 catch {t destroy}
 exit
 }
 grid .lbl .txt .btnBrowser
 grid .btnGet -row 1 -column 1
 grid .btnSalir -row 1 -column 2
 
 #-- Now to demonstrate and test the whole thing: 
 Tree .t -background white -opencmd {LemonTree::open .t} \
 -width 60 -height 30 -yscrollcommand {.y set}
  .t bindText  <1> {LemonTree::Info .t}
  .t bindImage <1> {LemonTree::Info .t}
  
 scrollbar .y -command {.t yview}

 grid .t -row 2 -column 0 -columnspan 3 
 grid .y -row 2 -column 3 -sticky ns
 .txt insert end "http://www.cs.grinnell.edu/~walker/fluency-book/labs/sample-table.html"
 #-- Little development helpers:
 bind . <Escape> {exec wish $argv0 &; exit}
 bind . <F1> {console show}

Jorge - 2011-08-14 19:31:47

added a fix to the main menu

 catch {t destroy}
 .t delete [.t nodes root]