Package DGA

Author Jackson McCann.

 #=======================================================================
 #
 # Package: DGA.tcl
 #
 # Purpose: A tcl package that implements a number of algorithms on top of 
 # the basic Tcl ::struct::graph package.

 package provide DGA 1.0

 #=======================================================================
 # NAMESPACE ::DGA::

 namespace eval DGA {

    #===================================================================
    # Export the functions that the user should be calling
    namespace export shortest_path min_span_tree predecessor_list
    namespace export pl_node pl_arc pl_dist
    namespace export topological_sort outonly_nodes is_DAG

    # The graph package is needed for obvious reasons so we include
    # the struct package that contains it
    package require struct

    # Define any variables used by the package
    variable topo_sort {}

    #-------------------------------------------------------------------
    # update_node
    #
    # g - The graph
    # n - The node
    # u - Used start 0 Unused, 1 Found, 2 Completely used
    # d - The distance from the starting node
    # p - The predecessor node
    proc update_node { g n d p u } {
        
        # Set this nodes predecessor
        $g node set $n -key predecessor $p
        
        # The distance to the source node
        $g node set $n -key distance $d

        # Set the used flag
        if { [expr $u > -1] } {
            $g node set $n -key "used" $u
        }
    }

    #-------------------------------------------------------------------
    # init_graph
    #    Algorithms such as Dijkstra'a and Prim's need to flag the nodes
    #    and arcs of the graph and to record additional information 
    #    against the nodes and arcs.  This function initializes these
    #    values for a graph
    proc init_graph { g } {

        # Setup.  Update each node with the keyed values that will hold 
        #         the following information for each node:
        #         The distance from the source to the node - infinite
        #         The predecessor of the node - nothing
        #         The nodes status 0 - Not found, 1 - Found, 2 - Finished
        foreach node [$g nodes] {
            # This node doesn't have a predecessor and the distance to the
            # source node is undefined and the node is unused
            update_node $g $node -1 {} 0
        }

        # Initialize the arcs in the graph.  Make sure they all
        # have a weight associated with them.  Give a default
        # weight of 1 if no weight is found.  Set a flag to show
        # if the arc has been used in the algorithm
        foreach arc [$g arcs] {
            $g arc set $arc -key used 0
            if { [catch {
                $g arc get $arc -key weight
            }] } {
                $g arc set $arc -key weight 1.0
            }
        }
    }

    #-------------------------------------------------------------------
    # min_arc
    #    Within the graph zero, one or many nodes will be in state 1,
    #    i.e., the node has been found and still has outward arcs that 
    #    have not been used.  Find the arc that has the minimum weight.
    #    As a side effect if a node has no unused outward arcs left 
    #    then mark the node as used.  If no arc can be found then
    #    return an empty list.
    # g - The graph to search
    proc min_arc { g } {

        # Initialise the working variables used by this proc
        # The arc from the source node
        set tArc {}     
        # The weight of the arc
        set tArcMin -1  

        # Find the shortest arc out of the nodes that have been found.
        # Use this arc if it provides a shorter route to the target or
        # if it discovers a new target node
        foreach node [$g nodes -key used -value 1] {
            
            # The hasArc counter is used to determine when a node has
            # no unused arcs left.
            set hasArc 0
            
            # Look at each arc that goes out of the current node
            foreach arc [$g arcs -out $node -key used -value 0] {
                # Set hasArc as this node has at least one unused arc
                set hasArc 1
                # Get the weight of this arc.  If it is less than the
                # current value of tArcMin then it will be used
                set arcWeight [$g arc get $arc -key weight]
                if { [expr $tArcMin == -1] || [expr $tArcMin > $arcWeight] } {
                    # Store the source node and arc names
                    set tArc $arc
                    set tArcMin $arcWeight
                }
            }
            
            # If a node doesn't have any unused arcs left then
            # mark the node as completely used
            if { [expr $hasArc == 0] } {
                $g node set $node -key used 2
            }
        }

        # Now mark this arc as having been used
        if { $tArc != {} } {
            $g arc set $tArc -key used 1
        }
        
        return $tArc 
    }

    #===================================================================
    # predecessor_list
    #    After an algorithm such as:
    #       shortest_path
    #       min_span_tree
    #    has been run on the graph it will contain a predecessor list,
    #    that is for each node that was found an arc will be held under
    #    the key 'predecessor' that defines the arc and node that this
    #    node was reached from.  This function returns this list as a
    #    Tcl list with a structure.  The pl_ functions can be used to
    #    access this list
    # g - The graph
    proc predecessor_list { g } {

        set idxList {}
        set detList {}
        foreach node [$g nodes] {

            # The distance node is from the source
            set nDist [$g node get $node -key distance]

            # The arc that links node to its predecessor
            set pArc  [$g node get $node -key predecessor]

            # The node that precedes this node, if there is one
            if { $pArc == {} } {
                set pNode {}
            } else {
                set pNode [$g arc source $pArc]
            }

            lappend idxList $node
            lappend detList [list $node $pNode $pArc $nDist]

        }

        return [list [lsort $idxList] [lsort $detList]]
    }

    #===================================================================
    # pl_node
    # pl_arc
    # pl_dist
    #    Three helper functions that make it easier to get
    #    details about a node's predecessor.
    # pl - The predecessor list as returned by predecessor_list
    # n  - The node
    proc pl_node { pl n } {
        set idx [lsearch -sorted [lindex $pl 0] $n]
        return [lindex [lindex [lindex $pl 1] $idx] 1]
    }

    proc pl_arc { pl n } {
        set idx [lsearch -sorted [lindex $pl 0] $n]
        return [lindex [lindex [lindex $pl 1] $idx] 2]
    }

    proc pl_dist { pl n } {
        set idx [lsearch -sorted [lindex $pl 0] $n]
        return [lindex [lindex [lindex $pl 1] $idx] 3]
    }

    #===================================================================
    # shortest_path
    #    Using Dijkstra's algorithm find a shortest path from the node
    #    n to every other node that is reachable from n.
    #
    # g - The graph to be processed
    # n - The starting node
    proc shortest_path { g n } {

        # Setup.  Update the nodes and arcs with the required 
        #         flags etc.
        init_graph $g

        # Mark the starting node as being used and as having a distance
        # of zero from itself.
        update_node $g $n 0.0 {} 1

        # Now start looking for nodes we don't know about by exploring out
        # from the starting node.
        while {1} {
            
            # Find the arc with minimum length that goes from
            # a found node
            set tArc [min_arc $g]

            # Test for the end of the graph, we have no unused nodes
            # or arcs left.  This does not imply that all of the nodes
            # in the graph have been discovered, some nodes may not be
            # reachable from the given starting node.  These can be  
            # identified as they have a distance of -1.
            if { $tArc == {} } { break } 

            # Get the distance from the first node of
            # this node
            set sDist [$g node get [$g arc source $tArc] -key distance]
            
            # Get the name of the target node
            set tNode [$g arc target $tArc]
            
            # Get the weight of the arc joining the source and
            # target nodes.
            set weight [$g arc get $tArc -key weight]
            
            # Get the distance from the source node of the 
            # target node.  This will be -1 if we have never
            # seen this node before.
            set tDist [$g node get $tNode -key distance]
            
            # Does this arc build a short path to the target node?
            if { [expr $tDist == -1] } {
                # Yes - it must do, this is the first time 
                # the target node has been encountered.
                
                # Save the distance from the source node and the 
                # arc that leads to the predecessor and mark the 
                # node as found, it will now be considered next time
                # we look for the shortest arc.
                update_node $g $tNode [expr $sDist + $weight] $tArc 1
                
            } else {
                # Does this arc provide a cheaper way to get to
                # the already discovered node?  Calculate the distance
                # based on the new arc's weight and compare it with the stored
                # distance for the node.
                set ttDist [expr $sDist + $weight]
                if { [expr $ttDist < $tDist] } {
                    # Update the node to use the new arc as its
                    # predecessor
                    update_node $g $tNode $ttDist $tArc -1
                }
            }
        }
    }

    #===================================================================
    # min_span_tree
    #    Using Prims's algorithm find a minimum spanning tree starting 
    #    at the node n and reaching to every other node that is reachable
    #    from n.
    #
    # g - The graph to be processed
    # n - The starting node
    proc min_span_tree { g n } {

        # Setup.  Update the nodes and arcs with the required 
        #         flags etc.
        init_graph $g

        # Mark the starting node as being used and as having a distance
        # of zero from itself.
        update_node $g $n 0.0 {} 1

        # Now start looking for nodes we don't know about by exploring out
        # from the starting node.
        while {1} {
            
            # Find the arc with minimum length that goes from
            # a found node
            set tArc [min_arc $g]

            # Test for the end of the graph, we have no unused nodes
            # or arcs left.  This does not imply that all of the nodes
            # in the graph have been discovered, some nodes may not be
            # reachable from the given starting node.  These can be  
            # identified as they have a distance of -1.
            if { $tArc == {} } { break } 

            # Get the distance from the first node of
            # this node
            set sDist [$g node get [$g arc source $tArc] -key distance]
            
            # Get the name of the target node.
            set tNode [$g arc target $tArc]
            
            # Get the weight of the arc joining the source and
            # target nodes.
            set weight [$g arc get $tArc -key weight]
            
            # Get the distance from the source node of the 
            # target node.  This will be -1 if we have never
            # seen this node before.
            set tDist [$g node get $tNode -key distance]
            
            # Does this arc discover a new target node?
            if { [expr $tDist == -1] } {
                # Yes it does 
                
                # Save the distance from the source node and the 
                # arc that leads to the predecessor and mark the 
                # node as found.  It will now be considered next time
                # we look for the shortest arc.
                update_node $g $tNode [expr $sDist + $weight] $tArc 1
            }
        }
    }

    #===================================================================
    # outonly_nodes
    #    An 'out only' node is one that has no inward arcs.  That is the
    #    node can have no predecessor as no no arc has it as its target
    #    This function finds all such nodes with the graph.
    # 
    # g - The graph
    proc outonly_nodes { g } {
        set result {}

        foreach node [$g nodes] {
            if { [$g node degree -in $node] == 0 } {
                lappend result $node
            }
        }

        return $result
    }

    #-------------------------------------------------------------------
    # make_toplevel_node
    #    When checking for DAGness and/or performing a topological sort
    #    a single top-level node to start the process from is required.
    #    This function transforms the graph so that it has that single
    #    top node.  The name of this new node is returned.
    proc make_toplevel_node { g } {

        # Find toplevel node(s)
        set oonList [outonly_nodes $g]

        # If there are no out-only nodes then we can't sort the
        # graph as it must have at least one cycle.
        if { [llength $oonList] == 0 } {
            error "$g is not a DAG.  Graph contains one or more cycles"
        }

        # If there are more than one out-only nodes a new parent node that
        # provides a single starting point for the sort must be created above
        # these nodes.  To make life simple we do this for one node as well.
        set startNode [$g node insert]
        
        # Connect the startNode to the original out-only nodes
        foreach node $oonList {
            $g arc insert $startNode $node
        }

        return $startNode
    }

    #-------------------------------------------------------------------
    # topo_node
    #    This routine is called by the topological_sort when a node has
    #    been completely explored.  The routine has two functions. 
    proc topo_node { dir g n } {
        variable topo_sort
        lappend topo_sort $n
    }

    #===================================================================
    # topological_sort
    #    Perform a topological sort on the graph.  This will return a
    #    list of nodes giving an ordering on the nodes such that all 
    #    arcs go from left to right. Only an acyclic graph can have a 
    #    topological sort, because a directed cycle must eventually 
    #    return home to the source of the cycle. However, every DAG 
    #    has at least one topological sort.
    #
    # g - The graph to sort
    proc topological_sort { g } {

        variable topo_sort {}

        # Make a toplevel (parentless) node to start the sort from
        set startNode [make_toplevel_node $g]

        # Setup.  Update the nodes and arcs with the required 
        #         flags etc.
        init_graph $g 

        # Walk the graph, nodes are added to the topo_sort list
        # in the order that they are marked as completely explored.
        # The topological sort is the reverse of this order.
        # NB - This code won't detect any cycles in the data so it's
        #      up to the user to determine that the graph is a DAG
        #      The is_DAG function can be used for this
        $g walk $startNode -order post -type dfs -command topo_node

        # Remove the start node we created from the graph, all of the
        # arcs will be removed as well.
        $g node delete $startNode

        # Reverse the list and discard the node that
        # was added to the graph by this routine.
        set result {}
        for { set i [expr [llength $topo_sort] - 2] } {$i >= 0} {incr i -1} {
            lappend result [lindex $topo_sort $i]
        }
        
        return $result

    }
    
    #===================================================================
    # dag_dfs
    #    Recursive Depth First Search of the graph.  If a node is 
    #    discovered that has the used flag set to 1, then it has been
    #    found by a cycle through one of its children and the graph
    #    is not a DAG.  A stack-based implementation of this function
    #    would be better.  It would not risk blowing up if the recursion
    #    level got too big.
    # 
    # g - The graph
    # n - The next node to check
    proc dag_dfs { g n } {

        # We have found a new node, mark it as such.
        $g node set $n -key used 1
        
        # Process each of the arcs out of the node.
        foreach arc [$g arcs -out $n] {
            
            # Find out the details of the target node.
            set tNode [$g arc target $arc]
            set used [$g node get $tNode -key used]
            
            # If the node has been discovered but not completed then
            # this is a back edge and the graph contains a cycle.
            if { $used == 1 } {
                error "$g is not a DAG.  Graph contains one or more cycles"
            }
            
            dag_dfs $g $tNode

        }

        # We have completely used this node.
        $g node set $n -key used 2

    }

    #===================================================================
    # is_DAG
    #    Determine if the graph is a DAG, that is that it contains no
    #    cycles.  If it isn't a DAG then an error is thrown!
    #
    # g - The graph
    proc is_DAG { g } {
        
        # Make a toplevel (parentless) node to start the sort from
        set startNode [make_toplevel_node $g]

        # Setup.  Update the nodes and arcs with the required 
        #         flags etc.
        init_graph $g

        # Use the simple recursive definition of depth-first search
        # to search through the tree for back edges.
        set result [dag_dfs $g $startNode]

        # Remove the start node we created from the graph. All of the
        # arcs will be removed as well.
        $g node delete $startNode

        return $result
    }
 }