Version 1 of ksh chatroom snaphost history

#! /bin/ksh

 # Author: Larry W. Virden < mailto:[email protected] >
 # Date:    Sept. 19, 2001
 # Version: 1.4
 # Purpose: to scrape tcl'ers chat log pages and accumulate them
 # NOTE: This script is going to record private messages and memos to your
 #   id, so you should be careful to review the information before making it
 #   public.  Also, right now it does not toss away /help msgs, the
 #  WELCOME message, etc.
 #
 # USAGE: scrape NUMBER_OF_SECONDS

 # Treat any reference to an unset variable as an error.
 set -u

 # Exactly one argument is required: the number of seconds between snapshots.
 # The usage message is a diagnostic, so it goes to stderr (>&2); the former
 # "2>&1" only pointed stderr at stdout and left the message on stdout.
 if [ "$#" -ne 1 ] ; then
        echo "USAGE: $0 snapshot_time_in_seconds" >&2
        exit 1
 fi

 # pslock - take the per-user process lock kept in $HOME/.pslock.
 #
 # The lock file holds one line: "<machine> <pid>".
 #   - No lock file: create it for this process and return 0.
 #   - Lock file present: probe the recorded process with "kill -0" (through
 #     "rsh -n <machine>" when the lock was taken on another host).  Any
 #     error output from the probe means the holder is considered dead; the
 #     lock is then replaced with our own and 0 is returned.  No error output
 #     means the holder is alive and 1 is returned.
 #
 # Globals written: machine, psmachine, psprocess, prefix, pslockfile,
 #                  pslockerr
 # Outputs:         a notice on stderr when a stale lock is replaced
 # Returns:         0 when this process now holds the lock, 1 otherwise
 pslock()
 {
 set -o nolog
 machine=$(hostname)
 pslockfile="$HOME/.pslock"

 if [ -f "$pslockfile" ] ; then
        read -r psmachine psprocess < "$pslockfile"
        if [ "$psmachine" != "$machine" ] ; then
                # lock is remote
                prefix="rsh -n $psmachine "
        else
                prefix=""
        fi

        # Capture the probe's error text directly instead of going through a
        # predictable /tmp/$$ file.  $prefix is left unquoted on purpose: it
        # must split into the rsh command words, or vanish when empty.
        pslockerr=$($prefix kill -0 "$psprocess" 2>&1)
        if [ -z "$pslockerr" ] ; then
                # The probe was silent: the recorded process still exists.
                return 1
        fi

        echo "Removing (probably) dead link $psmachine:$psprocess" >&2
        rm -f "$pslockfile"
 fi

 echo  "$machine $$" > "$pslockfile"
 chmod 600 "$pslockfile"
 return 0
 }
 # Take the process lock; if pslock reports failure, stop with status 2.
 pslock || exit 2

 # On any of the listed signals: announce it, release the process lock and
 # exit with status 3.  The lock path is quoted so it is still removed when
 # $HOME contains whitespace, and -f keeps rm quiet if the lock is already
 # gone.
 trap 'echo "TRAP encountered";rm -f "$HOME/.pslock";exit 3' \
        HUP QUIT INT ABRT BUS SEGV SYS PIPE TERM \
        USR1 USR2

 # Directory in which the chat history files are kept.
 BASE=$HOME
 # Name of the soft link that points at the history file in use.
 current="${BASE}/chat.current"

 # Seconds to wait between snapshots (the script's only argument).
 time=$1
 # When set to "1", a date line is written ahead of each batch of new text.
 timestamp=0

 #      scrape must be a non-interactive command that can fetch a page of
 #      text; it is built up piecewise here only to keep the lines short.
 scrape="/projects/intranet/bin/lynx"
 scrape="$scrape -dump -nolist -nolog -nopause -nostatus -noprint"
 scrape="$scrape -dont_wrap_pre -hiddenlinks=ignore -noreferer -width=10240"

 # URL is the address of your chat information.
 # To find it, visit the chat room with your web browser and look up the
 #  URL information for the dialog frame.
 # The chat room is expected to be configured so that new messages appear
 #  at the bottom of the screen.
 # WARNING: your chat password is part of the URL - protect this file
 #  appropriately.
 URL=

 # Looping forever.  Each pass of the outer loop covers one calendar day:
 # it selects that day's files and then polls until the date changes.
 while : ; do
    date=$(date +%Y%m%d)
    history=$BASE/chat.$date
    working1=$BASE/chat.scrape.$date
    working2=$BASE/chat.working.$date

 #  Repoint the chat.current soft link at today's history file.
 #  -f because the link does not exist yet on the very first run.
    rm -f "$current"
    ln -s "$history" "$current"

 #  Create file with today's date
    touch "$history" "$working1" "$working2"
    chmod 600 "$history" "$working1" "$working2"

 #  Looping once every N seconds
    while : ; do

 #   Scrape the chat's page into a file.
 #   $scrape is left unquoted on purpose: it holds a command plus its
 #   options and has to be split into words.  The URL is passed as a single
 #   quoted word (so '?' or '*' in it is never globbed) and is omitted
 #   altogether while URL is still empty.

       sdate=$(date)
       $scrape ${URL:+"$URL"} > "$working1"
 #   diff the historical file and the new scraping
 #   The deletion of 4 lines is because of my use of lynx;  It will
 #   be removed once I figure out how to tell lynx to stop adding stuff to
 #   the output.  If you don't use lynx, then you probably won't need that
 #   sed segment.
 #   grep -E replaces the obsolescent egrep, which newer GNU grep releases
 #   warn about on every invocation.
       diff "$history" "$working1" | sed -n '/^>/p'| \
         sed -e 's!^> !!' | sed -e '1,4d' | \
         grep -E -v '^ *[^ ]+ has (entered|left) the chat' > "$working2"

 #   Manipulate the results so that the unique output is then appended
 #    to the history
       if [ -s "$working2" ] ; then
         if [ "$timestamp" = "1" ] ; then
                 echo "$sdate" >> "$history"
         fi
         cat "$working2" >> "$history"
       fi

 #   If the date has changed, exit the inner loop
 #
       cdate=$(date +%Y%m%d)
       if [ "$cdate" != "$date" ] ; then
         break
       fi
       sleep "$time"
    done

 done

See also Tcl chatroom snaphost history (2) for a pure Tcl version that will also run on non-*nix platforms.

AK: I changed my version slightly.

Instead of using $HOME everywhere, I use $BASE and define it at the beginning of the script. For me, its value is $HOME/archives/chat, for example. This paves the way for a future redefinition via a command-line argument.

A second change is that I maintain a soft link, chat.current, which points to the history file the scraper is currently appending to. This makes tail'ing the log when crossing midnight easier.

LV: AK's changes have been merged in above. Also added above is a process lock, so that the script can be run from cron.