Initial revision
authorAdam Dickmeiss <adam@indexdata.dk>
Tue, 6 Aug 1996 14:04:22 +0000 (14:04 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Tue, 6 Aug 1996 14:04:22 +0000 (14:04 +0000)
Makefile.in [new file with mode: 0644]
configure.in [new file with mode: 0644]
hswitch.c [new file with mode: 0644]
init.c [new file with mode: 0644]
install-sh [new file with mode: 0755]
robot.tcl [new file with mode: 0755]
tclmain.c [new file with mode: 0644]
tclrobot.h [new file with mode: 0644]

diff --git a/Makefile.in b/Makefile.in
new file mode 100644 (file)
index 0000000..7729bb4
--- /dev/null
@@ -0,0 +1,50 @@
+# Makefile for Tcl Web Robot
+# $Id: Makefile.in,v 1.1 1996/08/06 14:04:22 adam Exp $
+SHELL=/bin/sh
+
+# Version
+VERSION=0.0
+
+# Directory prefix wich machine independent files
+prefix=@prefix@
+
+# Directory prefix with machine dependent files
+exec_prefix=@exec_prefix@
+
+BINDIR=$(exec_prefix)/bin
+LIBDIR=$(exec_prefix)/lib
+LIBRARY=@(prefix)/lib/tclrobot
+
+TCLLIB=@TCLLIB@
+TCLINC=@TCLINC@
+TKLIB=@TKLIB@
+TKINC=@TKINC@
+
+INCLUDE=$(TCLINC)
+DEFS=$(INCLUDE) 
+
+INSTALL=@INSTALL@
+INSTALL_PROGRAM=@INSTALL_PROGRAM@
+INSTALL_DATA=@INSTALL_DATA@
+RANLIB=@RANLIB@
+SHLIB_LD=@SHLIB_LD@
+
+O=hswitch.o init.o
+
+tclrobot: tclrobot.a tclmain.o
+       $(CC) -o tclrobot $(CFLAGS) tclmain.o tclrobot.a $(TCLLIB)
+
+tclrobot.a: $(O)
+       rm -f tclrobot.a
+       ar cr tclrobot.a $(O)
+       $(RANLIB) tclrobot.a
+
+libtclrobot.so: $(O)
+       $(SHLIB_LD) -o libtclrobot.so $(O)
+       $(RANLIB) libtclrobot.so
+
+.c.o:
+       $(CC) -c $(CFLAGS) $(DEFS) $<
+
+clean:
+       rm -f tclrobot core *.out *.o *.a *.so config.*
diff --git a/configure.in b/configure.in
new file mode 100644 (file)
index 0000000..b755929
--- /dev/null
@@ -0,0 +1,49 @@
+dnl Web robot toolkit for tcl
+dnl (c) Index Data 1996
+dnl See the file LICENSE for details.
+dnl $Id: configure.in,v 1.1 1996/08/06 14:04:22 adam Exp $
+AC_INIT(tclrobot.h)
+CC=${CC-cc}
+dnl ------ Substitutions
+AC_SUBST(CC)
+AC_SUBST(TCLLIB)
+AC_SUBST(TKLIB)
+AC_SUBST(TCLINC)
+AC_SUBST(TKINC)
+AC_SUBST(SHLIB_LD)
+AC_SUBST(RANLIB)
+dnl ------ Preliminary settings
+AC_PROG_INSTALL
+AC_PREFIX_PROGRAM(tclsh)
+AC_STDC_HEADERS
+if test "$ac_cv_header_stdc" = no; then
+       AC_MSG_WARN(Your system doesn't seem to support ANSI C)
+fi
+dnl ------ look for Tcl
+if test "x$prefix" = xNONE; then
+       tryprefix=/usr/local
+else
+       tryprefix=${prefix}
+fi
+if test -r ${tryprefix}/lib/tclConfig.sh; then
+       AC_MSG_CHECKING(for Tcl)
+       source ${tryprefix}/lib/tclConfig.sh
+       TCLLIB="${TCL_LIB_SPEC} ${TCL_LIBS}"
+       TCLINC=-I${TCL_PREFIX}/include
+       RANLIB=${TCL_RANLIB}
+       SHLIB_LD=${TCL_SHLIB_LD}
+       AC_MSG_RESULT($TCL_VERSION)
+else
+       AC_MSG_WARN(Didn't find Tcl)
+fi
+dnl ------ look for Tk
+AC_MSG_CHECKING(for Tk)
+if test -r ${tryprefix}/lib/tkConfig.sh; then
+       source ${tryprefix}/lib/tkConfig.sh
+       AC_MSG_RESULT($TK_VERSION)
+       TKINC=${TK_XINCLUDES}
+       TKLIB="${TK_PREFIX}/lib/${TK_LIB_FILE} ${TK_LIBS}"
+else
+        AC_MSG_WARN(Didn't find Tk)
+fi
+AC_OUTPUT(Makefile)
diff --git a/hswitch.c b/hswitch.c
new file mode 100644 (file)
index 0000000..e631c1c
--- /dev/null
+++ b/hswitch.c
@@ -0,0 +1,233 @@
+/*
+ * $Id: hswitch.c,v 1.1 1996/08/06 14:04:22 adam Exp $
+ */
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "tclrobot.h"
+
+#define TAG_MAX_LEN 32
+
+#define SPACECHR " \t\r\n\f"
+
+static int skipSpace (const char *cp)
+{
+    int i = 0;
+    while (strchr (SPACECHR, cp[i]))
+        i++;
+    return i;
+}
+
+static int skipTag (const char *cp, char *dst)
+{
+    int i;
+
+    for (i=0; i<TAG_MAX_LEN-1 && cp[i] && !strchr (SPACECHR "/>=", cp[i]); i++)
+        dst[i] = tolower(cp[i]);
+    dst[i] = '\0';
+    return i;
+}
+
+static int skipParm (const char *cp, char *name, char **value)
+{
+    int i = skipTag (cp, name);   
+    *value = NULL;
+    if (!i)
+        return skipSpace (cp);
+    i += skipSpace (cp + i);
+    if (cp[i] == '=')
+    {
+        int v0, v1;
+        i++;
+        i += skipSpace (cp + i);
+        if (cp[i] == '\"')
+        {
+            v0 = ++i;
+            while (cp[i] != '\"' && cp[i])
+                i++; 
+            v1 = i;
+            if (cp[i])
+                i++;
+        }
+        else
+        {
+            v0 = i;
+            while (cp[i] && !strchr (SPACECHR ">", cp[i]))
+                i++;
+            v1 = i;
+        }
+        *value = malloc (v1 - v0 + 1);
+        memcpy (*value, cp + v0, v1-v0);
+        (*value)[v1-v0] = '\0';
+    }
+    i += skipSpace (cp + i);
+    return i;
+}
+
+struct tagParm {
+    char name[TAG_MAX_LEN];
+    char *value;
+    struct tagParm *next;
+};
+
+struct tagInfo {
+    int level;
+    char *pattern;
+    char *code;
+
+    char name[TAG_MAX_LEN];
+    const char *body_start;
+    struct tagParm *tagParms;
+};
+
+static int tagLookup (struct tagInfo *tags, int tagNo, const char *tagString)
+{
+    int i;
+    for (i = 0; i<tagNo; i++)
+        if (!strcmp (tags[i].pattern, tagString))
+             return i;
+    return -1;
+}
+
+static int tagStart (struct tagInfo *tag, const char *tagString,
+                     const char *cp)
+{
+    int i;
+    char parm_name[TAG_MAX_LEN];
+    char *parm_value;
+    struct tagParm **nParms;
+
+    if (tag && !tag->level)
+    {
+        strcpy (tag->name, tagString);
+        tag->tagParms = NULL;
+        nParms = &tag->tagParms;
+    }
+
+    i = skipSpace (cp);
+    while (cp[i] && cp[i] != '>')
+    {
+        int nor =  skipParm (cp+i, parm_name, &parm_value);
+        i += nor;
+        if (nor && tag && !tag->level)
+        {
+            *nParms = malloc (sizeof(**nParms));
+            assert (*nParms);
+            (*nParms)->next = NULL;
+            strcpy ((*nParms)->name, parm_name);
+            (*nParms)->value = parm_value;
+        }
+        else
+        {
+            if (!nor)
+                i++;
+            free (parm_value);
+        }
+    }
+    if (cp[i])
+        i++;
+    if (tag)
+    {
+        if (!tag->level)
+            tag->body_start = cp+i;
+        ++(tag->level);
+    }
+    return i;
+}
+
+static int tagEnd (Tcl_Interp *interp, struct tagInfo *tag,
+                   const char *tagString, const char *cp, const char *body_end)
+{
+    int i = 0;
+
+    if (cp[i] == '>')
+        i++;
+
+    if (tag && tag->level)
+    {
+        -- (tag->level);
+        if (!tag->level)
+        {
+            struct tagParm *tp = tag->tagParms;
+            char *value = malloc (body_end - tag->body_start + 1);
+
+            assert (value);
+            memcpy (value, tag->body_start, body_end - tag->body_start);
+            value[body_end - tag->body_start] = '\0';
+            Tcl_SetVar (interp, "body", value, 0);
+            free (value);
+            while (tp)
+            {
+                char vname[TAG_MAX_LEN+30];
+                struct tagParm *tp0 = tp;
+                
+                sprintf (vname, "parm(%s)", tp->name);
+
+                Tcl_SetVar (interp, vname, tp->value ? tp->value : "",0);
+                tp = tp->next;
+                free (tp0);
+            }
+            Tcl_Eval (interp, tag->code);
+        }
+    }
+    return i;
+}
+
+int htmlSwitch (ClientData clientData, Tcl_Interp *interp,
+                int argc, char **argv)
+{
+    struct tagInfo *tags;
+    int noTags;
+    const char *cp;
+    int i, argi = 1;
+
+    cp = argv[argi++];
+    noTags = (argc - argi)/2;
+    if (noTags < 1)
+    {
+        interp->result =
+            "wrong # args: should be ?switches? string pattern body ...";
+        return TCL_ERROR;
+    }
+    tags = malloc (sizeof(*tags) * noTags);
+    assert (tags);
+    for (i = 0; i<noTags; i++)
+    {
+        tags[i].level = 0;
+        tags[i].pattern = argv[argi++];
+        tags[i].code = argv[argi++];
+    }
+    while (*cp)
+    {
+        if (cp[0] == '<' && cp[1] != '/')     /* start tag */
+        {
+            char tagStr[TAG_MAX_LEN];
+            int tagI;
+
+            cp++;
+            cp += skipTag (cp, tagStr);
+            tagI = tagLookup (tags, noTags, tagStr);
+            cp += tagStart (tagI >= 0 ? tags+tagI : NULL, tagStr, cp);
+        }
+        else if (cp[0] == '<')                /* end tag */
+        {
+            char tagStr[TAG_MAX_LEN];
+            const char *body_end = cp;
+            int tagI;
+
+            cp += 2;
+            cp += skipTag (cp, tagStr);
+            tagI = tagLookup (tags, noTags, tagStr);
+            cp += tagEnd (interp, tagI >= 0 ? tags+tagI : NULL,
+                          tagStr, cp, body_end);
+        }
+        else                                  /* no tag */
+            cp++;
+    }
+    free (tags);
+    return TCL_OK;
+}
+
+
diff --git a/init.c b/init.c
new file mode 100644 (file)
index 0000000..b3f7509
--- /dev/null
+++ b/init.c
@@ -0,0 +1,11 @@
+/*
+ * $Id: init.c,v 1.1 1996/08/06 14:04:22 adam Exp $
+ */
+#include "tclrobot.h"
+
+int TclRobot_Init (Tcl_Interp *interp)
+{
+    Tcl_CreateCommand (interp, "htmlSwitch", htmlSwitch, (ClientData) NULL,
+                       (Tcl_CmdDeleteProc *) NULL);
+    return TCL_OK;
+}
diff --git a/install-sh b/install-sh
new file mode 100755 (executable)
index 0000000..89fc9b0
--- /dev/null
@@ -0,0 +1,238 @@
+#! /bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+#
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+tranformbasename=""
+transform_arg=""
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""
+
+while [ x"$1" != x ]; do
+    case $1 in
+       -c) instcmd="$cpprog"
+           shift
+           continue;;
+
+       -d) dir_arg=true
+           shift
+           continue;;
+
+       -m) chmodcmd="$chmodprog $2"
+           shift
+           shift
+           continue;;
+
+       -o) chowncmd="$chownprog $2"
+           shift
+           shift
+           continue;;
+
+       -g) chgrpcmd="$chgrpprog $2"
+           shift
+           shift
+           continue;;
+
+       -s) stripcmd="$stripprog"
+           shift
+           continue;;
+
+       -t=*) transformarg=`echo $1 | sed 's/-t=//'`
+           shift
+           continue;;
+
+       -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+           shift
+           continue;;
+
+       *)  if [ x"$src" = x ]
+           then
+               src=$1
+           else
+               # this colon is to work around a 386BSD /bin/sh bug
+               :
+               dst=$1
+           fi
+           shift
+           continue;;
+    esac
+done
+
+if [ x"$src" = x ]
+then
+       echo "install:  no input file specified"
+       exit 1
+else
+       true
+fi
+
+if [ x"$dir_arg" != x ]; then
+       dst=$src
+       src=""
+       
+       if [ -d $dst ]; then
+               instcmd=:
+       else
+               instcmd=mkdir
+       fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad 
+# if $src (and thus $dsttmp) contains '*'.
+
+       if [ -f $src -o -d $src ]
+       then
+               true
+       else
+               echo "install:  $src does not exist"
+               exit 1
+       fi
+       
+       if [ x"$dst" = x ]
+       then
+               echo "install:  no destination specified"
+               exit 1
+       else
+               true
+       fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+       if [ -d $dst ]
+       then
+               dst="$dst"/`basename $src`
+       else
+               true
+       fi
+fi
+
+## this sed command emulates the dirname command
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+#  this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='   
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
+IFS="${oIFS}"
+
+pathcomp=''
+
+while [ $# -ne 0 ] ; do
+       pathcomp="${pathcomp}${1}"
+       shift
+
+       if [ ! -d "${pathcomp}" ] ;
+        then
+               $mkdirprog "${pathcomp}"
+       else
+               true
+       fi
+
+       pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+       $doit $instcmd $dst &&
+
+       if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+       if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+       if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+       if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+       if [ x"$transformarg" = x ] 
+       then
+               dstfile=`basename $dst`
+       else
+               dstfile=`basename $dst $transformbasename | 
+                       sed $transformarg`$transformbasename
+       fi
+
+# don't allow the sed command to completely eliminate the filename
+
+       if [ x"$dstfile" = x ] 
+       then
+               dstfile=`basename $dst`
+       else
+               true
+       fi
+
+# Make a temp file name in the proper directory.
+
+       dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+       $doit $instcmd $src $dsttmp &&
+
+       trap "rm -f ${dsttmp}" 0 &&
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing.  If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+       if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+       if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+       if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+       if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+       $doit $rmcmd -f $dstdir/$dstfile &&
+       $doit $mvcmd $dsttmp $dstdir/$dstfile 
+
+fi &&
+
+
+exit 0
diff --git a/robot.tcl b/robot.tcl
new file mode 100755 (executable)
index 0000000..b2a7224
--- /dev/null
+++ b/robot.tcl
@@ -0,0 +1,295 @@
+#
+# $Id: robot.tcl,v 1.1 1996/08/06 14:04:22 adam Exp $
+#
+proc RobotFileNext {area} {
+    if {[catch {set ns [glob $area/*]}]} {
+        return {}
+    }
+    set off [string first / $area]
+    incr off
+    foreach n $ns {
+       if {[file isfile $n]} {
+           if {[string first :.html $n] > 0} {
+               return http://[string range $area/ $off end]
+            }
+            return http://[string range $n $off end]
+        }
+        if {[file isdirectory $n]} {
+            set sb [RobotFileNext $n]
+            if {[string length $sb]} {
+                return $sb
+            }
+        }
+    }
+    return {}
+}
+
+proc RobotFileExist {area host path} {
+    set comp [split $area/$host$path /]
+    set l [llength $comp]
+    incr l -1
+    if {![string length [lindex $comp $l]]} {
+        set comp [split $area/$host$path:.html /]
+    }
+    return [file exists [join $comp /]]
+}
+
+proc RobotFileUnlink {area host path} {
+    set comp [split $area/$host$path /]
+    set l [llength $comp]
+    incr l -1
+    if {![string length [lindex $comp $l]]} {
+        set comp [split $area/$host$path:.html /]
+    }
+    if {[catch {exec rm [join $comp /]}]} return
+    incr l -1
+    for {set i $l} {$i > 0} {incr i -1} {
+        set path [join [lrange $comp 0 $i] /]
+       if {![catch {glob $path/*}]} return
+        exec rmdir ./$path
+    }
+}
+
+proc RobotFileOpen {area host path} {
+    set orgPwd [pwd]
+
+    set comp [split $area/$host$path /]
+    set len [llength $comp]
+    incr len -1
+    for {set i 0} {$i < $len} {incr i} {
+        set d [lindex $comp $i]
+        if {[catch {cd ./$d}]} {
+            exec mkdir $d
+            cd ./$d
+        }
+    }
+    set d [lindex $comp $len]
+    if {[string length $d]} {
+        set out [open $d w]
+    } else {
+        set out [open :.html w]
+    }
+    cd $orgPwd
+    return $out
+}
+
+proc RobotRestart {} {
+    global URL
+
+    while {1} {    
+        set url [RobotFileNext unvisited]
+        if {![string length $url]} break
+        set r [RobotGetUrl $url {}]
+        if {!$r} {
+           return
+        } else {
+            RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
+        }
+    }
+    exit 0
+}
+
+proc headSave {url out title} {
+    global URL
+
+    puts $out {<nwi>}
+    puts $out "<ti> $title"
+    if {[info exists URL($url,head,Last-modified)]} {
+        puts $out "<dm> $URL($url,head,Last-modified)"
+    }
+    puts $out {<si>}
+    if {[info exists URL($url,head,Date)]} {
+        puts $out " <lc> $URL($url,head,Date)"
+    }
+    if {[info exists URL($url,head,Content-length)]} {
+        puts $out " <by> $URL($url,head,Content-length)"
+    }
+    if {[info exists URL($url,head,Server)]} {
+        puts $out " <srvr> $URL($url,head,Server)"
+    }
+    puts $out {</si>}
+    puts $out {<av>}
+    puts $out " <avli> $url"
+    if {[info exists URL($url,head,Content-type)]} {
+        puts $out " <ty> $URL($url,head,Content-type)"
+    }
+    puts $out {</av>}
+}
+
+proc RobotSave {url} {
+    global URL
+    
+    set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
+    set ti 0
+    if {[info exists URL($url,line)]} {
+        set htmlContent [join $URL($url,line)]
+        
+        htmlSwitch $htmlContent \
+                title {
+            if {!$ti} {
+                headSave $url $out $body
+                set ti 1
+            }
+        } a {
+            if {![info exists parm(href)]} continue
+            if {!$ti} {
+                headSave $url $out "untitled"
+                set ti 1
+            }
+            
+            if {[regexp {^\#} $parm(href)]} {
+                continue
+            } elseif {[regexp {^([^:]+):([^#]+)} $parm(href) x method hpath]} {
+                if {![string compare $method http]} {
+                    if {![regexp {^//([^/]+)(.*)} $hpath x host path]} {
+                        set host $URL($url,host)
+                        set path $hpath
+                    } 
+                   if {![regexp {\.dk$} $host]} continue
+                } else {
+                    continue
+                }
+            } elseif {[regexp {^([/~][^#]*)} $parm(href) x path]} {
+                set host $URL($url,host)
+                set method http
+            } else {
+                puts "     href=$parm(href)"
+                set ext [file extension $URL($url,path)] 
+                if {[string compare $ext {}]} {
+                    set dpart [file dirname $URL($url,path)]
+                } else {
+                    set dpart $URL($url,path)
+                }
+                regexp {^([^#]+)} $parm(href) x path
+                set host $URL($url,host)
+                set path [string trimright $dpart /]/$path
+                set method http
+            }
+            set ext [file extension $path]
+            if {![string length $ext]} {
+                set path [string trimright $path /]/
+            } else {
+                set path [string trimright $path /]
+            }
+           set c [split $path /]
+           set i [llength $c]
+           incr i -1
+           set path [lindex $c $i]
+           incr i -1
+           while {$i >= 0} {
+               switch -- [lindex $c $i] {
+                   .. {
+                       incr i -2
+                    }
+                   . {
+                       incr i -1
+                   }
+                   default {
+                       set path [lindex $c $i]/$path
+                       incr i -1
+                   }
+               }
+           }
+            set href "$method://$host$path"
+
+            puts $out "<cr>"
+            puts $out "<li> $href"
+            puts $out "<cp> $body"
+            puts $out "</cr>"
+            
+            if {![regexp {/.*bin/} $href)]} {
+                if {![RobotFileExist visited $host $path]} {
+                    set outf [RobotFileOpen unvisited $host $path]
+                    close $outf
+                }
+            }
+        }
+    }
+    if {!$ti} {
+        headSave $url $out "untitled"
+        set ti 1
+    }
+    puts $out "</nwi>"
+    close $out
+    RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
+}
+
+proc RobotRead {url sock} {
+    global URL
+
+    set readCount [gets $sock line]
+    if {$readCount < 0} {
+        if [eof $sock] {
+            close $sock
+            RobotSave $url
+            RobotRestart
+        }
+    } elseif {$readCount > 0} {
+        switch $URL($url,state) {
+            head {
+                puts "head: $line" 
+                if {[regexp {([^:]+):[ ]+(.*)} $line x name value]} {
+                    set URL($url,head,$name) $value
+                }
+            }
+            html { 
+                lappend URL($url,line) $line 
+#                puts "body: $line"
+            }
+            skip {
+                close $sock
+                RobotSave $url
+                RobotRestart
+            }
+        }
+    } else {
+        set URL($url,state) skip
+        if {[info exists URL($url,head,Content-type)]} {
+            if {![string compare $URL($url,head,Content-type) text/html]} {
+                set URL($url,state) html
+            }
+        }
+    }
+}
+
+proc RobotConnect {url sock} {
+    global URL
+
+    fileevent $sock readable [list RobotRead $url $sock]
+    puts $sock "GET $URL($url,path) HTTP/1.0"
+    puts $sock ""
+    flush $sock
+}
+
+proc RobotNop {} {
+
+}
+
+proc RobotGetUrl {url phost} {
+    global URL
+    set port 80
+    puts "---------"
+    puts $url
+    if {[regexp {([^:]+)://([^/]+)([^ ?]*)} $url x method host path]} {
+        puts "method=$method host=$host path=$path"
+    } else {
+        return -1
+    }
+    set URL($url,method) $method
+    set URL($url,host) $host
+    set URL($url,port) $port
+    set URL($url,path) $path
+    set URL($url,state) head
+    if [catch {set sock [socket -async $host $port]}] {
+        return -1
+    }
+    fconfigure $sock -translation {auto crlf}
+    RobotConnect $url $sock
+
+    return 0
+}
+
+#RobotGetUrl http://www.dtv.dk/ {}
+RobotRestart
+vwait forever
+
diff --git a/tclmain.c b/tclmain.c
new file mode 100644 (file)
index 0000000..e99c71e
--- /dev/null
+++ b/tclmain.c
@@ -0,0 +1,58 @@
+/* 
+ * $Id: tclmain.c,v 1.1 1996/08/06 14:04:22 adam Exp $
+ */
+
+#include "tclrobot.h"
+
+/*
+ * The following variable is a special hack that is needed in order for
+ * Sun shared libraries to be used for Tcl.
+ */
+
+extern int matherr();
+int *tclDummyMathPtr = (int *) matherr;
+
+int main(int argc, char **argv)
+{
+    Tcl_Main(argc, argv, Tcl_AppInit);
+    return 0;                  /* Needed only to prevent compiler warning. */
+}
+
+int Tcl_AppInit(Tcl_Interp *interp)
+{
+    if (Tcl_Init(interp) == TCL_ERROR) {
+       return TCL_ERROR;
+    }
+
+    if (TclRobot_Init(interp) == TCL_ERROR) {
+       return TCL_ERROR;
+    }
+    Tcl_StaticPackage(interp, "TclRobot", TclRobot_Init,
+            (Tcl_PackageInitProc *) NULL);
+
+    /*
+     * Call the init procedures for included packages.  Each call should
+     * look like this:
+     *
+     * if (Mod_Init(interp) == TCL_ERROR) {
+     *     return TCL_ERROR;
+     * }
+     *
+     * where "Mod" is the name of the module.
+     */
+
+    /*
+     * Call Tcl_CreateCommand for application-specific commands, if
+     * they weren't already created by the init procedures called above.
+     */
+
+    /*
+     * Specify a user-specific startup file to invoke if the application
+     * is run interactively.  Typically the startup file is "~/.apprc"
+     * where "app" is the name of the application.  If this line is deleted
+     * then no user-specific startup file will be run under any conditions.
+     */
+
+    Tcl_SetVar(interp, "tcl_rcFileName", "~/.tclshrc", TCL_GLOBAL_ONLY);
+    return TCL_OK;
+}
diff --git a/tclrobot.h b/tclrobot.h
new file mode 100644 (file)
index 0000000..a582f62
--- /dev/null
@@ -0,0 +1,9 @@
+/*
+ * $Id: tclrobot.h,v 1.1 1996/08/06 14:04:22 adam Exp $
+ */
+#include <tcl.h>
+
+int htmlSwitch (ClientData clientData, Tcl_Interp *interp,
+                int argc, char **argv);
+
+int TclRobot_Init (Tcl_Interp *interp);