# Makefile for Tcl Web Robot
-# $Id: Makefile.in,v 1.1 1996/08/06 14:04:22 adam Exp $
+# $Id: Makefile.in,v 1.2 1998/10/15 12:30:59 adam Exp $
SHELL=/bin/sh
# Version
INSTALL_PROGRAM=@INSTALL_PROGRAM@
INSTALL_DATA=@INSTALL_DATA@
RANLIB=@RANLIB@
-SHLIB_LD=@SHLIB_LD@
+
+SHLIB_LD = @SHLIB_LD@
+SHLIB_CFLAGS = @SHLIB_CFLAGS@
+SHLIB_SUFFIX = @SHLIB_SUFFIX@
+SHLIB_VERSION = @SHLIB_VERSION@
O=hswitch.o init.o
ar cr tclrobot.a $(O)
$(RANLIB) tclrobot.a
-libtclrobot.so: $(O)
- $(SHLIB_LD) -o libtclrobot.so $(O)
- $(RANLIB) libtclrobot.so
+# Loadable extension: link the objects in $(O) with the shared-library
+# linker command and file suffix that configure substituted.
+tclrobot$(SHLIB_SUFFIX): $(O)
+	$(SHLIB_LD) -o $@ $(O)
+# Suffix rule: compile one .c file into the matching .o file.
+# $(SHLIB_CFLAGS) is substituted by configure (taken from TCL_SHLIB_CFLAGS);
+# presumably these are the flags objects need to go into tclrobot$(SHLIB_SUFFIX).
.c.o:
- $(CC) -c $(CFLAGS) $(DEFS) $<
+ $(CC) -c $(CFLAGS) $(SHLIB_CFLAGS) $(DEFS) $<
+# Remove build products.  The shared library is listed by name because
+# $(SHLIB_SUFFIX) comes from configure and need not be ".so", in which case
+# the *.so glob alone would leave it behind.
clean:
-	rm -f tclrobot core *.out *.o *.a *.so config.*
+	rm -f tclrobot core *.out *.o *.a *.so tclrobot$(SHLIB_SUFFIX) config.*
dnl Web robot toolkit for tcl
-dnl (c) Index Data 1996
+dnl (c) Index Data 1996-1998
dnl See the file LICENSE for details.
-dnl $Id: configure.in,v 1.1 1996/08/06 14:04:22 adam Exp $
+dnl $Id: configure.in,v 1.2 1998/10/15 12:31:00 adam Exp $
AC_INIT(tclrobot.h)
CC=${CC-cc}
dnl ------ Substitutions
AC_SUBST(TKLIB)
AC_SUBST(TCLINC)
AC_SUBST(TKINC)
+AC_SUBST(SHLIB_CFLAGS)
AC_SUBST(SHLIB_LD)
+AC_SUBST(SHLIB_SUFFIX)
+AC_SUBST(SHLIB_VERSION)
AC_SUBST(RANLIB)
dnl ------ Preliminary settings
AC_PROG_INSTALL
+dnl Derive the default installation prefix from the location of the Tcl
+dnl shell.  When probing for plain "tclsh" leaves prefix at NONE, retry with
+dnl the versioned names tclsh8.1, tclsh8.0, tclsh7.6 and tclsh7.5 in turn.
AC_PREFIX_PROGRAM(tclsh)
+if test "x$prefix" = xNONE; then
+ AC_PREFIX_PROGRAM(tclsh8.1)
+fi
+if test "x$prefix" = xNONE; then
+ AC_PREFIX_PROGRAM(tclsh8.0)
+fi
+if test "x$prefix" = xNONE; then
+ AC_PREFIX_PROGRAM(tclsh7.6)
+fi
+if test "x$prefix" = xNONE; then
+ AC_PREFIX_PROGRAM(tclsh7.5)
+fi
AC_STDC_HEADERS
if test "$ac_cv_header_stdc" = no; then
AC_MSG_WARN(Your system doesn't seem to support ANSI C)
fi
if test -r ${tryprefix}/lib/tclConfig.sh; then
AC_MSG_CHECKING(for Tcl)
- source ${tryprefix}/lib/tclConfig.sh
+ . ${tryprefix}/lib/tclConfig.sh
TCLLIB="${TCL_LIB_SPEC} ${TCL_LIBS}"
TCLINC=-I${TCL_PREFIX}/include
RANLIB=${TCL_RANLIB}
- SHLIB_LD=${TCL_SHLIB_LD}
+ SHLIB_CFLAGS=$TCL_SHLIB_CFLAGS
+ SHLIB_LD=$TCL_SHLIB_LD
+ SHLIB_SUFFIX=$TCL_SHLIB_SUFFIX
+ SHLIB_VERSION=$TCL_SHLIB_VERSION
AC_MSG_RESULT($TCL_VERSION)
else
AC_MSG_WARN(Didn't find Tcl)
dnl ------ look for Tk
AC_MSG_CHECKING(for Tk)
if test -r ${tryprefix}/lib/tkConfig.sh; then
- source ${tryprefix}/lib/tkConfig.sh
+ . ${tryprefix}/lib/tkConfig.sh
AC_MSG_RESULT($TK_VERSION)
TKINC=${TK_XINCLUDES}
TKLIB="${TK_PREFIX}/lib/${TK_LIB_FILE} ${TK_LIBS}"
/*
- * $Id: hswitch.c,v 1.1 1996/08/06 14:04:22 adam Exp $
+ * $Id: hswitch.c,v 1.2 1998/10/15 12:31:01 adam Exp $
*/
#include <assert.h>
#include <string.h>
#define SPACECHR " \t\r\n\f"
+#define DEBUG(x)
+
static int skipSpace (const char *cp)
{
int i = 0;
static int skipTag (const char *cp, char *dst)
{
int i;
-
- for (i=0; i<TAG_MAX_LEN-1 && cp[i] && !strchr (SPACECHR "/>=", cp[i]); i++)
- dst[i] = tolower(cp[i]);
- dst[i] = '\0';
+    int j = 0;
+
+    /*
+     * Copy the tag name at cp into dst, lower-cased and NUL-terminated.
+     * The scan always runs to the end of the name (white space, '/', '>',
+     * '=' or end of string) so the returned length covers the whole name,
+     * but at most TAG_MAX_LEN-1 characters are stored in dst.
+     */
+    for (i=0; cp[i] && !strchr (SPACECHR "/>=", cp[i]); i++)
+        if (j < TAG_MAX_LEN-1)
+        {
+            /* Read with the source index i, and cast: passing a negative
+             * char value (8-bit input) to tolower is undefined. */
+            dst[j] = tolower((unsigned char) cp[i]);
+            j++;
+        }
+    dst[j] = '\0';
return i;
}
if (tag && !tag->level)
{
strcpy (tag->name, tagString);
+ DEBUG(printf ("------ consuming this %s\n", tag->name));
tag->tagParms = NULL;
nParms = &tag->tagParms;
}
i = skipSpace (cp);
while (cp[i] && cp[i] != '>')
{
- int nor = skipParm (cp+i, parm_name, &parm_value);
+ int nor = skipParm (cp+i, parm_name, &parm_value);
i += nor;
+ if (nor && tag)
+ {
+ DEBUG(printf ("parm_name=%s parm_value=%s\n", parm_name, parm_value));
+ }
if (nor && tag && !tag->level)
{
*nParms = malloc (sizeof(**nParms));
assert (*nParms);
- (*nParms)->next = NULL;
strcpy ((*nParms)->name, parm_name);
(*nParms)->value = parm_value;
+ (*nParms)->next = NULL;
+ nParms = &(*nParms)->next;
}
else
{
struct tagParm *tp0 = tp;
sprintf (vname, "parm(%s)", tp->name);
+ DEBUG(printf ("vname=%s\n", vname));
Tcl_SetVar (interp, vname, tp->value ? tp->value : "",0);
tp = tp->next;
cp++;
cp += skipTag (cp, tagStr);
tagI = tagLookup (tags, noTags, tagStr);
+ DEBUG(printf ("tagStr = %s tagI = %d\n", tagStr, tagI));
cp += tagStart (tagI >= 0 ? tags+tagI : NULL, tagStr, cp);
}
- else if (cp[0] == '<') /* end tag */
+ else if (cp[0] == '<' && cp[1] == '/')/* end tag */
{
char tagStr[TAG_MAX_LEN];
const char *body_end = cp;
/*
- * $Id: init.c,v 1.1 1996/08/06 14:04:22 adam Exp $
+ * $Id: init.c,v 1.2 1998/10/15 12:31:02 adam Exp $
*/
#include "tclrobot.h"
-int TclRobot_Init (Tcl_Interp *interp)
+int Tclrobot_Init (Tcl_Interp *interp)
{
Tcl_CreateCommand (interp, "htmlSwitch", htmlSwitch, (ClientData) NULL,
(Tcl_CmdDeleteProc *) NULL);
-#
-# $Id: robot.tcl,v 1.1 1996/08/06 14:04:22 adam Exp $
+#!/usr/bin/tclsh
+# $Id: robot.tcl,v 1.2 1998/10/15 12:31:03 adam Exp $
#
proc RobotFileNext {area} {
- if {[catch {set ns [glob $area/*]}]} {
+ if {[catch {set ns [glob ${area}/*]}]} {
return {}
}
set off [string first / $area]
set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
set ti 0
if {[info exists URL($url,line)]} {
- set htmlContent [join $URL($url,line)]
+ set htmlContent [join $URL($url,line) \n]
htmlSwitch $htmlContent \
- title {
+ title {
if {!$ti} {
headSave $url $out $body
set ti 1
}
+ } body {
+ regsub -all -nocase {<script.*</script>} $body {} abody
+ regsub -all {<[^\>]+>} $abody {} nbody
+ puts $out "<body>"
+ puts $out $nbody
+ puts $out "</body>"
} a {
- if {![info exists parm(href)]} continue
+ if {![info exists parm(href)]} {
+ puts "no href"
+ continue
+ }
if {!$ti} {
headSave $url $out "untitled"
set ti 1
set host $URL($url,host)
set path $hpath
}
- if {![regexp {\.dk$} $host]} continue
+ if {![regexp {\.indexdata\.dk$} $host]} continue
} else {
continue
}
set host $URL($url,host)
set method http
} else {
- puts " href=$parm(href)"
set ext [file extension $URL($url,path)]
if {[string compare $ext {}]} {
set dpart [file dirname $URL($url,path)]
}
}
} else {
- set URL($url,state) skip
+ set URL($url,state) html
if {[info exists URL($url,head,Content-type)]} {
if {![string compare $URL($url,head,Content-type) text/html]} {
set URL($url,state) html
return 0
}
-#RobotGetUrl http://www.dtv.dk/ {}
+# Make sure the htmlSwitch command is available.  If the running interpreter
+# does not already provide it, load the tclrobot extension: first from the
+# current directory, then by bare file name.
+if {![llength [info commands htmlSwitch]]} {
+ set e [info sharedlibextension]
+ if {[catch {load ./tclrobot$e}]} {
+ load tclrobot$e
+ }
+}
+
+# At least one site must be given on the command line.
+if {![llength $argv]} {
+ puts "Tclrobot: specify one or more sites."
+ exit 1
+}
+# Seed the robot: for every site, open and immediately close an "unvisited"
+# entry for the root path "/" (RobotFileOpen is defined elsewhere in this
+# script; presumably this creates the entry on disk).
+foreach site $argv {
+ set x [RobotFileOpen unvisited $site /]
+ close $x
+}
+# Start the robot and enter the event loop; "forever" is presumably never
+# set, so vwait keeps the script running.
RobotRestart
vwait forever
/*
- * $Id: tclmain.c,v 1.1 1996/08/06 14:04:22 adam Exp $
+ * $Id: tclmain.c,v 1.2 1998/10/15 12:31:04 adam Exp $
*/
#include "tclrobot.h"
return TCL_ERROR;
}
- if (TclRobot_Init(interp) == TCL_ERROR) {
+ if (Tclrobot_Init(interp) == TCL_ERROR) {
return TCL_ERROR;
}
- Tcl_StaticPackage(interp, "TclRobot", TclRobot_Init,
+ Tcl_StaticPackage(interp, "TclRobot", Tclrobot_Init,
(Tcl_PackageInitProc *) NULL);
/*
/*
- * $Id: tclrobot.h,v 1.1 1996/08/06 14:04:22 adam Exp $
+ * $Id: tclrobot.h,v 1.2 1998/10/15 12:31:05 adam Exp $
*/
#include <tcl.h>
+/* Tcl command procedure; registered under the name "htmlSwitch" by the
+ * package initialisation function declared below. */
int htmlSwitch (ClientData clientData, Tcl_Interp *interp,
int argc, char **argv);
-int TclRobot_Init (Tcl_Interp *interp);
+/* Package initialisation: creates the htmlSwitch command in interp. */
+int Tclrobot_Init (Tcl_Interp *interp);