quote the domain when given on the command line so that your
shell doesn't expand this. This option may be repeated thus
allowing you to specify many "allowed" domains.
+
+ -r rules Specifies a file with rules. See the rules file for an
+ example.
Example 1: Harvest three links away from www.somwhere.com world-wide:
./robot.tcl -c 3 http://www.somwhere.com/
ac_default_prefix=/usr/local
# Any additions from configure.in:
ac_help="$ac_help
- --with-tclconfig Path for tclConfig.sh/tkConfig.sh"
+ --with-tclconfig Path for tclConfig.sh"
# Initialize some variables set by options.
# The variables have the same names as the options, with
tryprefix=${prefix}
prefix=${saveprefix}
fi
-if test -r ${tclconfig}/tclConfig.sh; then
- echo $ac_n "checking for Tcl""... $ac_c" 1>&6
-echo "configure:942: checking for Tcl" >&5
- . ${tclconfig}/tclConfig.sh
+echo $ac_n "checking for Tcl""... $ac_c" 1>&6
+echo "configure:941: checking for Tcl" >&5
+if test -d ${tclconfig}; then
+ tclconfig=${tclconfig}/tclConfig.sh
+fi
+if test -r ${tclconfig}; then
+ . ${tclconfig}
TCLLIB="${TCL_LIB_SPEC} ${TCL_LIBS}"
- TCLINC=-I${TCL_PREFIX}/include
+
+ if test -d ${TCL_PREFIX}/include/tcl${TCL_VERSION}; then
+ TCLINC=-I${TCL_PREFIX}/include/tcl${TCL_VERSION}
+ else
+ TCLINC=-I${TCL_PREFIX}/include
+ fi
RANLIB=$TCL_RANLIB
SHLIB_CFLAGS=$TCL_SHLIB_CFLAGS
SHLIB_LD=$TCL_SHLIB_LD
echo "$ac_t""$TCL_VERSION" 1>&6
CC=$TCL_CC
else
- # Extract the first word of "gcc", so it can be a program name with args.
-set dummy gcc; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:957: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
- echo $ac_n "(cached) $ac_c" 1>&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
- IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
- ac_dummy="$PATH"
- for ac_dir in $ac_dummy; do
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$ac_word; then
- ac_cv_prog_CC="gcc"
- break
- fi
- done
- IFS="$ac_save_ifs"
-fi
-fi
-CC="$ac_cv_prog_CC"
-if test -n "$CC"; then
- echo "$ac_t""$CC" 1>&6
-else
- echo "$ac_t""no" 1>&6
-fi
-
-if test -z "$CC"; then
- # Extract the first word of "cc", so it can be a program name with args.
-set dummy cc; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:987: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
- echo $ac_n "(cached) $ac_c" 1>&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
- IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
- ac_prog_rejected=no
- ac_dummy="$PATH"
- for ac_dir in $ac_dummy; do
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$ac_word; then
- if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then
- ac_prog_rejected=yes
- continue
- fi
- ac_cv_prog_CC="cc"
- break
- fi
- done
- IFS="$ac_save_ifs"
-if test $ac_prog_rejected = yes; then
- # We found a bogon in the path, so make sure we never use it.
- set dummy $ac_cv_prog_CC
- shift
- if test $# -gt 0; then
- # We chose a different compiler from the bogus one.
- # However, it has the same basename, so the bogon will be chosen
- # first if we set CC to just the basename; use the full file name.
- shift
- set dummy "$ac_dir/$ac_word" "$@"
- shift
- ac_cv_prog_CC="$@"
- fi
-fi
-fi
-fi
-CC="$ac_cv_prog_CC"
-if test -n "$CC"; then
- echo "$ac_t""$CC" 1>&6
-else
- echo "$ac_t""no" 1>&6
-fi
-
- if test -z "$CC"; then
- case "`uname -s`" in
- *win32* | *WIN32*)
- # Extract the first word of "cl", so it can be a program name with args.
-set dummy cl; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:1038: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
- echo $ac_n "(cached) $ac_c" 1>&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
- IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
- ac_dummy="$PATH"
- for ac_dir in $ac_dummy; do
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$ac_word; then
- ac_cv_prog_CC="cl"
- break
- fi
- done
- IFS="$ac_save_ifs"
-fi
-fi
-CC="$ac_cv_prog_CC"
-if test -n "$CC"; then
- echo "$ac_t""$CC" 1>&6
-else
- echo "$ac_t""no" 1>&6
-fi
- ;;
- esac
- fi
- test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; }
-fi
-
-echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6
-echo "configure:1070: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5
-
-ac_ext=c
-# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
-ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
-cross_compiling=$ac_cv_prog_cc_cross
-
-cat > conftest.$ac_ext << EOF
-
-#line 1081 "configure"
-#include "confdefs.h"
-
-main(){return(0);}
-EOF
-if { (eval echo configure:1086: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
- ac_cv_prog_cc_works=yes
- # If we can't run a trivial program, we are probably using a cross compiler.
- if (./conftest; exit) 2>/dev/null; then
- ac_cv_prog_cc_cross=no
- else
- ac_cv_prog_cc_cross=yes
- fi
-else
- echo "configure: failed program was:" >&5
- cat conftest.$ac_ext >&5
- ac_cv_prog_cc_works=no
-fi
-rm -fr conftest*
-ac_ext=c
-# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
-ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
-cross_compiling=$ac_cv_prog_cc_cross
-
-echo "$ac_t""$ac_cv_prog_cc_works" 1>&6
-if test $ac_cv_prog_cc_works = no; then
- { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; }
-fi
-echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6
-echo "configure:1112: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5
-echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6
-cross_compiling=$ac_cv_prog_cc_cross
-
-echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6
-echo "configure:1117: checking whether we are using GNU C" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then
- echo $ac_n "(cached) $ac_c" 1>&6
-else
- cat > conftest.c <<EOF
-#ifdef __GNUC__
- yes;
-#endif
-EOF
-if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:1126: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
- ac_cv_prog_gcc=yes
-else
- ac_cv_prog_gcc=no
-fi
-fi
-
-echo "$ac_t""$ac_cv_prog_gcc" 1>&6
-
-if test $ac_cv_prog_gcc = yes; then
- GCC=yes
-else
- GCC=
-fi
-
-ac_test_CFLAGS="${CFLAGS+set}"
-ac_save_CFLAGS="$CFLAGS"
-CFLAGS=
-echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6
-echo "configure:1145: checking whether ${CC-cc} accepts -g" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then
- echo $ac_n "(cached) $ac_c" 1>&6
-else
- echo 'void f(){}' > conftest.c
-if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then
- ac_cv_prog_cc_g=yes
-else
- ac_cv_prog_cc_g=no
-fi
-rm -f conftest*
-
-fi
-
-echo "$ac_t""$ac_cv_prog_cc_g" 1>&6
-if test "$ac_test_CFLAGS" = set; then
- CFLAGS="$ac_save_CFLAGS"
-elif test $ac_cv_prog_cc_g = yes; then
- if test "$GCC" = yes; then
- CFLAGS="-g -O2"
- else
- CFLAGS="-g"
- fi
-else
- if test "$GCC" = yes; then
- CFLAGS="-O2"
- else
- CFLAGS=
- fi
-fi
-
- SHLIB_CFLAGS=""
- SHLIB_LD="shared-linker"
- SHLIB_SUFFIX=""
- SHLIB_VERSION=""
- # Extract the first word of "ranlib", so it can be a program name with args.
-set dummy ranlib; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:1183: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then
- echo $ac_n "(cached) $ac_c" 1>&6
-else
- if test -n "$RANLIB"; then
- ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
-else
- IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
- ac_dummy="$PATH"
- for ac_dir in $ac_dummy; do
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$ac_word; then
- ac_cv_prog_RANLIB="ranlib"
- break
- fi
- done
- IFS="$ac_save_ifs"
- test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":"
-fi
-fi
-RANLIB="$ac_cv_prog_RANLIB"
-if test -n "$RANLIB"; then
- echo "$ac_t""$RANLIB" 1>&6
-else
- echo "$ac_t""no" 1>&6
-fi
-
- echo $ac_n "checking for Tcl""... $ac_c" 1>&6
-echo "configure:1211: checking for Tcl" >&5
- TCL_VERSION=nope
- for i in "" 7.4 7.5 7.6 8.0; do
- if test -r ${tryprefix}/lib/libtcl${i}.a; then
- TCL_VERSION=$i
- fi
- done
- if test "$TCL_VERSION" = nope; then
- echo "configure: warning: Didn't find Tcl" 1>&2
- TCL_LIB_SPEC=-ltcl
- TCLINC=" # -I.. Tcl/Tk include path"
- else
- if test "$TCL_VERSION" = ""; then
- echo "$ac_t""7.3" 1>&6
- else
- echo "$ac_t""$TCL_VERSION" 1>&6
- fi
- TCL_LIB_SPEC=${tryprefix}/lib/libtcl${TCL_VERSION}.a
- TCLINC=-I${tryprefix}/include
- fi
- TCLLIB="${TCL_LIB_SPEC} ${LIBS} -lm"
+ echo "$ac_t""Not found" 1>&6
fi
trap '' 1 2 15
cat > confcache <<\EOF
s%@TCLSH8.0@%$TCLSH8.0%g
s%@TCLSH7.6@%$TCLSH7.6%g
s%@TCLSH7.5@%$TCLSH7.5%g
-s%@RANLIB@%$RANLIB%g
CEOF
EOF
-dnl (c) Index Data 1996-2000
+dnl (c) Index Data 1996-2001
dnl See the file LICENSE for details.
-dnl $Id: configure.in,v 1.6 2001/01/23 12:05:07 adam Exp $
+dnl $Id: configure.in,v 1.7 2001/10/26 13:26:11 adam Exp $
AC_INIT(hswitch.c)
dnl ------ Substitutions
AC_SUBST(CC)
dnl ------ look for Tcl
tclconfig=NONE
tryprefix=NONE
-AC_ARG_WITH(tclconfig, [ --with-tclconfig Path for tclConfig.sh/tkConfig.sh], [tclconfig=$withval])
+AC_ARG_WITH(tclconfig, [ --with-tclconfig Path for tclConfig.sh], [tclconfig=$withval])
if test "x$tclconfig" = xNONE; then
saveprefix=${prefix}
AC_PREFIX_PROGRAM(tclsh)
tryprefix=${prefix}
prefix=${saveprefix}
fi
-if test -r ${tclconfig}/tclConfig.sh; then
- AC_MSG_CHECKING(for Tcl)
- . ${tclconfig}/tclConfig.sh
+AC_MSG_CHECKING(for Tcl)
+if test -d ${tclconfig}; then
+ tclconfig=${tclconfig}/tclConfig.sh
+fi
+if test -r ${tclconfig}; then
+ . ${tclconfig}
TCLLIB="${TCL_LIB_SPEC} ${TCL_LIBS}"
- TCLINC=-I${TCL_PREFIX}/include
+
+ if test -d ${TCL_PREFIX}/include/tcl${TCL_VERSION}; then
+ TCLINC=-I${TCL_PREFIX}/include/tcl${TCL_VERSION}
+ else
+ TCLINC=-I${TCL_PREFIX}/include
+ fi
RANLIB=$TCL_RANLIB
SHLIB_CFLAGS=$TCL_SHLIB_CFLAGS
SHLIB_LD=$TCL_SHLIB_LD
AC_MSG_RESULT($TCL_VERSION)
CC=$TCL_CC
else
- AC_PROG_CC
- SHLIB_CFLAGS=""
- SHLIB_LD="shared-linker"
- SHLIB_SUFFIX=""
- SHLIB_VERSION=""
- AC_PROG_RANLIB
- AC_MSG_CHECKING(for Tcl)
- TCL_VERSION=nope
- for i in "" 7.4 7.5 7.6 8.0; do
- if test -r ${tryprefix}/lib/libtcl${i}.a; then
- TCL_VERSION=$i
- fi
- done
- if test "$TCL_VERSION" = nope; then
- AC_MSG_WARN(Didn't find Tcl)
- TCL_LIB_SPEC=-ltcl
- TCLINC=" # -I.. Tcl/Tk include path"
- else
- if test "$TCL_VERSION" = ""; then
- AC_MSG_RESULT(7.3)
- else
- AC_MSG_RESULT($TCL_VERSION)
- fi
- TCL_LIB_SPEC=${tryprefix}/lib/libtcl${TCL_VERSION}.a
- TCLINC=-I${tryprefix}/include
- fi
- TCLLIB="${TCL_LIB_SPEC} ${LIBS} -lm"
+ AC_MSG_RESULT(Not found)
fi
AC_OUTPUT(Makefile)
#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.20 2001/06/29 22:25:55 adam Exp $
+# $Id: robot.tcl,v 1.21 2001/10/26 13:26:11 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
}
proc RobotFileNext {area} {
- global robotSeq global idleTime ns
+ global robotSeq global idletime ns
# puts "RobotFileNext robotSeq=$robotSeq"
if {$robotSeq < 0} {
if {![string length $n]} {
set robotSeq -1
flush stdout
- puts "------------ N E X T R O U N D --------"
+ puts "Round robin"
return wait
}
incr robotSeq
proc RobotStart {} {
global URL
- global robotsRunning robotsMax idleTime
+ global robotsRunning robotsMax idletime
# puts "RobotStart"
while {1} {
}
incr robotsRunning
if {[string compare $url wait] == 0} {
- after $idleTime RobotRR
+ after $idletime RobotRR
return
}
set r [RobotGetUrl $url {}]
}
proc RobotHref {url hrefx hostx pathx} {
- global URL domains
+ global URL domains debuglevel
upvar $hrefx href
upvar $hostx host
upvar $pathx path
- puts "Ref url = $url href=$href"
+ if {$debuglevel > 1} {
+ puts "Ref input url = $url href=$href"
+ }
if {[string first { } $href] >= 0} {
return 0
}
regsub -all {~} $path {%7E} path
set href "$method://$host$path"
- puts "Ref href = $href"
- return 1
+
+ if {$debuglevel > 1} {
+ puts "Ref result = $href"
+ }
+ return [checkrule url $href]
}
proc RobotError {url code} {
global URL
- puts "Bad URL $url, $code"
+ puts "Bad URL $url (code $code)"
set fromurl {}
set distance -1
if {[RobotFileExist unvisited $URL($url,hostport) $URL($url,path)]} {
}
proc RobotTextHtml {url out} {
- global URL maxDistance
+ global URL maxdistance
set distance 0
- if {$maxDistance < 1000 && [info exists URL($url,dist)]} {
+ if {$maxdistance < 1000 && [info exists URL($url,dist)]} {
set distance [expr $URL($url,dist) + 1]
}
htmlSwitch $URL($url,buf) \
puts "no href"
continue
}
- if {[expr $distance <= $maxDistance]} {
+ if {[expr $distance <= $maxdistance]} {
set href [string trim $parm(href)]
if {![RobotHref $url href host path]} continue
puts "no href"
continue
}
- if {[expr $distance <= $maxDistance]} {
+ if {[expr $distance <= $maxdistance]} {
set href [string trim $parm(href)]
if {![RobotHref $url href host path]} continue
puts -nonewline $out $URL($url,buf)
RobotFileClose $out
+ if {![checkrule mime $URL($url,head,content-type)]} {
+ RobotError $url mimedeny
+ return
+ }
+
set out [RobotFileOpen visited $URL($url,hostport) $URL($url,path)]
puts $out "<zmbot>"
headSave $url $out
puts "Parsing $url distance=$distance"
switch $URL($url,head,content-type) {
- text/html {
- if {[string length $distance]} {
- RobotTextHtml $url $out
- }
- }
- text/plain {
- RobotTextPlain $url $out
- }
- application/pdf {
- set pdff [open test.pdf w]
- puts -nonewline $pdff $URL($url,buf)
- close $pdff
- }
+ text/html {
+ if {[string length $distance]} {
+ RobotTextHtml $url $out
+ }
+ }
+ text/plain {
+ RobotTextPlain $url $out
+ }
+ application/pdf {
+ set pdff [open test.pdf w]
+ puts -nonewline $pdff $URL($url,buf)
+ close $pdff
+ }
}
puts $out "</zmbot>"
RobotFileClose $out
}
proc RobotReadHeader {url sock} {
- global URL
+ global URL debuglevel
- puts "RobotReadHeader $url"
+ if {$debuglevel > 1} {
+ puts "HTTP head $url"
+ }
if {[catch {set buffer [read $sock 2148]}]} {
RobotError $url 404
RobotRestart $url $sock
set URL($url,head,[string tolower $name]) [string trim $value]
}
}
- puts "code = $code"
+ puts "HTTP CODE $code"
set URL($url,state) skip
switch $code {
301 {
proc RobotGetUrl {url phost} {
global URL robotsRunning
flush stdout
- puts "RobotGetUrl --------- robotsRunning=$robotsRunning url=$url"
+ puts "Retrieve $robotsRunning url=$url"
if {![regexp {([^:]+)://([^/]+)(.*)} $url x method hostport path]} {
return -1
}
}
}
-set agent "zmbot/0.0"
+set agent "zmbot/0.1"
if {![catch {set os [exec uname -s -r]}]} {
set agent "$agent ($os)"
}
set robotsRunning 0
set robotSeq 0
set workdir [pwd]
-set idleTime 60000
+set idletime 60000
set acceptLanguage {}
set i 0
set l [llength $argv]
-# For testing only
-if {0} {
- set url "http://www.sportsfiskeren.dk/sportsfiskeren/corner/index.htm"
- set href "../../data/../../data2/newsovs.asp?Mode=5"
-
- set URL($url,path) /sportsfiskeren/corner/index.htm
- set URL($url,hostport) www.sportsfiskeren.dk
- RobotHref $url href host path
- exit 0
-}
-
if {$l < 2} {
- puts {tclrobot: usage [-j jobs] [-i idle] [-c count] [-d domain] [url ..]}
+ puts {tclrobot: usage:}
+ puts {tclrobot [-j jobs] [-i idle] [-c count] [-d domain] [-r rules] [url ..]}
puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/"
exit 1
}
+# Commands for use in a rules file (see -r): allow, deny, url, debug
+set debuglevel 0
+
+# checkrule type this
+#
+# Decide whether the value $this is permitted by the rules that were
+# registered with the allow/deny commands (kept in the global list alrules).
+#
+#   type  rule category to consult; callers in this file pass "url" and "mime"
+#   this  the string to test against the rule masks
+#
+# Rules are examined in the order they were added.  Rules of another type
+# are skipped; the mask of each remaining rule is applied to $this with
+# [string match] (glob-style).  The first rule that matches decides the
+# result: 1 for an "allow" rule, 0 for any other rule.  When no rule matches,
+# or no rules have been defined at all, the result is 1 (allowed).
+# Trace output is printed when debuglevel is greater than 3.
+proc checkrule {type this} {
+    global alrules
+    global debuglevel
+
+    if {$debuglevel > 3} {
+        puts "CHECKRULE $type $this"
+    }
+    if {[info exists alrules]} {
+        foreach rule $alrules {
+            if {$debuglevel > 3} {
+                puts "consider $rule"
+            }
+            # Skip rules registered for another type.  string compare is
+            # used so the comparison is always textual, never numeric.
+            if {[string compare [lindex $rule 1] $type] != 0} continue
+            # Skip rules whose glob mask does not match the value.
+            if {![string match [lindex $rule 2] $this]} continue
+            # The first matching rule decides the outcome.
+            if {[string compare [lindex $rule 0] allow] == 0} {
+                if {$debuglevel > 3} {
+                    puts "CHECKRULE MATCH OK"
+                }
+                return 1
+            } else {
+                if {$debuglevel > 3} {
+                    puts "CHECKRULE MATCH FAIL"
+                }
+                return 0
+            }
+        }
+    }
+    # No rule matched (or none are defined): allow by default.
+    if {$debuglevel > 3} {
+        puts "CHECKRULE MATCH OK"
+    }
+    return 1
+}
+
+
+# url href
+#
+# Rules-file command that queues a URL for retrieval.
+#
+# The href is passed through RobotHref, with http://www.indexdata.dk/
+# hard-coded as the referring URL.  If RobotHref accepts it and
+# RobotFileExist reports no "visited" entry for the resulting host and
+# path, an "unvisited" entry is opened, a record is written to it and it
+# is closed again.  Nothing happens when RobotHref rejects the href or the
+# location already has a "visited" entry.
+proc url {href} {
+    if {[RobotHref http://www.indexdata.dk/ href host path]} {
+        if {![RobotFileExist visited $host $path]} {
+            set outf [RobotFileOpen unvisited $host $path]
+            # NOTE(review): the literal word "href" (not $href) is passed
+            # here; RobotWriteRecord is not visible in this patch, so
+            # confirm that this is what it expects.
+            RobotWriteRecord $outf href 0
+            RobotFileClose $outf
+        }
+    }
+}
+
+# deny type stuff
+#
+# Rules-file command: append a "deny" rule to the global rule list alrules.
+#   type   rule category the mask applies to (matched against checkrule's type)
+#   stuff  mask that checkrule applies with [string match]
+# Returns the updated rule list, as produced by lappend.
+proc deny {type stuff} {
+    global alrules
+
+    set rule [list deny $type $stuff]
+    lappend alrules $rule
+}
+
+# allow type stuff
+#
+# Rules-file command: append an "allow" rule to the global rule list alrules.
+#   type   rule category the mask applies to (matched against checkrule's type)
+#   stuff  mask that checkrule applies with [string match]
+# Returns the updated rule list, as produced by lappend.
+proc allow {type stuff} {
+    global alrules
+
+    set rule [list allow $type $stuff]
+    lappend alrules $rule
+}
+
+# debug level
+#
+# Set the global debuglevel, which gates the trace output of the robot
+# (for example, checkrule prints its trace when debuglevel is above 3).
+# Returns the new level.
+proc debug {level} {
+    upvar #0 debuglevel current
+    set current $level
+}
+
+# Parse options
+
while {$i < $l} {
set arg [lindex $argv $i]
switch -glob -- $arg {
}
}
-c* {
- set maxDistance [string range $arg 2 end]
- if {![string length $maxDistance]} {
- set maxDistance [lindex $argv [incr i]]
+ set maxdistance [string range $arg 2 end]
+ if {![string length $maxdistance]} {
+ set maxdistance [lindex $argv [incr i]]
}
}
-d* {
lappend domains $dom
}
-i* {
- set idleTime [string range $arg 2 end]
- if {![string length $idleTime]} {
- set idleTime [lindex $argv [incr i]]
+ set idletime [string range $arg 2 end]
+ if {![string length $idletime]} {
+ set idletime [lindex $argv [incr i]]
}
}
-l* {
set acceptLanguage [lindex $argv [incr i]]
}
}
+ -r* {
+ set rfile [string range $arg 2 end]
+ if {![string length $rfile]} {
+ set rfile [lindex $argv [incr i]]
+ }
+ source $rfile
+ }
default {
set href $arg
if {[RobotHref http://www.indexdata.dk/ href host path]} {
if {![info exist domains]} {
set domains {*}
}
-if {![info exist maxDistance]} {
- set maxDistance 50
+if {![info exist maxdistance]} {
+ set maxdistance 50
}
if {![info exist robotsMax]} {
set robotsMax 5
}
puts "domains=$domains"
-puts "max distance=$maxDistance"
+puts "max distance=$maxdistance"
puts "max jobs=$robotsMax"
RobotStart
--- /dev/null
+# sample rules $Id: rules,v 1.1 2001/10/26 13:26:11 adam Exp $
+#
+# This file is read by robot.tcl with Tcl "source" when it is named with
+# the -r option, so every line below is an ordinary Tcl command.
+
+# Queue this URL for retrieval.
+url http://www.indexdata.dk
+
+# URL rules.  Rules are tried in the order listed and the first one whose
+# mask matches decides; a value that matches no rule is allowed.  Masks are
+# glob patterns as understood by [string match], so a mask without "*"
+# matches only that exact string.
+allow url http://www.indexdata.dk/software*
+allow url http://www.indexdata.dk/yaz*
+allow url http://www.indexdata.dk/
+# Everything not allowed above is rejected.
+deny url *
+
+# MIME rules, matched against the Content-Type header value of a reply.
+allow mime text/html
+allow mime application/pdf
+deny mime text/plain
+# Any other content type is rejected.
+deny mime *
+
+# Same variable that the -c command line option sets.
+set maxdistance 4