#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.10 2001/01/23 09:20:32 adam Exp $
+# $Id: robot.tcl,v 1.11 2001/01/23 11:26:43 adam Exp $
#
proc RobotFileNext1 {area lead} {
puts "RobotFileNext1 area=$area lead=$lead"
upvar $pathx path
puts "Ref url = $url href=$href"
+
+ if {[string first { } $href] >= 0} {
+ return 0
+ }
+ if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
+ return 0
+ }
# get method (if any)
if {![regexp {^([^/:]+):(.*)} $href x method hpath]} {
set hpath $href
# Build the robot's agent identification string. When `uname -s -r` can be
# executed, its output is appended in parentheses; if the exec fails, the
# catch swallows the error and the bare "zmbot/0.0" string is kept.
# NOTE(review): presumably sent as the HTTP User-Agent — the use site is not
# visible here; confirm.
set agent "zmbot/0.0"
if {![catch {set os [exec uname -s -r]}]} {
set agent "$agent ($os)"
- puts "agent: $agent"
}
+puts "agent: $agent"
+
# bgerror -- handler Tcl calls for errors raised in background (event-loop)
# scripts. Prints the error message m to stdout prefixed with "BGERROR".
# NOTE(review): errorInfo is declared global but never read in this body;
# presumably the stack trace was meant to be printed as well — confirm.
proc bgerror {m} {
global errorInfo
puts "BGERROR $m"
}
# Global robot state. robotsRunning is the condition of the main wait loop
# further down; workdir records the directory the script was started in.
# NOTE(review): idleTime is presumably in milliseconds — confirm at use site.
set robotsRunning 0
-set robotsMax 5
set robotSeq 0
set workdir [pwd]
set idleTime 60000
-if {[llength $argv] < 2} {
- puts "Tclrobot: usage <range> <domain> <start>"
- puts " Example: 3 '*.indexdata.dk' http://www.indexdata.dk/"
+# i is the index of the argument being examined, l the argument count.
+set i 0
+set l [llength $argv]
+
+# Fewer than two arguments: print the usage text and exit with status 1.
+# NOTE(review): the usage line marks every argument as optional, yet a lone
+# URL (one argument) is rejected by this check — confirm the intended minimum.
+if {$l < 2} {
+ puts {tclrobot: usage [-j jobs] [-c count] [-d domain] [url ..]}
+ puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/"
exit 1
}
-set maxDistance [lindex $argv 0]
-set domains [lindex $argv 1]
-foreach href [lindex $argv 2] {
- if {[RobotHref http://www.indexdata.dk/ href host path]} {
- if {![RobotFileExist visited $host $path]} {
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $href 0
- RobotFileClose $outf
+# Parse the command line. -j (robotsMax), -c (maxDistance) and -d (a domain
+# pattern, appended to the domains list so it may be repeated) take their
+# value either attached to the flag (-j5) or as the following argument
+# (-j 5). Any other argument is treated as a start URL: if RobotHref accepts
+# it and it is not already recorded under "visited", a record for it is
+# written under "unvisited" with distance 0.
+while {$i < $l} {
+ set arg [lindex $argv $i]
+ switch -glob -- $arg {
+ -j* {
+ set robotsMax [string range $arg 2 end]
+ if {![string length $robotsMax]} {
+ set robotsMax [lindex $argv [incr i]]
+ }
+ }
+ -c* {
+ set maxDistance [string range $arg 2 end]
+ if {![string length $maxDistance]} {
+ set maxDistance [lindex $argv [incr i]]
+ }
+ }
+ -d* {
+ set dom [string range $arg 2 end]
+ if {![string length $dom]} {
+ set dom [lindex $argv [incr i]]
+ }
+ lappend domains $dom
+ }
+ default {
+ set href $arg
+ # href, host and path are passed to RobotHref by name.
+ if {[RobotHref http://www.indexdata.dk/ href host path]} {
+ if {![RobotFileExist visited $host $path]} {
+ set outf [RobotFileOpen unvisited $host $path]
+ # Pass the URL value (as the previous revision did), not the
+ # literal word "href".
+ RobotWriteRecord $outf $href 0
+ RobotFileClose $outf
+ }
+ }
}
}
+ incr i
}
+# Fall back to defaults for any setting the command line did not supply:
+# all domains (*), a maximum distance of 3 and at most 5 jobs.
+if {![info exist domains]} {
+ set domains {*}
+}
+if {![info exist maxDistance]} {
+ set maxDistance 3
+}
+if {![info exist robotsMax]} {
+ set robotsMax 5
+}
+
+# Report the effective settings before the robot is started.
+puts "domains=$domains"
+puts "max distance=$maxDistance"
+puts "max jobs=$robotsMax"
+
RobotStart
while {$robotsRunning} {