#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.6 2000/12/07 20:16:11 adam Exp $
+# $Id: robot.tcl,v 1.7 2000/12/08 22:46:53 adam Exp $
#
-proc RobotFileNext {area} {
+proc RobotFileNext1 {area} {
if {[catch {set ns [glob ${area}/*]}]} {
return {}
}
}
foreach n $ns {
if {[file isdirectory $n]} {
- set sb [RobotFileNext $n]
+ set sb [RobotFileNext1 $n]
if {[string length $sb]} {
return $sb
}
return {}
}
+# RobotFileWait --
+#   Timer callback: resets the global sequence counter robotSeq to 0.
+#   RobotFileNext schedules this proc with "after" and then blocks in
+#   "vwait robotSeq"; the write performed here is what ends that wait.
+#   Takes no arguments; the result is the value assigned (0).
+proc RobotFileWait {} {
+ global robotSeq
+ set robotSeq 0
+}
+
+# RobotFileNext --
+#   Examines one entry of directory "area" per call, selected by the
+#   global index robotSeq, and advances that index for the next call.
+#
+#   area - directory whose immediate entries are scanned via glob.
+#
+#   Returns:
+#     - "http://<entry without the area/ prefix>/robots.txt" when the
+#       selected entry contains a regular file named robots.txt;
+#     - otherwise, when the entry is a directory, the non-empty result
+#       of RobotFileNext1 on that entry;
+#     - the empty string in every other case (glob failed, list still
+#       exhausted after the wait, or nothing found for this entry).
+#
+#   Side effects: reads and writes the global robotSeq, prints progress
+#   messages to stdout, and may enter the event loop for about two
+#   seconds when the index has run past the end of the listing.
+proc RobotFileNext {area} {
+ global robotSeq
+ # glob raises an error when it cannot produce a listing; treat that
+ # as "nothing to return".
+ if {[catch {set ns [glob ${area}/*]}]} {
+ return {}
+ }
+ # off = length of "area" plus one, i.e. the index just past the
+ # "area/" prefix; used below to strip that prefix from the entry path.
+ set off [string length $area]
+ incr off
+
+ # lindex yields an empty string for an out-of-range index, so an empty
+ # n means robotSeq is past the end of the listing.
+ set n [lindex $ns $robotSeq]
+ if {![string length $n]} {
+ puts "------------ N E X T R O U N D --------"
+ # Park the counter at -1, schedule RobotFileWait in 2000 ms and block
+ # until robotSeq is written (RobotFileWait sets it to 0).
+ set robotSeq -1
+ after 2000 RobotFileWait
+ vwait robotSeq
+
+ # NOTE(review): ns is the listing taken before the wait; it is not
+ # refreshed here, so entries created meanwhile are not seen until the
+ # next call - confirm this is intended.
+ set n [lindex $ns $robotSeq]
+ if {![string length $n]} {
+ return {}
+ }
+ }
+ # Advance the index so the next call looks at the following entry.
+ incr robotSeq
+ if {[file isfile $n/robots.txt]} {
+ # Entry holds a robots.txt file: build the URL from the entry path
+ # with the "area/" prefix removed.
+ puts "ok returning http://[string range $n $off end]/robots.txt"
+ return http://[string range $n $off end]/robots.txt
+ } elseif {[file isdirectory $n]} {
+ # No robots.txt here: delegate to RobotFileNext1 for this directory
+ # and pass on its result when it is non-empty.
+ set sb [RobotFileNext1 $n]
+ if {[string length $sb]} {
+ return $sb
+ }
+ }
+ return {}
+}
+
+
proc RobotFileExist {area host path} {
set comp [split $area/$host$path /]
set l [llength $comp]
if {[catch {cd ./$d}]} {
exec mkdir $d
cd ./$d
+ if {![string compare $area unvisited] && $i == 1 && $mode == "w"} {
+ set out [open robots.txt w]
+ puts "creating robots.txt in $d"
+ close $out
+ }
}
}
set d [lindex $comp $len]
puts "BGERROR $m"
}
-if {0} {
- proc RobotRestart {} {
- global robotMoreWork
- set robotMoreWork 0
- puts "myrestart"
- }
- set robotMoreWork 1
- set url {http://www.indexdata.dk/zap/}
- RobotGetUrl $url {}
- while {$robotMoreWork} {
- vwait robotMoreWork
- }
- puts "-----------"
- puts $URL($url,buf)
- puts "-----------"
- exit 1
-}
-
set robotMoreWork 0
+set robotSeq 0
set workdir [pwd]
if {[llength $argv] < 2} {
if [RobotGetUrl $site {}] {
set robotMoreWork 0
puts "Couldn't process $site"
- } else {
- #set x [RobotFileOpen unvisited $site /robots.txt]
- #RobotFileClose $x
}
}