#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.18 2001/06/07 08:17:00 adam Exp $
+# $Id: robot.tcl,v 1.19 2001/06/29 21:47:31 adam Exp $
#
# RobotFileNext1 area lead
# Only a fragment of this procedure is visible in this excerpt; href and url
# are assigned by code that is not shown here.
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
# Return 0 when href is longer than 256 characters.
# NOTE(review): 0 presumably tells the caller to skip this link - confirm.
if {[string length $href] > 256} {
return 0
}
# Added in this revision: return 0 for every href that contains a question
# mark, regardless of url.
+ if {[string first {?} $href] >= 0} {
+ return 0
+ }
# NOTE(review): if this test directly follows the one above in the real file,
# href can no longer contain a question mark here, so this condition can never
# be true - confirm and consider removing it.
if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
return 0
}
}
# RobotConnect url sock
# Visible part: writes an HTTP/1.0 GET request for url to sock and arms a
# timeout timer.
#   url  - key into the global URL array; URL(url,path) and URL(url,host)
#          are read to build the request.
#   sock - channel the request is written to.
proc RobotConnect {url sock} {
- global URL agent
+ global URL agent acceptLanguage
# Non-blocking channel; input is read with lf translation and newlines
# written by puts go out as CRLF.
fconfigure $sock -translation {lf crlf} -blocking 0
# RobotReadHeader is called with url and sock whenever sock becomes readable.
fileevent $sock readable [list RobotReadHeader $url $sock]
puts $sock "GET $URL($url,path) HTTP/1.0"
puts $sock "Host: $URL($url,host)"
puts $sock "User-Agent: $agent"
# The Accept-Language header is sent only when acceptLanguage is non-empty.
+ if {[string length $acceptLanguage]} {
+ puts $sock "Accept-Language: $acceptLanguage"
+ }
# The empty line terminates the request header.
puts $sock ""
flush $sock
# Schedule RobotSockCancel for url and sock in 30000 ms; the timer id is kept
# in URL(sock,cancel), presumably so it can be cancelled later - confirm.
set URL($sock,cancel) [after 30000 [list RobotSockCancel $url $sock]]
# Top-level defaults followed by a fragment of the command-line parsing; the
# enclosing loop and switch are not visible in this excerpt.
set robotSeq 0
set workdir [pwd]
set idleTime 60000
# Value for the Accept-Language request header written by RobotConnect; the
# empty default means the header is not sent.
+set acceptLanguage {}
set i 0
set l [llength $argv]
set idleTime [lindex $argv [incr i]]
}
}
+ -l* {
# The value may be attached to the flag itself (characters of arg from index 2
# on); when that is empty the next element of argv is used instead.
+ set acceptLanguage [string range $arg 2 end]
+ if {![string length $acceptLanguage]} {
+ set acceptLanguage [lindex $argv [incr i]]
+ }
+ }
default {
set href $arg
if {[RobotHref http://www.indexdata.dk/ href host path]} {