#!/usr/bin/tclsh
-# $Id: dcdot.tcl,v 1.4 2000/12/11 17:11:03 adam Exp $
+# $Id: dcdot.tcl,v 1.5 2003/01/13 13:59:07 adam Exp $
#
proc RobotRestart {} {
proc RobotTextHtml {url} {
+ # Scan the buffered page text in URL($url,buf) and collect every
+ # <meta ...> tag into the list URL($url,meta) and every title element
+ # into the list URL($url,title). Matching is case-insensitive.
global URL
-
+
set b $URL($url,buf)
- set e {<[mM][eE][tT][aA][^>]*>}
+ set e {<meta[^>]*>}
- catch {unset $URL($url,meta)}
+ # unset takes the variable NAME; catch covers the case where the
+ # element does not exist yet.
+ catch {unset URL($url,meta)}
- while {[regexp -indices $e $b i]} {
+ while {[regexp -nocase -indices $e $b i]} {
set meta [string range $b [lindex $i 0] [lindex $i 1]]
lappend URL($url,meta) $meta
set b [string range $b [lindex $i 1] end]
}
+ # Second pass over the same buffer, this time for the title element.
+ set b $URL($url,buf)
+ set e {<title>[^>]*>}
+ # Clear titles left over from an earlier call for this url (the list
+ # being rebuilt here is title, not meta).
+ catch {unset URL($url,title)}
+ while {[regexp -nocase -indices $e $b i]} {
+ set title [string range $b [lindex $i 0] [lindex $i 1]]
+ lappend URL($url,title) $title
+ set b [string range $b [lindex $i 1] end]
+ }
}
proc Robot200 {url} {
regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
set lines [split $headbuf \n]
foreach line $lines {
- if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
+ if {[regexp {^([^:]+):[ ]+([^;]*)} $line x name value]} {
set URL($url,head,[string tolower $name]) [string trim $value]
}
}
set URL($url,state) skip
+ puts "code=$code"
switch $code {
200 {
if {![info exists URL($url,head,content-type)]} {
puts $m
}
}
+ if {[info exist URL($url,title)]} {
+ foreach m $URL($url,title) {
+ puts $m
+ }
+ }
foreach v [array names URL $url,head,*] {
puts "$v = $URL($v)"
}