#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.44 2003/06/11 10:11:39 adam Exp $
+# $Id: robot.tcl,v 1.45 2003/06/11 10:29:41 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
}
}
+# wellform --
+#   Reduce an HTML fragment to plain text that can be placed inside an
+#   XML element without breaking well-formedness.
+#
+# Arguments:
+#   body   HTML text (for example anchor text or a document body).
+#
+# Results:
+#   Returns the text with HTML comments, script elements and all other
+#   tags removed, non-breaking-space entities turned into ordinary
+#   spaces, and every remaining ampersand escaped as an XML entity.
+proc wellform {body} {
+ # Replace HTML comments by a single space. The [^-]* part means that a
+ # comment whose text itself contains a hyphen is not matched here.
+ regsub -all {<!--[^-]*-->} $body { } abody
+ # Drop script elements whose content holds no further "<" character.
+ regsub -all -nocase {<script[^<]*</script>} $abody {} body
+ # Strip every remaining tag.
+ regsub -all {<[^\>]+>} $body {} abody
+ # Turn non-breaking-space entities into plain spaces. This must run
+ # before the ampersand escaping below, which would otherwise rewrite
+ # the entity's leading ampersand.
+ regsub -all {&nbsp;} $abody { } body
+ # Escape ampersands. The backslash makes the replacement a literal
+ # ampersand instead of regsub's "matched text" placeholder.
+ regsub -all {&} $body {\&amp;} abody
+ return $abody
+}
+
proc link {task url out href body distance} {
global URL control
if {[expr $distance > $control($task,distance)]} return
puts $out "<cr>"
puts $out "<identifier>$href</identifier>"
- puts $out "<description>$body</description>"
+ set abody [wellform $body]
+ puts $out "<description>$abody</description>"
puts $out "</cr>"
if {![RobotFileExist $task visited $host $path]} {
# don't print title of document content if noindex is used
if {!$noindex} {
puts $out "<title>$title</title>"
- regsub -all {<!--[^-]*-->} $body { } abody
- regsub -all -nocase {<script[^<]*</script>} $abody {} bbody
- regsub -all {<[^\>]+>} $bbody {} nbody
+ set bbody [wellform $body]
puts $out "<documentcontent>"
- puts $out $nbody
+ puts $out $bbody
puts $out "</documentcontent>"
}
} -nonest base {