Writing Intelligent Web Agents

Browser Creator Predicts Its Demise. Back in the mid-1990s I attended a panel discussion where Marc Andreessen, the cofounder and then chief technology officer of Netscape, was asked for his predictions of where the World Wide Web was headed. He predicted, to a surprised crowd, that the days of the browser, as the primary means of viewing the Internet, were numbered.


January 01, 2002
URL:http://www.drdobbs.com/writing-intelligent-web-agents/184413817

Web Techniques: Figure 1

Figure 1


Example of a simple Web agent.

Figure 1


#   getFromWeb
#   This Tcl/Tk procedure performs an HTTP
#   GET from the indicated web server and
#   stores the response body in the specified file.
#
#   Arguments:
#     url   - address of the resource to download
#     file  - path of the local file that receives the body
#             (created, or truncated if it already exists)
#   Returns:
#     the token produced by ::http::geturl; the caller can use it
#     to inspect the transaction state (for example the meta data).
#   Errors:
#     any error raised by ::http::geturl is re-raised after the
#     output file has been closed.

proc getFromWeb { url file } {

    # LOAD HTTP PACKAGE
    package require http

    # IDENTIFY OUR WEB CLIENT
    # (option and value must be on one command line; otherwise the
    # first line merely queries the setting and the second line is
    # executed as an unknown command)
    ::http::config -useragent "ultra thin Tcl&Tk web agent"

    # OPEN OUTPUT FILE
    set outFile [open $file w]

    # CREATE CONNECTION TO URL, DOWNLOAD,
    # AND STORE TO FILE CHANNEL
    # The catch makes sure the channel is closed even when the
    # request fails, so a bad URL does not leak an open file.
    if {[catch {::http::geturl $url -channel $outFile} token]} {
        set savedInfo $::errorInfo
        set savedCode $::errorCode
        catch {close $outFile}
        error $token $savedInfo $savedCode
    }
    close $outFile

    # RETURN TOKEN
    return $token
}

Web Techniques: Figure 2

Figure 2


An Internet weather report.

Figure 2


#   properties.tcl
#   Returns properties of web servers 
#   and web pages

source getFromWeb.tcl
source fileLib.tcl
source parse.tcl

# CREATE WIDGETS
# A URL entry field, a scrolling text area for the results, and a
# button that starts the analysis.  Every multi-line command ends its
# unfinished lines with a backslash; without it Tcl treats each line
# as a separate command and the option lines fail.
label     .lURL -text "URL:"
entry     .eURL -width 60
label     .lProperties -text "Properties"
text      .tProperties -width 60 -height 10 \
           -yscrollcommand ".sProperties set"
scrollbar .sProperties -command \
  ".tProperties yview"
button    .bStart -text "Get Properties" \
           -command getProperties

# PACK AND DISPLAY WIDGETS
# The text area and its scrollbar share the last row: text on the
# left, scrollbar stretched vertically on the right.
pack .lURL -anchor w
pack .eURL -anchor w
pack .bStart -anchor w
pack .lProperties -anchor w
pack .tProperties -side left
pack .sProperties -side right -fill y

# getProperties
#   Command of the "Get Properties" button.  Downloads the page whose
#   URL is in the .eURL entry to download.temp, then appends to the
#   .tProperties text widget: the page title, the number of GIF and
#   JPEG image references, the number of links, and the meta data
#   returned by the server.
#   Uses temp1..temp4 in the current directory as scratch files for
#   the parse results.
#   Takes no arguments and returns nothing useful.
proc getProperties { } {
  set webPage [.eURL get]
  set token [getFromWeb $webPage download.temp]

  .tProperties insert end \
    "*** Properties for:$webPage ***\n"

  # DISPLAY CONTENT DATA
  # RETURNED FROM PAGE ANALYSIS
  # Each call counts the begin/end pairs that lie within the given
  # number of characters of each other.
  set GIFqty  [parse download.temp \
               <img 40 .gif temp1]
  set JPGqty  [parse download.temp \
               <img 40 .jpg temp2]
  set LINKqty [parse download.temp \
               <A 140 /A> temp3]

  # PARSE AND STRIP TAGS FROM DOCUMENT TITLE
  parse download.temp <TITLE> 80 </TITLE> temp4
  set Title [readEntireFile temp4]
  regsub -nocase "<TITLE>" $Title {} Title
  regsub -nocase  "</TITLE>" $Title {} Title

  # DISPLAY TITLE, IMAGE, AND LINK METRICS
  .tProperties insert end "Title: \t $Title \n"
  .tProperties insert end "Images \t gif:$GIFqty \t jpg:$JPGqty \n"
  .tProperties insert end "Links: \t $LINKqty \n"

  # GET AND DISPLAY META DATA
  # RETURNED FROM SERVER
  # The token is the name of a global array, hence "upvar #0".
  upvar #0 $token state
  # Put a line break after every closing brace so that each braced
  # header value ends its own line in the display.
  regsub -all {\}} $state(meta) \
    "\}\n" meta
  .tProperties insert end "$meta \n"

  # Release the state array kept by the http package for this request.
  ::http::cleanup $token
}
Web Techniques: Figure 3

Figure 3


Watching an online auction.

Figure 3


#  parse.tcl
#  Searches source file for matches to the parse criteria.
#  Matches are stored in a destination file.
#  The number of matches is returned to calling program
#
#  Arguments:
#    source - path of the file to search
#    bCrit  - text that begins a match (compared case-insensitively)
#    gap    - maximum number of characters allowed between the end of
#             bCrit and the start of eCrit
#    eCrit  - text that ends a match (compared case-insensitively)
#    dest   - path of the file that receives the matches, one per
#             line, with embedded newlines removed
#  Requires: readEntireFile (fileLib.tcl)

proc parse { source bCrit gap eCrit dest } {

  # CONVERT PARSE CRITERIA TO LOWER CASE
  set bCrit [string tolower $bCrit]
  set eCrit [string tolower $eCrit]

  # CREATE A LOWER CASE COPY OF THE PARSE SOURCE
  # Searching is done in the lower case copy, extraction in the
  # original, so matches keep their original capitalisation.
  set rawHTML [ readEntireFile $source ]
  set lowHTML [ string tolower $rawHTML ]

  # OPEN DESTINATION FILE
  set destFile [open $dest "w"]

  # DETERMINE THE LENGTHS OF THE CRITERIA
  set bCritLen [string length $bCrit]
  set eCritLen [string length $eCrit]

  # PARSE LOOP
  # pos is the index at which the next search starts; it replaces the
  # repeated copying of the unsearched tail of the document.
  set count 0
  set pos 0
  while { 1 } {
    # LOOK FOR NEXT OCCURRENCES OF PARSE CRITERIA
    set bCritLoc [string first $bCrit $lowHTML $pos]
    set eCritLoc [string first $eCrit $lowHTML $pos]

    # BREAK IF EITHER PARSE CRITERIA ARE NOT FOUND
    if {$bCritLoc < 0 || $eCritLoc < 0} { break }

    # IF ALL CRITERIA SATISFIED, CLEAN-UP AND SAVE
    set actualGap [expr {$eCritLoc - $bCritLoc - $bCritLen}]
    if { $actualGap <= $gap } {
      # An end criterion located before the begin criterion yields an
      # empty range here: nothing is saved and the search resumes
      # after that end criterion.
      set foundData [ string range $rawHTML $bCritLoc \
               [expr {$eCritLoc + $eCritLen - 1}]]
      if { [string length $foundData] > 0} {
        regsub -all "\n" $foundData "" data
        puts $destFile $data
        incr count
      }
      set pos [expr {$eCritLoc + $eCritLen}]
    } else {
      # The end criterion is too far away.  Step past this begin
      # criterion so the loop always advances; leaving the position
      # unchanged here would repeat the same search forever.
      set pos [expr {$bCritLoc + $bCritLen}]
    }
  }
  close $destFile
  return $count
}


Web Techniques: Figure 4

Figure 4


Detecting a new online bid.

Figure 4


#  deleteTags.tcl
#   Removes the tags from an HTML file
#   Requires: fileLib.tcl, parse.tcl
#
#   Arguments:
#     inFile  - path of the HTML file to read
#     outFile - path of the file that receives the text with each
#               tag replaced by a newline
#   Side effects:
#     writes the scratch file "tagFile" in the current directory and
#     prints each tag, followed by 1 or 0 (replaced / not found), on
#     standard output.

proc deleteTags { inFile outFile} {

  # Flatten the document to one line and drop square brackets before
  # it is handed to the parser.
  set data [readEntireFile $inFile]
  regsub -all "\n" $data {} data
  regsub -all {\[} $data {} data
  regsub -all {\]} $data {} data
  writefile $outFile $data

  # Collect every "<...>" span of at most 250 characters in tagFile.
  set tagCount [parse $outFile < 250 > tagFile]

  set fileID [open tagFile "r"]
  for {set xx 0} {$xx < $tagCount} {incr xx} {
    gets $fileID parsedTag
    puts $parsedTag
    # Replace the first occurrence of the tag, taken literally.  Tags
    # routinely contain characters such as ? . * + ( ) that would
    # change the meaning of the tag if it were used as a regular
    # expression pattern.
    set tagLoc [string first $parsedTag $data]
    if {$tagLoc >= 0} {
      set tagEnd [expr {$tagLoc + [string length $parsedTag] - 1}]
      set data [string replace $data $tagLoc $tagEnd "\n"]
      puts 1
    } else {
      puts 0
    }
  }
  close $fileID
  writefile $outFile $data
}

# fileLib.tcl
#   File I/O routines

# writefile
#   Creates the named file (or truncates an existing one) and writes
#   data to it followed by a newline.
proc writefile { filename data } {
        set out [open $filename w]
        puts $out $data
        close $out
}

# readfile
#   Returns the first line of the named file, without its line
#   terminator.
proc readfile { filename } {
        set in [open $filename r]
        set firstLine [gets $in]
        close $in
        return $firstLine
}

# readEntireFile
#   Returns the complete contents of the named file as one string.
proc readEntireFile {filename } {
        set in [open $filename r]
        set contents [read $in]
        close $in
        return $contents
}

#   weather.tcl
#   Reads weather map from National Weather Service 

source getFromWeb.tcl

# FETCH THE MAP IMAGE FROM THE NATIONAL WEATHER SERVICE
# URL stays a global variable because the Update button handler
# reads it again for every refresh.
set URL {www.wrh.noaa.gov//TotalForecast//images//Monterey.gif}
getFromWeb $URL downLoaded.gif

# BUILD THE USER INTERFACE
# "image create" returns the name of the new image (weatherMap), which
# is handed straight to the label that displays it.
label .lImage -image [image create photo weatherMap -file downLoaded.gif]
button .bExit -command exit -text Exit
button .bUpdate -command loadImage -text Update

# Map on top, the two buttons side by side underneath.
pack .lImage
pack .bUpdate -side left
pack .bExit -side left

# ROUTINE USED BY "UPDATE" BUTTON
# Downloads the weather map again and reloads it into the weatherMap
# photo image so the label shows the new picture.
# Takes no arguments; reads the global URL.
proc loadImage { } {
  global URL

  # GET FRESH IMAGE
  getFromWeb $URL downLoaded.gif

  # DISPLAY IMAGE
  # Assigning the same image name to the label again does not re-read
  # the file, so load the new file into the photo image explicitly.
  # Blanking first keeps old pixels from showing through transparent
  # areas; -shrink lets the image take the size of the new file.
  weatherMap blank
  weatherMap read downLoaded.gif -shrink
  .lImage configure -image weatherMap
}

Terms of Service | Privacy Statement | Copyright © 2024 UBM Tech, All rights reserved.