Home | History | Annotate | Download | only in scripts
      1 #! /bin/sh
      2 ## special handler for altavista, since they only hand out chunks of 10 at
      3 ## a time.  Tries to isolate out results without the leading/trailing trash.
      4 ## multiword arguments are foo+bar, as usual.
      5 ## Second optional arg switches the "what" field, to e.g. "news"
      6 
      7 test "${1}" = "" && echo 'Needs an argument to search for!' && exit 1
      8 WHAT="web"
      9 test "${2}" && WHAT="${2}"
     10 
     11 # convert multiple args
     12 PLUSARG="`echo $* | sed 's/ /+/g'`"
     13 
     14 # Plug in arg.  only doing simple-q for now; pg=aq for advanced-query
     15 # embedded quotes define phrases; otherwise it goes wild on multi-words
     16 QB="GET /cgi-bin/query?pg=q&what=${WHAT}&fmt=c&q=\"${PLUSARG}\""
     17 
     18 # ping 'em once, to get the routing warm
     19 nc -z -w 8 www.altavista.digital.com 24015 2> /dev/null
     20 echo "=== Altavista ==="
     21 
     22 for xx in 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 \
     23   190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 ; do
     24   echo "${QB}&stq=${xx}" | nc -w 15 www.altavista.digital.com 80 | \
     25   egrep '^<a href="http://'
     26 done
     27 
     28 exit 0
     29 
     30 # old filter stuff
     31   sed -e '/Documents .* matching .* query /,/query?.*stq=.* Document/p' \
     32   -e d
     33 
     34