#! /bin/sh
## Hit the major search engines.  Hose the [large] output to a file!
## Autoconverts multiple arguments into the right format for the given
## servers -- usually worda+wordb, with certain lame exceptions like dejanews.
## Extracting and post-sorting the URLs is highly recommended...
##
## Altavista is currently handled by a separate script; may merge at some point.
##
## _H* original 950824, updated 951218 and 960209

     11 test "${1}" = "" && echo 'Needs argument[s] to search for!' && exit 1
     12 PLUSARG="`echo $* | sed 's/ /+/g'`"
     13 PIPEARG="`echo ${PLUSARG} | sed 's/+/|/g'`"
     14 IFILE=/tmp/.webq.$$
     15 
     16 # Don't have "nc"?  Get "netcat" from avian.org and add it to your toolkit.
     17 doquery () {
     18   echo GET "$1" | nc -v -i 1 -w 30 "$2" "$3"
     19 }
     20 
     21 # changed since original: now supplying port numbers and separator lines...
     22 
     23 echo "=== Yahoo ==="
     24 doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80
     25 
     26 echo '' ; echo "=== Webcrawler ==="
     27 doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80
     28 
     29 # the infoseek lamers want "registration" before they do a real search, but...
     30 echo '' ; echo "=== Infoseek ==="
     31 echo "  is broken."
     32 # doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
     33 # ... which doesn't work cuz their lame server wants the extra newlines, WITH
     34 # CRLF pairs ferkrissake.  Fuck 'em for now, they're hopelessly broken.  If
     35 # you want to play, the basic idea and query formats follow.
     36 # echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
     37 # echo "" >> $IFILE
     38 # nc -v -w 30 guide-p.infoseek.com 80 < $IFILE
     39 
     40 # this is kinda flakey; might have to do twice??
     41 echo '' ; echo "=== Opentext ==="
     42 doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" \
     43   search.opentext.com 80
     44 
     45 # looks like inktomi will only take hits=100, or defaults back to 30
     46 # we try to suppress all the stupid rating dots here, too
     47 echo '' ; echo "=== Inktomi ==="
     48 doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 | \
     49   sed '/^<IMG ALT.*inktomi.*\.gif">$/d'
     50 
     51 #djnews lame shit limits hits to 120 and has nonstandard format
     52 echo '' ; echo "=== Dejanews ==="
     53 doquery "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" \
     54   smithers.dejanews.com 80
     55 
     56 # OLD lycos: used to work until they fucking BROKE it...
     57 # doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
     58 #   query5.lycos.cs.cmu.edu 80
     59 # NEW lycos: wants the User-agent field present in query or it returns nothing
     60 # 960206: webmaster@lycos duly bitched at
     61 # 960208: reply received; here's how we will now handle it:
     62 echo \
     63 "GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x" \
     64   > $IFILE
     65 echo "User-agent: *FUCK OFF*" >> $IFILE
     66 echo "Why: go ask todd (at] pointcom.com (Todd Whitney)" >> $IFILE
     67 echo '' >> $IFILE
     68 echo '' ; echo "=== Lycos ==="
     69 nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < $IFILE
     70 
     71 rm -f $IFILE
     72 exit 0
     73 
# CURRENTLY BROKEN [?]
# infoseek

# some args need to be redone to ensure whatever "and" mode applies
