#!/bin/sh
## Hit the major search engines.  Hose the [large] output to a file!
## Autoconverts multiple arguments into the right format for given servers --
## usually worda+wordb, with certain exceptions like dejanews (worda|wordb).
## Extracting and post-sorting the URLs is highly recommended...
##
## Usage:  <this script> word [word ...] > results.out
##   stdout: the raw replies of every engine, separated by "=== Name ===" lines
##   stderr: diagnostics (including the verbose chatter from "nc -v")
##   exit:   0 after all queries were attempted, 1 on a usage or setup error
##
## Altavista currently handled by a separate script; may merge at some point.
##
## _H* original 950824, updated 951218 and 960209

# Usage errors go to stderr so they never end up inside a redirected
# results file.
if test "${1:-}" = ""; then
  echo 'Needs argument[s] to search for!' >&2
  exit 1
fi

# Don't have "nc"?  Get "netcat" from avian.org and add it to your toolkit.
if ! command -v nc >/dev/null 2>&1; then
  echo 'Needs "nc" (netcat) in PATH!' >&2
  exit 1
fi

# Re-split the arguments on whitespace with globbing switched off, so that
# search terms such as '*' or 'what?' stay literal instead of being expanded
# to file names.  Runs of whitespace still collapse, as they always did.
set -f
# shellcheck disable=SC2048,SC2086  # word splitting is intentional here
set -- $*
set +f

# "$*" joins the words with single spaces; turn those into the separators
# the servers expect.  PIPEARG is derived from PLUSARG, so every '+'
# (including one typed by the user) becomes '|'.
PLUSARG=$(printf '%s\n' "$*" | sed 's/ /+/g')
PIPEARG=$(printf '%s\n' "${PLUSARG}" | sed 's/+/|/g')

# Scratch file for requests that need more than a single GET line.
# mktemp gives an unpredictable name; the traps remove it on every exit path.
IFILE=$(mktemp "${TMPDIR:-/tmp}/webq.XXXXXX") || {
  echo 'Cannot create temporary file!' >&2
  exit 1
}
trap 'rm -f -- "${IFILE}"' EXIT
trap 'exit 1' HUP INT TERM

# doquery PATH HOST PORT
#   Send a bare "GET PATH" line (no HTTP version, no headers) to HOST:PORT
#   through netcat and copy whatever comes back to stdout.
#   printf is used instead of echo so a PATH containing backslashes or
#   starting with '-' is sent unmodified.
doquery () {
  printf 'GET %s\n' "$1" | nc -v -i 1 -w 30 "$2" "$3"
}

# changed since original: now supplying port numbers and separator lines...

echo "=== Yahoo ==="
doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80

echo '' ; echo "=== Webcrawler ==="
doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80

# Infoseek wants "registration" before it does a real search, but...
echo '' ; echo "=== Infoseek ==="
echo "  is broken."
# doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
# ... which doesn't work because their server wants the extra newlines, WITH
# CRLF pairs.  Skipped for now.  If you want to play, the basic idea and
# query formats follow.
# echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
# echo "" >> $IFILE
# nc -v -w 30 guide-p.infoseek.com 80 < $IFILE

# this is kinda flakey; might have to do twice??
echo '' ; echo "=== Opentext ==="
doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" \
  search.opentext.com 80

# looks like inktomi will only take hits=100, or defaults back to 30
# we try to suppress all the rating-dot images here, too
echo '' ; echo "=== Inktomi ==="
doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 |
  sed '/^<IMG ALT.*inktomi.*\.gif">$/d'

# dejanews limits hits to 120 and has a nonstandard format ('|' separators)
echo '' ; echo "=== Dejanews ==="
doquery "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" \
  smithers.dejanews.com 80

# OLD lycos: used to work until they broke it...
# doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
#	query5.lycos.cs.cmu.edu 80
# NEW lycos: wants the User-agent field present in query or it returns nothing
# 960206: webmaster@lycos duly complained to
# 960208: reply received; here's how we will now handle it:
# a full request (request line, two header lines, blank line) is written to
# the scratch file and fed to nc on stdin.
{
  printf '%s\n' \
    "GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x"
  printf '%s\n' "User-agent: *FUCK OFF*"
  printf '%s\n' "Why: go ask todd (at] pointcom.com (Todd Whitney)"
  printf '\n'
} > "${IFILE}"
echo '' ; echo "=== Lycos ==="
nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < "${IFILE}"

# The EXIT trap removes the scratch file.
exit 0

# CURRENTLY BROKEN [?]
# infoseek

# some args need to be redone to ensure whatever "and" mode applies