#! /bin/sh
## The web sucks.  It is a mighty dismal kludge built out of a thousand
## tiny dismal kludges all band-aided together, and now these bottom-line
## clueless pinheads who never heard of "TCP handshake" want to run
## *commerce* over the damn thing.  Ye godz.  Welcome to TV of the next
## century -- six million channels of worthless shit to choose from, and
## about as much security as today's cable industry!
##
## Having grown mightily tired of pain in the ass browsers, I decided
## to build the minimalist client.  It doesn't handle POST, just GETs, but
## the majority of cgi forms handlers apparently ignore the method anyway.
## A distinct advantage is that it *doesn't* pass on any other information
## to the server, like Referer: or info about your local machine such as
## Netscum tries to!
##
## Since the first version, this has become the *almost*-minimalist client,
## but it saves a lot of typing now.  And with netcat as its backend, it's
## totally the balls.  Don't have netcat?  Get it here in /src/hacks!
## _H* 950824, updated 951009 et seq.
##
## args: hostname [port].  You feed it the filename-parts of URLs.
## In the loop, HOST, PORT, and SAVE do the right things; a null line
## gets the previous spec again [useful for initial timeouts]; EOF to exit.
## Relative URLs behave like a "cd" to wherever the last slash appears, or
## just use the last component with the saved preceding "directory" part.
## "\" clears the "filename" part and asks for just the "directory", and
## ".." goes up one "directory" level while retaining the "filename" part.
## Play around; you'll get used to it.

if test "$1" = "" ; then
  echo Needs hostname arg. >&2
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# PAGER and BACKEND are deliberately expanded unquoted further down so that
# they may carry options (e.g. PAGER="less -R").
PAGER="${PAGER:-more}"
BACKEND="nc -v -w 15"
host="$1"
port="${2:-80}"

# State: specD is the current "directory" (always ends in a slash), specF is
# the current "file" (may be empty), spec is the last line typed / the full
# path being fetched.  saving is the optional file pages get appended to.
spec="/"
specD="/"
specF=''
saving=''

# Temp file handling: the fetched page lives inside a private directory made
# by mktemp, so nobody can pre-create or symlink-race the path.  The page
# file itself does not exist until the first fetch, which is what the SAVE
# command checks for.
WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/web.XXXXXX") || {
  echo "Can't create a private temp directory" >&2
  exit 1
}
TMPAGE="${WORKDIR}/page"

# Remove the private temp directory; run from the EXIT trap.
cleanup () {
  rm -rf -- "${WORKDIR:?}"
}
trap cleanup EXIT
# HUP/TERM are turned into a normal exit so the EXIT trap runs.  INT is left
# alone on purpose: trapping it would change what ^C does inside the pager.
trap 'exit 1' HUP TERM

# Append the current page to the save file, in a format that still shows
# the URLs we hit after a de-html run.
#   $1 - path part of the URL the page was fetched from
save_page () {
  {
    printf '=== %s:%s ===\n' "${host}" "$1"
    cat "${TMPAGE}"
    echo ''
  } >> "${saving}"
}

# get loopy.  Prompts use printf rather than the unportable "echo -n", and
# read -r keeps a typed backslash literal so the "\" command actually works.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "${spec}" in
    HOST)
      printf 'New host: '
      read -r host
      continue
      ;;
    PORT)
      printf 'New port: '
      read -r port
      continue
      ;;
    SAVE)
      printf 'Save file: '
      read -r saving
      # if we've already got a page, save it
      if test "${saving}" && test -f "${TMPAGE}" ; then
        save_page "${specD}${specF}"
      fi
      continue
      ;;
    # Keep a state-concept of "current dir" and "current file".
    # Dir is /foo/bar/ ; file is "baz" or null.
    # leading slash: create whole new state.
    /*)
      specF="${spec##*/}"
      specD="${spec%/*}/"
      ;;
    # embedded slash: adding to the path.  "file" part can be blank, too
    */*)
      specF="${spec##*/}"
      joined="${specD}${spec}"
      specD="${joined%/*}/"
      ;;
    # dotdot: jump "up" one level and just reprompt [confirms what it did...]
    ..)
      parent="${specD%/}"
      specD="${parent%/*}/"
      continue
      ;;
    # blank line: do nothing, which will re-get the current one
    '')
      ;;
    # hack-quoted blank line: "\" means just zero out "file" part.  The
    # doubled form is accepted too, since that is what had to be typed
    # back when the line was read without -r.
    '\'|'\\')
      specF=''
      ;;
    # sigh
    '?')
      echo Help yourself. Read the script fer krissake.
      continue
      ;;
    # anything else is taken as a "file" part
    *)
      specF="${spec}"
      ;;
  esac

  # now put it together and stuff it down a connection.  Some lame non-unix
  # http servers assume they'll never get simple-query format, and wait till
  # an extra newline arrives.  If you're up against one of these, change
  # the format below to 'GET %s\n\n'.
  spec="${specD}${specF}"
  printf 'GET %s\n' "${spec}" | ${BACKEND} "${host}" "${port}" > "${TMPAGE}"
  ${PAGER} "${TMPAGE}"

  if test "${saving}" ; then
    save_page "${spec}"
  fi
done
# the EXIT trap removes the temp directory
exit 0

#######
# Encoding notes, finally from RFC 1738:
# %XX -- hex-encode of special chars
# allowed alphas in a URL: $_-.+!*'(),
# relative names *not* described, but obviously used all over the place
# transport://user:pass@host:port/path/name?query-string
# wais: port 210, //host:port/database?search or /database/type/file?
# cgi-bin/script?arg1=foo&arg2=bar&...  scripts have to parse xxx&yyy&zzz
# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
#######
# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
# multi-word args for most cases: foo+bar
# See 'websearch' for concise results of this research...