#! /bin/sh
## The web sucks.  It is a mighty dismal kludge built out of a thousand
## tiny dismal kludges all band-aided together, and now these bottom-line
## clueless pinheads who never heard of "TCP handshake" want to run
## *commerce* over the damn thing.  Ye godz.  Welcome to TV of the next
## century -- six million channels of worthless shit to choose from, and
## about as much security as today's cable industry!
##
## Having grown mightily tired of pain in the ass browsers, I decided
## to build the minimalist client.  It doesn't handle POST, just GETs, but
## the majority of cgi forms handlers apparently ignore the method anyway.
## A distinct advantage is that it *doesn't* pass on any other information
## to the server, like Referer: or info about your local machine such as
## Netscum tries to!
##
## Since the first version, this has become the *almost*-minimalist client,
## but it saves a lot of typing now.  And with netcat as its backend, it's
## totally the balls.  Don't have netcat?  Get it here in /src/hacks!
## _H* 950824, updated 951009 et seq.
##
## args: hostname [port].  You feed it the filename-parts of URLs.
## In the loop, HOST, PORT, and SAVE do the right things; a null line
## gets the previous spec again [useful for initial timeouts]; EOF to exit.
## Relative URLs behave like a "cd" to wherever the last slash appears, or
## just use the last component with the saved preceding "directory" part.
## "\" clears the "filename" part and asks for just the "directory", and
## ".." goes up one "directory" level while retaining the "filename" part.
## Play around; you'll get used to it.
##
## Environment: PAGER is the command used to display each fetched page
## (default: more).

if test "$1" = "" ; then
  echo "Needs hostname arg." >&2
  exit 1
fi
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

PAGER=${PAGER:-more}
BACKEND="nc -v -w 15"
host="$1"
port="80"
if test "$2" != "" ; then
  port="$2"
fi

# Path state: specD is the current "directory" (always ends in a slash),
# specF is the current "file" (may be empty), spec is the last line read and
# later the full path actually requested.  saving is the optional log file.
spec="/"
specD="/"
specF=''
saving=''

# Keep the fetched page inside a private directory made by mktemp, so nobody
# can pre-create or symlink-race the file.  The page file itself does not
# exist until the first fetch, which is what SAVE checks for below.
WEBTMP=$(mktemp -d "${TMPDIR:-/tmp}/web.XXXXXX") || {
  echo "Can't create temp dir" >&2
  exit 1
}
TMPAGE="${WEBTMP}/page"

# Remove the temp directory on every exit path; turn the usual fatal signals
# into a normal exit so the EXIT trap fires for them too.
cleanup() {
  rm -rf -- "${WEBTMP}"
}
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Append the current page to the save file, in a format that still shows the
# URLs we hit after a de-html run.  $1 is the "host:path" label for the header.
save_page() {
  {
    printf '=== %s ===\n' "$1"
    cat "${TMPAGE}"
    echo ''
  } >> "${saving}"
}

# get loopy.  Prompts go through printf rather than "echo -n", which is not
# portable; "read -r" keeps backslashes literal so that a lone "\" really
# reaches the case statement instead of acting as a line continuation.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "${spec}" in
  HOST)
    printf 'New host: '
    read -r host
    continue
  ;;
  PORT)
    printf 'New port: '
    read -r port
    continue
  ;;
  SAVE)
    printf 'Save file: '
    read -r saving
# if we've already got a page, save it
    if test "${saving}" && test -f "${TMPAGE}" ; then
      save_page "${host}:${specD}${specF}"
    fi
    continue
  ;;
# Keep a state-concept of "current dir" and "current file".
# Dir is /foo/bar/ ; file is "baz" or null.
# leading slash: create whole new state.
  /*)
    specF="${spec##*/}"
    specD="${spec%/*}/"
  ;;
# embedded slash: adding to the path.  "file" part can be blank, too
  */*)
    specF="${spec##*/}"
    specD="${specD}${spec%/*}/"
  ;;
# dotdot: jump "up" one level and just reprompt [confirms what it did...]
  ..)
    specD=$(printf '%s\n' "${specD}" | sed 's|\(.*/\)..*/|\1|')
    continue
  ;;
# blank line: do nothing, which will re-get the current one
  '')
  ;;
# hack-quoted blank line: "\" means just zero out "file" part
  '\')
    specF=''
  ;;
# sigh
  '?')
    echo 'Help yourself. Read the script fer krissake.'
    continue
  ;;
# anything else is taken as a "file" part
  *)
    specF="${spec}"
  ;;
  esac

# now put it together and stuff it down a connection.  Some lame non-unix
# http servers assume they'll never get simple-query format, and wait till
# an extra newline arrives.  If you're up against one of these, change
# below to printf 'GET %s\n\n' "$spec" | $BACKEND ...
  spec="${specD}${specF}"
# BACKEND and PAGER hold whole command lines, so they are left unquoted on
# purpose to let the shell split them into command and arguments.
  printf 'GET %s\n' "${spec}" | ${BACKEND} "${host}" "${port}" > "${TMPAGE}"
  ${PAGER} "${TMPAGE}"

  if test "${saving}" ; then
    save_page "${host}:${spec}"
  fi
done
# the EXIT trap removes the temp directory and the page inside it
exit 0

#######
# Encoding notes, finally from RFC 1738:
# %XX -- hex-encode of special chars
# allowed alphas in a URL: $_-.+!*'(),
# relative names *not* described, but obviously used all over the place
# transport://user:pass@host:port/path/name?query-string
# wais: port 210, //host:port/database?search or /database/type/file?
# cgi-bin/script?arg1=foo&arg2=bar&...  scripts have to parse xxx&yyy&zzz
# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
#######
# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
# multi-word args for most cases: foo+bar
# See 'websearch' for concise results of this research...