Home | History | Annotate | Download | only in tagsoup
      1 // This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
      2 //
      3 // TagSoup is licensed under the Apache License,
      4 // Version 2.0.  You may obtain a copy of this license at
      5 // http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
      6 // additional legal rights not granted by this license.
      7 //
      8 // TagSoup is distributed in the hope that it will be useful, but
      9 // unless required by applicable law or agreed to in writing, TagSoup
     10 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
     11 // OF ANY KIND, either express or implied; not even the implied warranty
     12 // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     13 //
     14 //
     15 // This file is part of TagSoup.
     16 //
     17 // This program is free software; you can redistribute it and/or modify
     18 // it under the terms of the GNU General Public License as published by
     19 // the Free Software Foundation; either version 2 of the License, or
     20 // (at your option) any later version.  You may also distribute
     21 // and/or modify it under version 2.1 of the Academic Free License.
     22 //
     23 // This program is distributed in the hope that it will be useful,
     24 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     25 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     26 //
     27 //
     28 // PYX Scanner
     29 
     30 package org.ccil.cowan.tagsoup;
     31 import java.io.*;
     32 import org.xml.sax.SAXException;
     33 
     34 /**
     35 A Scanner that accepts PYX format instead of HTML.
     36 Useful primarily for debugging.
     37 **/
     38 public class PYXScanner implements Scanner {
     39 
     40         public void resetDocumentLocator(String publicid, String systemid) {
     41 	// Need this method for interface compatibility, but note
     42 	// that PyxScanner does not implement Locator.
     43         }
     44 
     45 	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
     46 		BufferedReader br = new BufferedReader(r);
     47 		String s;
     48 		char[] buff = null;
     49 		boolean instag = false;
     50 		while ((s = br.readLine()) != null) {
     51 			int size = s.length();
     52 			if (buff == null || buff.length < size) {
     53 				buff = new char[size];
     54 				}
     55 			s.getChars(0, size, buff, 0);
     56 			switch (buff[0]) {
     57 			case '(':
     58 				if (instag) {
     59 					h.stagc(buff, 0, 0);
     60 					instag = false;
     61 					}
     62 				h.gi(buff, 1, size - 1);
     63 				instag = true;
     64 				break;
     65 			case ')':
     66 				if (instag) {
     67 					h.stagc(buff, 0, 0);
     68 					instag = false;
     69 					}
     70 				h.etag(buff, 1, size - 1);
     71 				break;
     72 			case '?':
     73 				if (instag) {
     74 					h.stagc(buff, 0, 0);
     75 					instag = false;
     76 					}
     77 				h.pi(buff, 1, size - 1);
     78 				break;
     79 			case 'A':
     80 				int sp = s.indexOf(' ');
     81 				h.aname(buff, 1, sp - 1);
     82 				h.aval(buff, sp + 1, size - sp - 1);
     83 				break;
     84 			case '-':
     85 				if (instag) {
     86 					h.stagc(buff, 0, 0);
     87 					instag = false;
     88 					}
     89 				if (s.equals("-\\n")) {
     90 					buff[0] = '\n';
     91 					h.pcdata(buff, 0, 1);
     92 					}
     93 				else {
     94 					// FIXME:
     95 					// Does not decode \t and \\ in input
     96 					h.pcdata(buff, 1, size - 1);
     97 					}
     98 				break;
     99 			case 'E':
    100 				if (instag) {
    101 					h.stagc(buff, 0, 0);
    102 					instag = false;
    103 					}
    104 				h.entity(buff, 1, size - 1);
    105 				break;
    106 			default:
    107 //				System.err.print("Gotcha ");
    108 //				System.err.print(s);
    109 //				System.err.print('\n');
    110 				break;
    111 				}
    112 			}
    113 		h.eof(buff, 0, 0);
    114 		}
    115 
    116 	public void startCDATA() { }
    117 
    118 	public static void main(String[] argv) throws IOException, SAXException {
    119 		Scanner s = new PYXScanner();
    120 		Reader r = new InputStreamReader(System.in, "UTF-8");
    121 		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
    122 		s.scan(r, new PYXWriter(w));
    123 		}
    124 	}
    125