Home | History | Annotate | Download | only in charsetdet
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  **************************************************************************
      5  * Copyright (C) 2005-2010, International Business Machines Corporation   *
      6  * and others. All Rights Reserved.                                       *
      7  **************************************************************************
      8  *
      9  */
     10 
     11 package com.ibm.icu.dev.demo.charsetdet;
     12 
     13 import java.awt.Font;
     14 import java.awt.event.ActionEvent;
     15 import java.awt.event.ActionListener;
     16 import java.awt.event.KeyEvent;
     17 import java.awt.event.WindowAdapter;
     18 import java.awt.event.WindowEvent;
     19 import java.io.BufferedInputStream;
     20 import java.io.File;
     21 import java.io.FileInputStream;
     22 import java.io.IOException;
     23 import java.io.InputStream;
     24 import java.io.InputStreamReader;
     25 import java.net.URL;
     26 import java.nio.ByteBuffer;
     27 import java.nio.charset.Charset;
     28 import java.security.AccessControlException;
     29 
     30 import javax.swing.JFileChooser;
     31 import javax.swing.JFrame;
     32 import javax.swing.JMenu;
     33 import javax.swing.JMenuBar;
     34 import javax.swing.JMenuItem;
     35 import javax.swing.JOptionPane;
     36 import javax.swing.JScrollPane;
     37 import javax.swing.JTextPane;
     38 import javax.swing.KeyStroke;
     39 
     40 import com.ibm.icu.charset.CharsetICU;
     41 import com.ibm.icu.dev.demo.impl.DemoApplet;
     42 import com.ibm.icu.text.CharsetDetector;
     43 import com.ibm.icu.text.CharsetMatch;
     44 
     45 /**
     46  * This simple application demonstrates how to use the CharsetDetector API. It
     47  * opens a file or web page, detects the encoding, and then displays it using that
     48  * encoding.
     49  */
     50 public class DetectingViewer extends JFrame implements ActionListener
     51 {
     52 
     53     /**
     54      * For serialization
     55      */
     56     private static final long serialVersionUID = -2307065724464747775L;
     57     private JTextPane text;
     58     private JFileChooser fileChooser;
     59 
     60     /**
     61      * @throws java.awt.HeadlessException
     62      */
     63     public DetectingViewer()
     64     {
     65         super();
     66         DemoApplet.demoFrameOpened();
     67 
     68         try {
     69             fileChooser = new JFileChooser();
     70         } catch (AccessControlException ace) {
     71             System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
     72             fileChooser = null; //
     73         }
     74 
     75 //        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
     76         setSize(800, 800);
     77 
     78         setJMenuBar(makeMenus());
     79         text = new JTextPane();
     80         text.setContentType("text/plain");
     81         text.setText("");
     82         text.setSize(800, 800);
     83 
     84         Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
     85         text.setFont(font);
     86 
     87         JScrollPane scrollPane = new JScrollPane(text);
     88 
     89         getContentPane().add(scrollPane);
     90         setVisible(true);
     91 
     92         addWindowListener(
     93                 new WindowAdapter() {
     94                     public void windowClosing(WindowEvent e) {
     95 //                        setVisible(false);
     96 //                        dispose();
     97 
     98                           doQuit();
     99                     }
    100                 } );
    101 
    102 
    103     }
    104 
    105     public void actionPerformed(ActionEvent event)
    106     {
    107         String cmd = event.getActionCommand();
    108 
    109         if (cmd.equals("New...")) {
    110            doNew();
    111         } else if (cmd.equals("Open File...")) {
    112            doOpenFile();
    113         } else if (cmd.equals("Open URL...")) {
    114             doOpenURL();
    115         } else if (cmd.equals("Quit")) {
    116            doQuit();
    117         }
    118     }
    119 
    120     public static void main(String[] args)
    121     {
    122         new DetectingViewer();
    123     }
    124 
    125     private void errorDialog(String title, String msg)
    126     {
    127         JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE);
    128     }
    129 
    130     private BufferedInputStream openFile(File file)
    131     {
    132         FileInputStream fileStream = null;
    133 
    134         try {
    135             fileStream = new FileInputStream(file);
    136         } catch (Exception e) {
    137             errorDialog("Error Opening File", e.getMessage());
    138             return null;
    139         }
    140 
    141         return new BufferedInputStream(fileStream);
    142     }
    143 
    144 //    private void openFile(String directory, String filename)
    145 //    {
    146 //        openFile(new File(directory, filename));
    147 //    }
    148 
    149 
    150     private BufferedInputStream openURL(String url)
    151     {
    152         InputStream s = null;
    153 
    154         try {
    155             URL aURL = new URL(url);
    156             s = aURL.openStream();
    157         } catch (Exception e) {
    158             errorDialog("Error Opening URL", e.getMessage());
    159             return null;
    160         }
    161 
    162         return new BufferedInputStream(s);
    163     }
    164 
    165     private String encodingName(CharsetMatch match)
    166     {
    167         return match.getName() + " (" + match.getLanguage() + ")";
    168     }
    169 
    170     private void setMatchMenu(CharsetMatch[] matches)
    171     {
    172         JMenu menu = getJMenuBar().getMenu(1);
    173         JMenuItem menuItem;
    174 
    175         menu.removeAll();
    176 
    177         for (int i = 0; i < matches.length; i += 1) {
    178             CharsetMatch match = matches[i];
    179 
    180             menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence());
    181 
    182             menu.add(menuItem);
    183         }
    184     }
    185 
    186     private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};
    187     private byte[] styleTag  = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};
    188     private static int BUFFER_SIZE = 100000;
    189 
    190     private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
    191     {
    192         int tagLen = tag.length;
    193         int bufRem = length - offset;
    194         int b;
    195 
    196         for (b = 0; b < tagLen && b < bufRem; b += 1) {
    197             if (buffer[b + offset] != tag[b]) {
    198                 return false;
    199             }
    200         }
    201 
    202         return b == tagLen;
    203     }
    204 
    205     private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
    206     {
    207         if (buffer[offset] != (byte) '/') {
    208             return false;
    209         }
    210 
    211         return openTag(buffer, offset + 1, length, tag);
    212     }
    213 
    214     private byte[] filter(InputStream in)
    215     {
    216         byte[] buffer = new byte[BUFFER_SIZE];
    217         int bytesRemaining = BUFFER_SIZE;
    218         int bufLen = 0;
    219 
    220         in.mark(BUFFER_SIZE);
    221 
    222         try {
    223             while (bytesRemaining > 0) {
    224                 int bytesRead = in.read(buffer, bufLen, bytesRemaining);
    225 
    226                 if (bytesRead <= 0) {
    227                     break;
    228                 }
    229 
    230                 bufLen += bytesRead;
    231                 bytesRemaining -= bytesRead;
    232             }
    233         } catch (Exception e) {
    234             // TODO: error handling?
    235             return null;
    236         }
    237 
    238         boolean inTag = false;
    239         boolean skip  = false;
    240         int out = 0;
    241 
    242         for (int i = 0; i < bufLen; i += 1) {
    243             byte b = buffer[i];
    244 
    245             if (b == (byte) '<') {
    246                 inTag = true;
    247 
    248                 if (openTag(buffer, i + 1, bufLen, scriptTag) ||
    249                     openTag(buffer, i + 1, bufLen, styleTag)) {
    250                     skip = true;
    251                 } else if (closedTag(buffer, i + 1, bufLen, scriptTag) ||
    252                            closedTag(buffer, i + 1, bufLen, styleTag)) {
    253                     skip = false;
    254                 }
    255             } else if (b == (byte) '>') {
    256                 inTag = false;
    257             } else if (! (inTag || skip)) {
    258                 buffer[out++] = b;
    259             }
    260         }
    261 
    262         byte[] filtered = new byte[out];
    263 
    264         System.arraycopy(buffer, 0, filtered, 0, out);
    265         return filtered;
    266     }
    267 
    268     private CharsetMatch[] detect(byte[] bytes)
    269     {
    270         CharsetDetector det = new CharsetDetector();
    271 
    272         det.setText(bytes);
    273 
    274         return det.detectAll();
    275     }
    276 
    277     private CharsetMatch[] detect(BufferedInputStream inputStream)
    278     {
    279         CharsetDetector det    = new CharsetDetector();
    280 
    281         try {
    282             det.setText(inputStream);
    283 
    284             return det.detectAll();
    285         } catch (Exception e) {
    286             // TODO: error message?
    287             return null;
    288         }
    289     }
    290 
    291     private void show(InputStream inputStream, CharsetMatch[] matches, String title)
    292     {
    293         InputStreamReader isr;
    294         char[] buffer = new char[1024];
    295         int bytesRead = 0;
    296 
    297         if (matches == null || matches.length == 0) {
    298             errorDialog("Match Error", "No matches!");
    299             return;
    300         }
    301 
    302         try {
    303             StringBuffer sb = new StringBuffer();
    304             String encoding = matches[0].getName();
    305 
    306             inputStream.reset();
    307 
    308             if (encoding.startsWith("UTF-32")) {
    309                 byte[] bytes = new byte[1024];
    310                 int offset = 0;
    311                 int chBytes = 0;
    312                 Charset utf32 = CharsetICU.forNameICU(encoding);
    313 
    314                 while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
    315                     offset  = bytesRead % 4;
    316                     chBytes = bytesRead - offset;
    317 
    318                     sb.append(utf32.decode(ByteBuffer.wrap(bytes)).toString());
    319 
    320                     if (offset != 0) {
    321                         for (int i = 0; i < offset; i += 1) {
    322                             bytes[i] = bytes[chBytes + i];
    323                         }
    324                     }
    325                 }
    326             } else {
    327                 isr = new InputStreamReader(inputStream, encoding);
    328 
    329                 while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
    330                     sb.append(buffer, 0, bytesRead);
    331                 }
    332 
    333                 isr.close();
    334             }
    335 
    336             this.setTitle(title + " - " + encodingName(matches[0]));
    337 
    338             setMatchMenu(matches);
    339             text.setText(sb.toString());
    340         } catch (IOException e) {
    341             errorDialog("IO Error", e.getMessage());
    342         } catch (Exception e) {
    343             errorDialog("Internal Error", e.getMessage());
    344         }
    345     }
    346 
    347     private void doNew()
    348     {
    349         // open a new window...
    350     }
    351 
    352     private void doOpenFile()
    353     {
    354         int retVal = fileChooser.showOpenDialog(this);
    355 
    356         if (retVal == JFileChooser.APPROVE_OPTION) {
    357             File file = fileChooser.getSelectedFile();
    358             BufferedInputStream inputStream = openFile(file);
    359 
    360             if (inputStream != null) {
    361                 CharsetMatch[] matches = detect(inputStream);
    362 
    363                 show(inputStream, matches, file.getName());
    364             }
    365         }
    366     }
    367 
    368     private void doOpenURL()
    369     {
    370         String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
    371                 null, null, null);
    372 
    373         if (url != null && url.length() > 0) {
    374             BufferedInputStream inputStream = openURL(url);
    375 
    376             if (inputStream != null) {
    377                 byte[] filtered = filter(inputStream);
    378                 CharsetMatch[] matches = detect(filtered);
    379 
    380                 show(inputStream, matches, url);
    381             }
    382         }
    383 }
    384 
    385     private void doQuit()
    386     {
    387         DemoApplet.demoFrameClosed();
    388         this.setVisible(false);
    389         this.dispose();
    390     }
    391 
    392     private JMenuBar makeMenus()
    393     {
    394         JMenu menu = new JMenu("File");
    395         JMenuItem mi;
    396 
    397         mi = new JMenuItem("Open File...");
    398         mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
    399         mi.addActionListener(this);
    400         menu.add(mi);
    401         if(fileChooser == null) {
    402             mi.setEnabled(false); // no file chooser.
    403         }
    404 
    405         mi = new JMenuItem("Open URL...");
    406         mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
    407         mi.addActionListener(this);
    408         menu.add(mi);
    409 
    410         mi = new JMenuItem("Quit");
    411         mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
    412         mi.addActionListener(this);
    413         menu.add(mi);
    414 
    415         JMenuBar mbar = new JMenuBar();
    416         mbar.add(menu);
    417 
    418         menu = new JMenu("Detected Encodings");
    419         mbar.add(menu);
    420 
    421         return mbar;
    422     }
    423 }
    424