// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 **************************************************************************
 * Copyright (C) 2005-2010, International Business Machines Corporation   *
 * and others. All Rights Reserved.                                       *
 **************************************************************************
 *
 */

package com.ibm.icu.dev.demo.charsetdet;

import java.awt.Font;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.KeyEvent;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.security.AccessControlException;

import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JMenu;
import javax.swing.JMenuBar;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
import javax.swing.JScrollPane;
import javax.swing.JTextPane;
import javax.swing.KeyStroke;

import com.ibm.icu.charset.CharsetICU;
import com.ibm.icu.dev.demo.impl.DemoApplet;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

/**
 * This simple application demonstrates how to use the CharsetDetector API. It
 * opens a file or web page, detects the encoding, and then displays it using that
 * encoding.
 */
public class DetectingViewer extends JFrame implements ActionListener
{

    /**
     * For serialization
     */
    private static final long serialVersionUID = -2307065724464747775L;

    /** Tag name compared (byte for byte, case-sensitively) to find script elements. */
    private static final byte[] SCRIPT_TAG = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};

    /** Tag name compared (byte for byte, case-sensitively) to find style elements. */
    private static final byte[] STYLE_TAG = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};

    /** Maximum number of bytes that {@link #filter(InputStream)} reads and marks. */
    private static final int BUFFER_SIZE = 100000;

    /** Size of the byte and char buffers used while decoding in {@link #show}. */
    private static final int READ_CHUNK = 1024;

    /** Number of bytes in one UTF-32 code unit. */
    private static final int UTF32_UNIT = 4;

    private final JTextPane text;

    /** May be {@code null} when the security manager refuses file access. */
    private final JFileChooser fileChooser;

    /**
     * Builds the viewer window, its menus and its text pane, and makes it visible.
     *
     * @throws java.awt.HeadlessException
     */
    public DetectingViewer()
    {
        super();
        DemoApplet.demoFrameOpened();

        JFileChooser chooser = null;

        try {
            chooser = new JFileChooser();
        } catch (AccessControlException ace) {
            System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
        }

        fileChooser = chooser;

        // The default close operation is deliberately left alone: closing is
        // routed through doQuit() by the window listener below.
        setSize(800, 800);

        setJMenuBar(makeMenus());
        text = new JTextPane();
        text.setContentType("text/plain");
        text.setText("");
        text.setSize(800, 800);

        Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
        text.setFont(font);

        JScrollPane scrollPane = new JScrollPane(text);

        getContentPane().add(scrollPane);

        // Register the listener before the frame can be seen so a close
        // request can never arrive without a handler.
        addWindowListener(
                new WindowAdapter() {
                    @Override
                    public void windowClosing(WindowEvent e) {
                        doQuit();
                    }
                } );

        setVisible(true);
    }

    /**
     * Dispatches menu commands by their action command string.
     *
     * @param event the menu event; unknown or missing commands are ignored
     */
    public void actionPerformed(ActionEvent event)
    {
        String cmd = event.getActionCommand();

        // Literal-first comparisons tolerate a null action command.
        if ("New...".equals(cmd)) {
            doNew();
        } else if ("Open File...".equals(cmd)) {
            doOpenFile();
        } else if ("Open URL...".equals(cmd)) {
            doOpenURL();
        } else if ("Quit".equals(cmd)) {
            doQuit();
        }
    }

    /**
     * Launches the demo by constructing one viewer window.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        new DetectingViewer();
    }

    /**
     * Shows a modal error dialog parented to this frame.
     *
     * @param title the dialog title
     * @param msg   the message to display
     */
    private void errorDialog(String title, String msg)
    {
        JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE);
    }

    /**
     * Returns text suitable for an error dialog: the exception's message, or
     * its {@code toString()} when it carries no message, so the dialog is
     * never blank.
     */
    private static String describe(Exception e)
    {
        String message = e.getMessage();

        return message != null ? message : e.toString();
    }

    /** Closes a stream, ignoring failures; used only on cleanup paths. */
    private static void closeQuietly(InputStream in)
    {
        if (in == null) {
            return;
        }

        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails during cleanup.
        }
    }

    /**
     * Opens a file for buffered reading.
     *
     * @param file the file to open
     * @return the buffered stream, or {@code null} after reporting a failure
     *         in an error dialog
     */
    private BufferedInputStream openFile(File file)
    {
        FileInputStream fileStream = null;

        try {
            fileStream = new FileInputStream(file);
        } catch (Exception e) {
            errorDialog("Error Opening File", describe(e));
            return null;
        }

        return new BufferedInputStream(fileStream);
    }

    /**
     * Opens a URL for buffered reading.
     *
     * @param url the URL text entered by the user
     * @return the buffered stream, or {@code null} after reporting a failure
     *         (malformed URL, connection error, ...) in an error dialog
     */
    private BufferedInputStream openURL(String url)
    {
        InputStream s = null;

        try {
            URL aURL = new URL(url);
            s = aURL.openStream();
        } catch (Exception e) {
            errorDialog("Error Opening URL", describe(e));
            return null;
        }

        return new BufferedInputStream(s);
    }

    /**
     * Formats a match as {@code name (language)} for titles and menu entries.
     */
    private String encodingName(CharsetMatch match)
    {
        return match.getName() + " (" + match.getLanguage() + ")";
    }

    /**
     * Replaces the contents of the second menu (index 1, created by
     * {@link #makeMenus()} as "Detected Encodings") with one entry per match,
     * showing the encoding name and its confidence.
     *
     * @param matches the matches to list, in the order given
     */
    private void setMatchMenu(CharsetMatch[] matches)
    {
        JMenu menu = getJMenuBar().getMenu(1);

        menu.removeAll();

        for (int i = 0; i < matches.length; i += 1) {
            CharsetMatch match = matches[i];

            menu.add(new JMenuItem(encodingName(match) + " " + match.getConfidence()));
        }
    }

    /**
     * Tests whether the bytes starting at {@code offset} begin with {@code tag}.
     *
     * @param buffer the data being scanned
     * @param offset index of the first byte to compare
     * @param length number of valid bytes in {@code buffer}
     * @param tag    the tag name to look for
     * @return {@code true} only if the whole tag is present before {@code length}
     */
    private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
    {
        int tagLen = tag.length;
        int bufRem = length - offset;
        int b;

        for (b = 0; b < tagLen && b < bufRem; b += 1) {
            if (buffer[b + offset] != tag[b]) {
                return false;
            }
        }

        // A tag truncated by the end of the data does not count as a match.
        return b == tagLen;
    }

    /**
     * Tests whether the bytes starting at {@code offset} are a '/' followed
     * by {@code tag}, i.e. the inside of a closing tag.
     *
     * @param buffer the data being scanned
     * @param offset index of the byte expected to be '/'
     * @param length number of valid bytes in {@code buffer}
     * @param tag    the tag name to look for
     * @return {@code true} if a closing tag for {@code tag} starts here
     */
    private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
    {
        // Bounds check first: when '<' is the last valid byte, offset equals
        // length and may even be past the end of the array.
        if (offset >= length || buffer[offset] != (byte) '/') {
            return false;
        }

        return openTag(buffer, offset + 1, length, tag);
    }

    /**
     * Reads up to {@link #BUFFER_SIZE} bytes from the stream and strips
     * markup from them: everything between '&lt;' and '&gt;' is dropped, as
     * is everything between an opening and a closing script or style tag.
     * The stream is marked before reading so that the caller can reset it.
     *
     * @param in the stream to sample
     * @return the remaining bytes, or {@code null} if reading failed
     */
    private byte[] filter(InputStream in)
    {
        byte[] buffer = new byte[BUFFER_SIZE];
        int bytesRemaining = BUFFER_SIZE;
        int bufLen = 0;

        in.mark(BUFFER_SIZE);

        try {
            while (bytesRemaining > 0) {
                int bytesRead = in.read(buffer, bufLen, bytesRemaining);

                if (bytesRead <= 0) {
                    break;
                }

                bufLen += bytesRead;
                bytesRemaining -= bytesRead;
            }
        } catch (IOException e) {
            // The caller reports the failure; there is nothing to filter.
            return null;
        }

        boolean inTag = false;
        boolean skip = false;
        int out = 0;

        // Compact the kept bytes toward the front of the same buffer; out
        // never overtakes i, so no unread byte is overwritten.
        for (int i = 0; i < bufLen; i += 1) {
            byte b = buffer[i];

            if (b == (byte) '<') {
                inTag = true;

                if (openTag(buffer, i + 1, bufLen, SCRIPT_TAG) ||
                    openTag(buffer, i + 1, bufLen, STYLE_TAG)) {
                    skip = true;
                } else if (closedTag(buffer, i + 1, bufLen, SCRIPT_TAG) ||
                           closedTag(buffer, i + 1, bufLen, STYLE_TAG)) {
                    skip = false;
                }
            } else if (b == (byte) '>') {
                inTag = false;
            } else if (! (inTag || skip)) {
                buffer[out++] = b;
            }
        }

        byte[] filtered = new byte[out];

        System.arraycopy(buffer, 0, filtered, 0, out);
        return filtered;
    }

    /**
     * Runs charset detection over a byte array.
     *
     * @param bytes the sample to examine; must not be {@code null}
     * @return all matches reported by the detector
     */
    private CharsetMatch[] detect(byte[] bytes)
    {
        CharsetDetector det = new CharsetDetector();

        det.setText(bytes);

        return det.detectAll();
    }

    /**
     * Runs charset detection over a stream.
     *
     * @param inputStream the stream to examine
     * @return all matches reported by the detector, or {@code null} if
     *         detection failed; {@link #show} reports that case to the user
     */
    private CharsetMatch[] detect(BufferedInputStream inputStream)
    {
        CharsetDetector det = new CharsetDetector();

        try {
            det.setText(inputStream);

            return det.detectAll();
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Decodes the stream with the best match and displays the result,
     * updating the window title and the detected-encodings menu.
     *
     * <p>The stream is reset first, so it must have been marked beforehand;
     * {@link #filter(InputStream)} does so, and presumably so does
     * {@code CharsetDetector.setText(InputStream)} (NOTE(review): confirm).
     * The stream is not closed here; that is left to the caller.
     *
     * @param inputStream the marked stream to decode
     * @param matches     detection results, best first; may be {@code null}
     * @param title       the file name or URL to show in the window title
     */
    private void show(InputStream inputStream, CharsetMatch[] matches, String title)
    {
        if (matches == null || matches.length == 0) {
            errorDialog("Match Error", "No matches!");
            return;
        }

        try {
            StringBuilder sb = new StringBuilder();
            String encoding = matches[0].getName();

            inputStream.reset();

            if (encoding.startsWith("UTF-32")) {
                // UTF-32 is decoded through the ICU charset, one chunk at a time.
                byte[] bytes = new byte[READ_CHUNK];
                int carried = 0;
                int bytesRead;
                Charset utf32 = CharsetICU.forNameICU(encoding);

                // Read only into the space after the carried-over bytes so
                // the read can never run past the end of the array.
                while ((bytesRead = inputStream.read(bytes, carried, bytes.length - carried)) >= 0) {
                    int available = carried + bytesRead;
                    int leftover = available % UTF32_UNIT;
                    int whole = available - leftover;

                    // Decode complete code units only, and only bytes that
                    // are actually valid in this pass.
                    sb.append(utf32.decode(ByteBuffer.wrap(bytes, 0, whole)).toString());

                    // Move a trailing partial code unit to the front so the
                    // next read completes it.
                    System.arraycopy(bytes, whole, bytes, 0, leftover);
                    carried = leftover;
                }
            } else {
                InputStreamReader isr = new InputStreamReader(inputStream, encoding);
                char[] buffer = new char[READ_CHUNK];
                int charsRead;

                while ((charsRead = isr.read(buffer, 0, buffer.length)) >= 0) {
                    sb.append(buffer, 0, charsRead);
                }
            }

            this.setTitle(title + " - " + encodingName(matches[0]));

            setMatchMenu(matches);
            text.setText(sb.toString());
        } catch (IOException e) {
            errorDialog("IO Error", describe(e));
        } catch (Exception e) {
            errorDialog("Internal Error", describe(e));
        }
    }

    /**
     * Handler for the "New..." command; not implemented.
     */
    private void doNew()
    {
        // open a new window...
    }

    /**
     * Lets the user pick a file, detects its encoding and displays it.
     */
    private void doOpenFile()
    {
        if (fileChooser == null) {
            // No chooser could be created; the menu item is disabled in this
            // case, so this is only a safety net.
            return;
        }

        int retVal = fileChooser.showOpenDialog(this);

        if (retVal == JFileChooser.APPROVE_OPTION) {
            File file = fileChooser.getSelectedFile();
            BufferedInputStream inputStream = openFile(file);

            if (inputStream != null) {
                try {
                    CharsetMatch[] matches = detect(inputStream);

                    show(inputStream, matches, file.getName());
                } finally {
                    closeQuietly(inputStream);
                }
            }
        }
    }

    /**
     * Asks the user for a URL, detects the encoding of the page with markup
     * filtered out, and displays the unfiltered page in that encoding.
     */
    private void doOpenURL()
    {
        String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
                null, null, null);

        if (url != null && url.length() > 0) {
            BufferedInputStream inputStream = openURL(url);

            if (inputStream != null) {
                try {
                    byte[] filtered = filter(inputStream);

                    if (filtered == null) {
                        // filter() returns null when the read failed; the
                        // detector cannot be given a null array.
                        errorDialog("IO Error", "Could not read from " + url);
                        return;
                    }

                    CharsetMatch[] matches = detect(filtered);

                    show(inputStream, matches, url);
                } finally {
                    closeQuietly(inputStream);
                }
            }
        }
    }

    /**
     * Notifies the demo launcher that this frame is closing, then hides and
     * disposes the window.
     */
    private void doQuit()
    {
        DemoApplet.demoFrameClosed();
        this.setVisible(false);
        this.dispose();
    }

    /**
     * Builds the menu bar: a "File" menu with the open and quit commands,
     * followed by an initially empty "Detected Encodings" menu.
     *
     * @return the populated menu bar
     */
    private JMenuBar makeMenus()
    {
        JMenu menu = new JMenu("File");
        JMenuItem mi;

        mi = new JMenuItem("Open File...");
        mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
        mi.addActionListener(this);
        menu.add(mi);
        if(fileChooser == null) {
            mi.setEnabled(false); // no file chooser.
        }

        mi = new JMenuItem("Open URL...");
        mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
        mi.addActionListener(this);
        menu.add(mi);

        mi = new JMenuItem("Quit");
        mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
        mi.addActionListener(this);
        menu.add(mi);

        JMenuBar mbar = new JMenuBar();
        mbar.add(menu);

        // setMatchMenu() looks this menu up by its position (index 1).
        menu = new JMenu("Detected Encodings");
        mbar.add(menu);

        return mbar;
    }
}