Home | History | Annotate | Download | only in test
      1 # 2002 May 24
      2 #
      3 # The author disclaims copyright to this source code.  In place of
      4 # a legal notice, here is a blessing:
      5 #
      6 #    May you do good and not evil.
      7 #    May you find forgiveness for yourself and forgive others.
      8 #    May you share freely, never taking more than you give.
      9 #
     10 #***********************************************************************
     11 # This file implements regression tests for SQLite library.  The focus of
     12 # this file is testing the SQLite routines used for converting between the
     13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
     14 # UTF-16be).
     15 #
     16 # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
     17 
     18 set testdir [file dirname $argv0]
     19 source $testdir/tester.tcl
     20 
     21 # Skip this test if the build does not support multiple encodings.
     22 #
     23 ifcapable {!utf16} {
     24   finish_test
     25   return
     26 }
     27 
     28 proc do_bincmp_test {testname got expect} {
     29   binary scan $expect \c* expectvals
     30   binary scan $got \c* gotvals
     31   do_test $testname [list set dummy $gotvals] $expectvals
     32 }
     33 
     34 # $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
     35 # to change the byte-order of the string.
     36 proc swap_byte_order {utf16} {
     37   binary scan $utf16 \c* ints
     38 
     39   foreach {a b} $ints {
     40     lappend ints2 $b
     41     lappend ints2 $a
     42   }
     43 
     44   return [binary format \c* $ints2]
     45 }
     46 
     47 #
     48 # Test that the SQLite routines for converting between UTF encodings
     49 # produce the same results as their TCL counterparts.
     50 #
     51 # $testname is the prefix to be used for the test names.
     52 # $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
     53 #
     54 # The test procedure is:
     55 # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
     56 #    SQLite routines produce the same results.
     57 #
     58 # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
     59 #    SQLite routines produce the same results.
     60 #
     61 # 3. Use the SQLite routines to convert the native machine order UTF-16
     62 #    representation back to the original UTF-8. Check that the result
     63 #    matches the original representation.
     64 #
     65 # 4. Add a byte-order mark to each of the UTF-16 representations and
     66 #    check that the SQLite routines can convert them back to UTF-8.  For
     67 #    byte-order mark info, refer to section 3.10 of the unicode standard.
     68 #
     69 # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
     70 #    that SQLite can convert each of them to both UTF-16le and
     71 #    UTF-16be strings, sans BOM.
     72 #
     73 # Coverage:
     74 #
     75 # sqlite_utf8to16be (step 2)
     76 # sqlite_utf8to16le (step 1)
     77 # sqlite_utf16to8 (steps 3, 4)
     78 # sqlite_utf16to16le (step 5)
     79 # sqlite_utf16to16be (step 5)
     80 #
     81 proc test_conversion {testname str} {
     82  
     83   # Step 1.
     84   set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
     85   set utf16le_tcl [encoding convertto unicode $str]
     86   append utf16le_tcl "\x00\x00"
     87   if { $::tcl_platform(byteOrder)!="littleEndian" } {
     88     set utf16le_tcl [swap_byte_order $utf16le_tcl]
     89   }
     90   do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
     91   set utf16le $utf16le_tcl
     92 
     93   # Step 2.
     94   set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
     95   set utf16be_tcl [encoding convertto unicode $str]
     96   append utf16be_tcl "\x00\x00"
     97   if { $::tcl_platform(byteOrder)=="littleEndian" } {
     98     set utf16be_tcl [swap_byte_order $utf16be_tcl]
     99   }
    100   do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
    101   set utf16be $utf16be_tcl
    102  
    103   # Step 3.
    104   if { $::tcl_platform(byteOrder)=="littleEndian" } {
    105     set utf16 $utf16le
    106   } else {
    107     set utf16 $utf16be
    108   }
    109   set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
    110   do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]
    111 
    112   # Step 4 (little endian).
    113   append utf16le_bom "\xFF\xFE" $utf16le
    114   set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
    115   do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]
    116 
    117   # Step 4 (big endian).
    118   append utf16be_bom "\xFE\xFF" $utf16be
    119   set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
    120   do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]
    121 
    122   # Step 5 (little endian to little endian).
    123   set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
    124   do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le
    125 
    126   # Step 5 (big endian to big endian).
    127   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
    128   do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be
    129 
    130   # Step 5 (big endian to little endian).
    131   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
    132   do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le
    133 
    134   # Step 5 (little endian to big endian).
    135   set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
    136   do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
    137 }
    138 
# Run the harness self-check before any conversion test.
# NOTE(review): translate_selftest is defined outside this file;
# presumably it validates the test_translate helper - confirm.
translate_selftest

# Plain ASCII strings and the empty string.
test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
# Single and mixed non-ASCII code points.
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
# Longer inputs: each pattern is repeated 100 times.
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
# U+007F/U+0080 and U+07FF/U+0800 sit on either side of the points where
# the UTF-8 encoding grows from 1 to 2 bytes and from 2 to 3 bytes.
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
# U+E000 is the first code point after the surrogate range.
test_conversion enc-10 [string repeat "\uE000" 100]
    152 
    153 proc test_collate {enc zLeft zRight} {
    154   return [string compare $zLeft $zRight]
    155 }
# Register the test collating sequence on the open database handle.
# NOTE(review): the three flags are presumably per-encoding switches
# (UTF-8, UTF-16le, UTF-16be), which would enable only the UTF-16be
# variant here - confirm against the harness.
add_test_collate $::DB 0 0 1

# enc-11.1: create a table whose column "a" uses test_collate, insert two
# text values built from raw bytes, then index the table. X'C388' is the
# UTF-8 encoding of U+00C8. The second value starts with C0 followed by a
# long run of 80 bytes before ending in 83 88, so it is not well-formed
# UTF-8.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}
# enc-11.2: look up U+00C8 through the collated column. The expected
# count of 2 means both rows inserted above must compare equal to it.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

# Tell the harness that this test file is complete.
finish_test
    173