Home | History | Annotate | Download | only in test
      1 # 2002 May 24
      2 #
      3 # The author disclaims copyright to this source code.  In place of
      4 # a legal notice, here is a blessing:
      5 #
      6 #    May you do good and not evil.
      7 #    May you find forgiveness for yourself and forgive others.
      8 #    May you share freely, never taking more than you give.
      9 #
     10 #***********************************************************************
     11 # This file implements regression tests for SQLite library.  The focus of
     12 # this file is testing the SQLite routines used for converting between the
     13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
     14 # UTF-16be).
     15 #
     16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
     17 
# Locate the directory holding the running script and load the shared
# test harness (tester.tcl) from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If UTF16 support is disabled, ignore the tests in this file
#
ifcapable {!utf16} {
  finish_test
  return
}

# The rough organisation of tests in this file is:
#
# enc2-1.*: Simple tests with a UTF-8 db.
# enc2-2.*: Simple tests with a UTF-16LE db.
# enc2-3.*: Simple tests with a UTF-16BE db.
# enc2-4.*: Test that attached databases must have the same text encoding
#           as the main database.
# enc2-5.*: Test the behaviour of the library when a collation sequence is
#           not available for the most desirable text encoding.
# enc2-6.*: Similar test for user functions.
# enc2-7.*: Test that the VerifyCookie opcode protects against assuming the
#           wrong text encoding for the database.
# enc2-8.*: Test sqlite3_complete16()
# enc2-9.*: Test that the encoding of an empty database may still be set
#           after the (empty) schema has been initialized.
# enc2-10.*: Ticket #1987. Encoding changes are disallowed once the
#            encoding has been set.
#

# Close the handle "db" before the tests below create their own.
# NOTE(review): "db" is not opened anywhere in this file - presumably it is
# opened by tester.tcl; confirm.
db close
     44 
     45 # Return the UTF-8 representation of the supplied UTF-16 string $str. 
     46 proc utf8 {str} {
     47   # If $str ends in two 0x00 0x00 bytes, knock these off before
     48   # converting to UTF-8 using TCL.
     49   binary scan $str \c* vals
     50   if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
     51     set str [binary format \c* [lrange $vals 0 end-2]]
     52   }
     53 
     54   set r [encoding convertfrom unicode $str]
     55   return $r
     56 }
     57 
#
# SQL used to create the initial contents of 'test.db'. The proc
# run_test_script, defined below, is run three times (once for each text
# encoding). Each time the file 'test.db' contains a database
# with the following contents:
set dbcontents {
  CREATE TABLE t1(a PRIMARY KEY, b, c);
  INSERT INTO t1 VALUES('one', 'I', 1);
}
# This proc tests that we can open and manipulate the test.db
# database, and that it is possible to retrieve values in
# various text encodings.
#
#   t   - prefix used to build the name of each test case ($t.1 ... $t.10).
#   enc - the value "PRAGMA encoding" is expected to report for test.db.
#
# The proc expects test.db to already hold the single t1 row
# {one I 1}. It opens the handle "db" on test.db and does not
# close it again; that is left to the caller.
#
proc run_test_script {t enc} {

# Open the database and pull out a (the) row.
do_test $t.1 {
  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  execsql {SELECT * FROM t1}
} {one I 1}

# Insert some data
do_test $t.2 {
  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
  execsql {SELECT * FROM t1}
} {one I 1 two II 2}

# Insert some data
do_test $t.3 {
  execsql {
    INSERT INTO t1 VALUES('three','III',3);
    INSERT INTO t1 VALUES('four','IV',4);
    INSERT INTO t1 VALUES('five','V',5);
  }
  execsql {SELECT * FROM t1}
} {one I 1 two II 2 three III 3 four IV 4 five V 5}

# Use the index (column "a" is the PRIMARY KEY of t1).
do_test $t.4 {
  execsql {
    SELECT * FROM t1 WHERE a = 'one';
  }
} {one I 1}
do_test $t.5 {
  execsql {
    SELECT * FROM t1 WHERE a = 'four';
  }
} {four IV 4}
ifcapable subquery {
  do_test $t.6 {
    execsql {
      SELECT * FROM t1 WHERE a IN ('one', 'two');
    }
  } {one I 1 two II 2}
}

# Now check that we can retrieve data in both UTF-16 and UTF-8
# Tests $t.7, $t.8 and $t.9 share one prepared statement: the first row is
# read with sqlite3_column_text, the second with sqlite3_column_text16
# (converted back with the utf8 proc), and then the statement is finalized.
do_test $t.7 {
  set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
  sqlite3_step $STMT
  sqlite3_column_text $STMT 0
} {four}

do_test $t.8 {
  sqlite3_step $STMT
  utf8 [sqlite3_column_text16 $STMT 0]
} {five}

do_test $t.9 {
  sqlite3_finalize $STMT
} SQLITE_OK

# Run VACUUM (when available) before the final check, so that $t.10
# verifies the reported encoding after the VACUUM has run.
ifcapable vacuum {
  execsql VACUUM
}

do_test $t.10 {
  db eval {PRAGMA encoding}
} $enc

}
    138 
# The three unicode encodings understood by SQLite.
set encodings [list UTF-8 UTF-16le UTF-16be]

set sqlite_os_trace 0

# For each encoding: build a fresh test.db using that encoding, check that
# the encoding can no longer be changed, then run the common test script.
# $i numbers the resulting test groups enc2-1.*, enc2-2.* and enc2-3.*.
set i 1
foreach enc $encodings {
  # Start from a missing file and set the encoding before any content is
  # created.
  file delete -force test.db
  sqlite3 db test.db
  db eval "PRAGMA encoding = \"$enc\""
  execsql $dbcontents
  do_test enc2-$i.0.1 {
    db eval {PRAGMA encoding}
  } $enc
  # Tests 0.2 to 0.4 each try to switch to another encoding now that the
  # database has content. In every case the reported encoding is expected
  # to remain $enc.
  do_test enc2-$i.0.2 {
    db eval {PRAGMA encoding=UTF8}
    db eval {PRAGMA encoding}
  } $enc
  do_test enc2-$i.0.3 {
    db eval {PRAGMA encoding=UTF16le}
    db eval {PRAGMA encoding}
  } $enc
  do_test enc2-$i.0.4 {
    db eval {PRAGMA encoding=UTF16be}
    db eval {PRAGMA encoding}
  } $enc

  # run_test_script opens its own "db" handle and leaves it open, so the
  # handle is closed both before and after the call.
  db close
  run_test_script enc2-$i $enc
  db close
  incr i
}
    170 
# Test that it is an error to try to attach a database with a different
# encoding to the main database.
ifcapable attach {
  # Main database: test.db, created as UTF-8.
  do_test enc2-4.1 {
    file delete -force test.db
    sqlite3 db test.db
    db eval "PRAGMA encoding = 'UTF-8'"
    db eval "CREATE TABLE abc(a, b, c);"
  } {}
  # Second database: test2.db, created as UTF-16 through its own handle.
  do_test enc2-4.2 {
    file delete -force test2.db
    sqlite3 db2 test2.db
    db2 eval "PRAGMA encoding = 'UTF-16'"
    db2 eval "CREATE TABLE abc(a, b, c);"
  } {}
  # Attaching the UTF-16 file to the UTF-8 connection must fail with the
  # error message below.
  do_test enc2-4.3 {
    catchsql {
      ATTACH 'test2.db' as aux;
    }
  } {1 {attached databases must use the same text encoding as main database}}
  db2 close
  db close
}
    194 
    195 # The following tests - enc2-5.* - test that SQLite selects the correct
    196 # collation sequence when more than one is available.
    197 
    198 set ::values [list one two three four five]
    199 set ::test_collate_enc INVALID
    200 proc test_collate {enc lhs rhs} {
    201   set ::test_collate_enc $enc
    202   set l [lsearch -exact $::values $lhs]
    203   set r [lsearch -exact $::values $rhs]
    204   set res [expr $l - $r]
    205   # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
    206   return $res
    207 }
    208 
# enc2-5.0 to enc2-5.3: a fresh database for which no encoding is set
# explicitly. The rows of t5 are inserted out of order so that the
# ORDER BY below has to rely on the test_collate collation.
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-5.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
# NOTE(review): judging by the expected results in this file, the three
# flags passed to add_test_collate enable the UTF-8, UTF-16LE and UTF-16BE
# versions of the collation, in that order - confirm against its
# implementation. Each test appends ::test_collate_enc to the sorted rows
# to show which version was actually invoked.
do_test enc2-5.1 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
do_test enc2-5.2 {
  add_test_collate $DB 0 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.3 {
  add_test_collate $DB 0 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
    236 
# enc2-5.4 to enc2-5.7: the same collation tests against a database whose
# encoding is set to UTF-16LE before any content is created.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16LE'}
do_test enc2-5.4 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
# With every version available, the UTF-16LE version is expected to be used.
do_test enc2-5.5 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
# Without a UTF-16LE version, UTF-16BE is expected to be chosen ahead of
# UTF-8.
do_test enc2-5.6 {
  add_test_collate $DB 1 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
# With only the UTF-8 version left, that one is expected to be used.
do_test enc2-5.7 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
    266 
# enc2-5.8 to enc2-5.11: the same collation tests against a database whose
# encoding is set to UTF-16BE before any content is created.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16BE'}
do_test enc2-5.8 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
# With every version available, the UTF-16BE version is expected to be used.
do_test enc2-5.9 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
# Without a UTF-16BE version, UTF-16LE is expected to be chosen ahead of
# UTF-8.
do_test enc2-5.10 {
  add_test_collate $DB 1 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
# With only the UTF-8 version left, that one is expected to be used.
do_test enc2-5.11 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
    296 
    297 # Also test that a UTF-16 collation factory works.
    298 do_test enc2-5-12 {
    299   add_test_collate $DB 0 0 0
    300   catchsql {
    301     SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
    302   }
    303 } {1 {no such collation sequence: test_collate}}
    304 do_test enc2-5.13 {
    305   add_test_collate_needed $DB 
    306   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
    307   lappend res $::test_collate_enc
    308 } {one two three four five UTF-16BE}
    309 do_test enc2-5.14 {
    310   set ::sqlite_last_needed_collation
    311 } test_collate
    312 
db close
file delete -force test.db

# enc2-5.15 to enc2-5.17: on a brand new database,
# ::sqlite_last_needed_collation is expected to be empty right after
# add_test_collate_needed, and to name test_collate once a CREATE TABLE
# statement has referred to that collation.
do_test enc2-5.15 {
  sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
  add_test_collate_needed $::DB
  set ::sqlite_last_needed_collation
} {}
do_test enc2-5.16 {
  execsql {CREATE TABLE t1(a varchar collate test_collate);}
} {}
do_test enc2-5.17 {
  set ::sqlite_last_needed_collation
} {test_collate}
    327 
    328 # The following tests - enc2-6.* - test that SQLite selects the correct
    329 # user function when more than one is available.
    330 
    331 proc test_function {enc arg} {
    332   return "$enc $arg"
    333 }
    334 
# enc2-6.0 to enc2-6.3: user-function selection on a UTF-8 database.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-8'}
do_test enc2-6.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
  }
} {}
# NOTE(review): judging by the expected results in this file, the three
# flags passed to add_test_function enable the UTF-8, UTF-16LE and UTF-16BE
# versions of test_function(), in that order - confirm against its
# implementation. The connection is reopened before each of the following
# tests, so each one starts without previously registered functions.
do_test enc2-6.1 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-8 sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.2 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.3 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}
    367 
    368 db close
    369 file delete -force test.db
    370 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
    371 execsql {pragma encoding = 'UTF-16LE'}
    372 do_test enc2-6.3 {
    373   execsql {
    374     CREATE TABLE t5(a);
    375     INSERT INTO t5 VALUES('sqlite');
    376   }
    377 } {}
    378 do_test enc2-6.4 {
    379   add_test_function $DB 1 1 1
    380   execsql {
    381     SELECT test_function('sqlite')
    382   }
    383 } {{UTF-16LE sqlite}}
    384 db close
    385 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
    386 do_test enc2-6.5 {
    387   add_test_function $DB 0 1 0
    388   execsql {
    389     SELECT test_function('sqlite')
    390   }
    391 } {{UTF-16LE sqlite}}
    392 db close
    393 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
    394 do_test enc2-6.6 {
    395   add_test_function $DB 0 0 1
    396   execsql {
    397     SELECT test_function('sqlite')
    398   }
    399 } {{UTF-16BE sqlite}}
    400 
# enc2-6.7 to enc2-6.10: user-function selection on a UTF-16BE database.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16BE'}
do_test enc2-6.7 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('sqlite');
  }
} {}
# With every version available, the UTF-16BE version is expected to be used.
do_test enc2-6.8 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}
# The connection is reopened before each of the next two tests, so only the
# single version enabled by that test is available.
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.9 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.10 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}
    433 
    434 
# Discard the database used by the enc2-6.* tests.
db close
file delete -force test.db

# The following tests - enc2-7.* - function as follows:
#
# 1: Open an empty database file assuming UTF-16 encoding.
# 2: Open the same database with a different handle assuming UTF-8. Create
#    a table using this handle.
# 3: Read the sqlite_master table from the first handle.
# 4: Ensure the first handle recognises the database encoding is UTF-8.
#
do_test enc2-7.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16';
    SELECT * FROM sqlite_master;
  }
} {}
# The pragma reports the encoding with a two-character byte-order suffix.
# The suffix is removed so that the expected value is the same whichever
# byte order is reported.
do_test enc2-7.2 {
  set enc [execsql {
    PRAGMA encoding;
  }]
  string range $enc 0 end-2 ;# Chop off the "le" or "be"
} {UTF-16}
# The second handle sets UTF-8 and is the one that creates the first table
# in the file.
do_test enc2-7.3 {
  sqlite3 db2 test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    CREATE TABLE abc(a, b, c);
  } db2
} {}
# The first handle must see the table created through db2. The expected
# root page is 3 when $AUTOVACUUM is true and 2 otherwise.
# NOTE(review): AUTOVACUUM is not set in this file - presumably it comes
# from tester.tcl; confirm.
do_test enc2-7.4 {
  execsql {
    SELECT * FROM sqlite_master;
  }
} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
# After reading the schema, the first handle must report UTF-8 rather than
# the UTF-16 it originally asked for.
do_test enc2-7.5 {
  execsql {
    PRAGMA encoding;
  }
} {UTF-8}

db close
db2 close
    479 
    480 proc utf16 {utf8} {
    481   set utf16 [encoding convertto unicode $utf8]
    482   append utf16 "\x00\x00"
    483   return $utf16
    484 }
# enc2-8.*: sqlite3_complete16 is given terminated UTF-16 text built by the
# utf16 proc. It is expected to return 1 for a statement that ends with a
# semicolon and 0 for one that is cut short.
ifcapable {complete} {
  do_test enc2-8.1 {
    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
  } {1}
  do_test enc2-8.2 {
    sqlite3_complete16 [utf16 "SELECT * FROM"]
  } {0}
}
    493 
# Test that the encoding of an empty database may still be set after the
# (empty) schema has been initialized.
#
# Every test below opens the handle "db" on test.db again without an
# explicit "db close" in between.
# NOTE(review): this presumably relies on the new "db" command replacing
# (and thereby closing) the previous one - confirm.
file delete -force test.db
# The database is empty: the encoding can be set to UTF-8 ...
do_test enc2-9.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  }
} {UTF-8}
# ... and, on a new connection, to UTF-16le, since nothing has been
# created in the database yet.
do_test enc2-9.2 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16le';
    PRAGMA encoding;
  }
} {UTF-16le}
# Reading sqlite_master first (so the empty schema has been read) must not
# prevent the encoding from being set.
do_test enc2-9.3 {
  sqlite3 db test.db
  execsql {
    SELECT * FROM sqlite_master;
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  }
} {UTF-8}
# Creating a table after setting UTF-16le gives the database content ...
do_test enc2-9.4 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16le';
    CREATE TABLE abc(a, b, c);
    PRAGMA encoding;
  }
} {UTF-16le}
# ... so a later request for UTF-8 is expected to have no effect.
do_test enc2-9.5 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  }
} {UTF-16le}
    534 
# Ticket #1987.
# Disallow encoding changes once the encoding has been set.
#
# A table is created under UTF16, then a switch to UTF8 is requested and a
# second table is created. After reopening the file, both tables must be
# listed in sqlite_master.
do_test enc2-10.1 {
  db close
  file delete -force test.db test.db-journal
  sqlite3 db test.db
  db eval {
    PRAGMA encoding=UTF16;
    CREATE TABLE t1(a);
    PRAGMA encoding=UTF8;
    CREATE TABLE t2(b);
  }
  db close
  sqlite3 db test.db
  db eval {
    SELECT name FROM sqlite_master
  }
} {t1 t2}

# Last command of the test script: hand control back to the test harness.
finish_test
    556