1 # 2002 May 24 2 # 3 # The author disclaims copyright to this source code. In place of 4 # a legal notice, here is a blessing: 5 # 6 # May you do good and not evil. 7 # May you find forgiveness for yourself and forgive others. 8 # May you share freely, never taking more than you give. 9 # 10 #*********************************************************************** 11 # This file implements regression tests for SQLite library. The focus of 12 # this file is testing the SQLite routines used for converting between the 13 # various suported unicode encodings (UTF-8, UTF-16, UTF-16le and 14 # UTF-16be). 15 # 16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $ 17 18 set testdir [file dirname $argv0] 19 source $testdir/tester.tcl 20 21 # If UTF16 support is disabled, ignore the tests in this file 22 # 23 ifcapable {!utf16} { 24 finish_test 25 return 26 } 27 28 # The rough organisation of tests in this file is: 29 # 30 # enc2.1.*: Simple tests with a UTF-8 db. 31 # enc2.2.*: Simple tests with a UTF-16LE db. 32 # enc2.3.*: Simple tests with a UTF-16BE db. 33 # enc2.4.*: Test that attached databases must have the same text encoding 34 # as the main database. 35 # enc2.5.*: Test the behaviour of the library when a collation sequence is 36 # not available for the most desirable text encoding. 37 # enc2.6.*: Similar test for user functions. 38 # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the 39 # wrong text encoding for the database. 40 # enc2.8.*: Test sqlite3_complete16() 41 # 42 43 db close 44 45 # Return the UTF-8 representation of the supplied UTF-16 string $str. 46 proc utf8 {str} { 47 # If $str ends in two 0x00 0x00 bytes, knock these off before 48 # converting to UTF-8 using TCL. 49 binary scan $str \c* vals 50 if {[lindex $vals end]==0 && [lindex $vals end-1]==0} { 51 set str [binary format \c* [lrange $vals 0 end-2]] 52 } 53 54 set r [encoding convertfrom unicode $str] 55 return $r 56 } 57 58 # 59 # This proc contains all the tests in this file. It is run 60 # three times. Each time the file 'test.db' contains a database 61 # with the following contents: 62 set dbcontents { 63 CREATE TABLE t1(a PRIMARY KEY, b, c); 64 INSERT INTO t1 VALUES('one', 'I', 1); 65 } 66 # This proc tests that we can open and manipulate the test.db 67 # database, and that it is possible to retreive values in 68 # various text encodings. 69 # 70 proc run_test_script {t enc} { 71 72 # Open the database and pull out a (the) row. 73 do_test $t.1 { 74 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 75 execsql {SELECT * FROM t1} 76 } {one I 1} 77 78 # Insert some data 79 do_test $t.2 { 80 execsql {INSERT INTO t1 VALUES('two', 'II', 2);} 81 execsql {SELECT * FROM t1} 82 } {one I 1 two II 2} 83 84 # Insert some data 85 do_test $t.3 { 86 execsql { 87 INSERT INTO t1 VALUES('three','III',3); 88 INSERT INTO t1 VALUES('four','IV',4); 89 INSERT INTO t1 VALUES('five','V',5); 90 } 91 execsql {SELECT * FROM t1} 92 } {one I 1 two II 2 three III 3 four IV 4 five V 5} 93 94 # Use the index 95 do_test $t.4 { 96 execsql { 97 SELECT * FROM t1 WHERE a = 'one'; 98 } 99 } {one I 1} 100 do_test $t.5 { 101 execsql { 102 SELECT * FROM t1 WHERE a = 'four'; 103 } 104 } {four IV 4} 105 ifcapable subquery { 106 do_test $t.6 { 107 execsql { 108 SELECT * FROM t1 WHERE a IN ('one', 'two'); 109 } 110 } {one I 1 two II 2} 111 } 112 113 # Now check that we can retrieve data in both UTF-16 and UTF-8 114 do_test $t.7 { 115 set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL] 116 sqlite3_step $STMT 117 sqlite3_column_text $STMT 0 118 } {four} 119 120 do_test $t.8 { 121 sqlite3_step $STMT 122 utf8 [sqlite3_column_text16 $STMT 0] 123 } {five} 124 125 do_test $t.9 { 126 sqlite3_finalize $STMT 127 } SQLITE_OK 128 129 ifcapable vacuum { 130 execsql VACUUM 131 } 132 133 do_test $t.10 { 134 db eval {PRAGMA encoding} 135 } $enc 136 137 } 138 139 # The three unicode encodings understood by SQLite. 140 set encodings [list UTF-8 UTF-16le UTF-16be] 141 142 set sqlite_os_trace 0 143 set i 1 144 foreach enc $encodings { 145 file delete -force test.db 146 sqlite3 db test.db 147 db eval "PRAGMA encoding = \"$enc\"" 148 execsql $dbcontents 149 do_test enc2-$i.0.1 { 150 db eval {PRAGMA encoding} 151 } $enc 152 do_test enc2-$i.0.2 { 153 db eval {PRAGMA encoding=UTF8} 154 db eval {PRAGMA encoding} 155 } $enc 156 do_test enc2-$i.0.3 { 157 db eval {PRAGMA encoding=UTF16le} 158 db eval {PRAGMA encoding} 159 } $enc 160 do_test enc2-$i.0.4 { 161 db eval {PRAGMA encoding=UTF16be} 162 db eval {PRAGMA encoding} 163 } $enc 164 165 db close 166 run_test_script enc2-$i $enc 167 db close 168 incr i 169 } 170 171 # Test that it is an error to try to attach a database with a different 172 # encoding to the main database. 173 ifcapable attach { 174 do_test enc2-4.1 { 175 file delete -force test.db 176 sqlite3 db test.db 177 db eval "PRAGMA encoding = 'UTF-8'" 178 db eval "CREATE TABLE abc(a, b, c);" 179 } {} 180 do_test enc2-4.2 { 181 file delete -force test2.db 182 sqlite3 db2 test2.db 183 db2 eval "PRAGMA encoding = 'UTF-16'" 184 db2 eval "CREATE TABLE abc(a, b, c);" 185 } {} 186 do_test enc2-4.3 { 187 catchsql { 188 ATTACH 'test2.db' as aux; 189 } 190 } {1 {attached databases must use the same text encoding as main database}} 191 db2 close 192 db close 193 } 194 195 # The following tests - enc2-5.* - test that SQLite selects the correct 196 # collation sequence when more than one is available. 197 198 set ::values [list one two three four five] 199 set ::test_collate_enc INVALID 200 proc test_collate {enc lhs rhs} { 201 set ::test_collate_enc $enc 202 set l [lsearch -exact $::values $lhs] 203 set r [lsearch -exact $::values $rhs] 204 set res [expr $l - $r] 205 # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res" 206 return $res 207 } 208 209 file delete -force test.db 210 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 211 do_test enc2-5.0 { 212 execsql { 213 CREATE TABLE t5(a); 214 INSERT INTO t5 VALUES('one'); 215 INSERT INTO t5 VALUES('two'); 216 INSERT INTO t5 VALUES('five'); 217 INSERT INTO t5 VALUES('three'); 218 INSERT INTO t5 VALUES('four'); 219 } 220 } {} 221 do_test enc2-5.1 { 222 add_test_collate $DB 1 1 1 223 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}] 224 lappend res $::test_collate_enc 225 } {one two three four five UTF-8} 226 do_test enc2-5.2 { 227 add_test_collate $DB 0 1 0 228 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 229 lappend res $::test_collate_enc 230 } {one two three four five UTF-16LE} 231 do_test enc2-5.3 { 232 add_test_collate $DB 0 0 1 233 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 234 lappend res $::test_collate_enc 235 } {one two three four five UTF-16BE} 236 237 db close 238 file delete -force test.db 239 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 240 execsql {pragma encoding = 'UTF-16LE'} 241 do_test enc2-5.4 { 242 execsql { 243 CREATE TABLE t5(a); 244 INSERT INTO t5 VALUES('one'); 245 INSERT INTO t5 VALUES('two'); 246 INSERT INTO t5 VALUES('five'); 247 INSERT INTO t5 VALUES('three'); 248 INSERT INTO t5 VALUES('four'); 249 } 250 } {} 251 do_test enc2-5.5 { 252 add_test_collate $DB 1 1 1 253 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 254 lappend res $::test_collate_enc 255 } {one two three four five UTF-16LE} 256 do_test enc2-5.6 { 257 add_test_collate $DB 1 0 1 258 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 259 lappend res $::test_collate_enc 260 } {one two three four five UTF-16BE} 261 do_test enc2-5.7 { 262 add_test_collate $DB 1 0 0 263 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 264 lappend res $::test_collate_enc 265 } {one two three four five UTF-8} 266 267 db close 268 file delete -force test.db 269 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 270 execsql {pragma encoding = 'UTF-16BE'} 271 do_test enc2-5.8 { 272 execsql { 273 CREATE TABLE t5(a); 274 INSERT INTO t5 VALUES('one'); 275 INSERT INTO t5 VALUES('two'); 276 INSERT INTO t5 VALUES('five'); 277 INSERT INTO t5 VALUES('three'); 278 INSERT INTO t5 VALUES('four'); 279 } 280 } {} 281 do_test enc2-5.9 { 282 add_test_collate $DB 1 1 1 283 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 284 lappend res $::test_collate_enc 285 } {one two three four five UTF-16BE} 286 do_test enc2-5.10 { 287 add_test_collate $DB 1 1 0 288 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 289 lappend res $::test_collate_enc 290 } {one two three four five UTF-16LE} 291 do_test enc2-5.11 { 292 add_test_collate $DB 1 0 0 293 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 294 lappend res $::test_collate_enc 295 } {one two three four five UTF-8} 296 297 # Also test that a UTF-16 collation factory works. 298 do_test enc2-5-12 { 299 add_test_collate $DB 0 0 0 300 catchsql { 301 SELECT * FROM t5 ORDER BY 1 COLLATE test_collate 302 } 303 } {1 {no such collation sequence: test_collate}} 304 do_test enc2-5.13 { 305 add_test_collate_needed $DB 306 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }] 307 lappend res $::test_collate_enc 308 } {one two three four five UTF-16BE} 309 do_test enc2-5.14 { 310 set ::sqlite_last_needed_collation 311 } test_collate 312 313 db close 314 file delete -force test.db 315 316 do_test enc2-5.15 { 317 sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db] 318 add_test_collate_needed $::DB 319 set ::sqlite_last_needed_collation 320 } {} 321 do_test enc2-5.16 { 322 execsql {CREATE TABLE t1(a varchar collate test_collate);} 323 } {} 324 do_test enc2-5.17 { 325 set ::sqlite_last_needed_collation 326 } {test_collate} 327 328 # The following tests - enc2-6.* - test that SQLite selects the correct 329 # user function when more than one is available. 330 331 proc test_function {enc arg} { 332 return "$enc $arg" 333 } 334 335 db close 336 file delete -force test.db 337 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 338 execsql {pragma encoding = 'UTF-8'} 339 do_test enc2-6.0 { 340 execsql { 341 CREATE TABLE t5(a); 342 INSERT INTO t5 VALUES('one'); 343 } 344 } {} 345 do_test enc2-6.1 { 346 add_test_function $DB 1 1 1 347 execsql { 348 SELECT test_function('sqlite') 349 } 350 } {{UTF-8 sqlite}} 351 db close 352 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 353 do_test enc2-6.2 { 354 add_test_function $DB 0 1 0 355 execsql { 356 SELECT test_function('sqlite') 357 } 358 } {{UTF-16LE sqlite}} 359 db close 360 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 361 do_test enc2-6.3 { 362 add_test_function $DB 0 0 1 363 execsql { 364 SELECT test_function('sqlite') 365 } 366 } {{UTF-16BE sqlite}} 367 368 db close 369 file delete -force test.db 370 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 371 execsql {pragma encoding = 'UTF-16LE'} 372 do_test enc2-6.3 { 373 execsql { 374 CREATE TABLE t5(a); 375 INSERT INTO t5 VALUES('sqlite'); 376 } 377 } {} 378 do_test enc2-6.4 { 379 add_test_function $DB 1 1 1 380 execsql { 381 SELECT test_function('sqlite') 382 } 383 } {{UTF-16LE sqlite}} 384 db close 385 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 386 do_test enc2-6.5 { 387 add_test_function $DB 0 1 0 388 execsql { 389 SELECT test_function('sqlite') 390 } 391 } {{UTF-16LE sqlite}} 392 db close 393 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 394 do_test enc2-6.6 { 395 add_test_function $DB 0 0 1 396 execsql { 397 SELECT test_function('sqlite') 398 } 399 } {{UTF-16BE sqlite}} 400 401 db close 402 file delete -force test.db 403 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 404 execsql {pragma encoding = 'UTF-16BE'} 405 do_test enc2-6.7 { 406 execsql { 407 CREATE TABLE t5(a); 408 INSERT INTO t5 VALUES('sqlite'); 409 } 410 } {} 411 do_test enc2-6.8 { 412 add_test_function $DB 1 1 1 413 execsql { 414 SELECT test_function('sqlite') 415 } 416 } {{UTF-16BE sqlite}} 417 db close 418 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 419 do_test enc2-6.9 { 420 add_test_function $DB 0 1 0 421 execsql { 422 SELECT test_function('sqlite') 423 } 424 } {{UTF-16LE sqlite}} 425 db close 426 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 427 do_test enc2-6.10 { 428 add_test_function $DB 0 0 1 429 execsql { 430 SELECT test_function('sqlite') 431 } 432 } {{UTF-16BE sqlite}} 433 434 435 db close 436 file delete -force test.db 437 438 # The following tests - enc2-7.* - function as follows: 439 # 440 # 1: Open an empty database file assuming UTF-16 encoding. 441 # 2: Open the same database with a different handle assuming UTF-8. Create 442 # a table using this handle. 443 # 3: Read the sqlite_master table from the first handle. 444 # 4: Ensure the first handle recognises the database encoding is UTF-8. 445 # 446 do_test enc2-7.1 { 447 sqlite3 db test.db 448 execsql { 449 PRAGMA encoding = 'UTF-16'; 450 SELECT * FROM sqlite_master; 451 } 452 } {} 453 do_test enc2-7.2 { 454 set enc [execsql { 455 PRAGMA encoding; 456 }] 457 string range $enc 0 end-2 ;# Chop off the "le" or "be" 458 } {UTF-16} 459 do_test enc2-7.3 { 460 sqlite3 db2 test.db 461 execsql { 462 PRAGMA encoding = 'UTF-8'; 463 CREATE TABLE abc(a, b, c); 464 } db2 465 } {} 466 do_test enc2-7.4 { 467 execsql { 468 SELECT * FROM sqlite_master; 469 } 470 } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}" 471 do_test enc2-7.5 { 472 execsql { 473 PRAGMA encoding; 474 } 475 } {UTF-8} 476 477 db close 478 db2 close 479 480 proc utf16 {utf8} { 481 set utf16 [encoding convertto unicode $utf8] 482 append utf16 "\x00\x00" 483 return $utf16 484 } 485 ifcapable {complete} { 486 do_test enc2-8.1 { 487 sqlite3_complete16 [utf16 "SELECT * FROM t1;"] 488 } {1} 489 do_test enc2-8.2 { 490 sqlite3_complete16 [utf16 "SELECT * FROM"] 491 } {0} 492 } 493 494 # Test that the encoding of an empty database may still be set after the 495 # (empty) schema has been initialized. 496 file delete -force test.db 497 do_test enc2-9.1 { 498 sqlite3 db test.db 499 execsql { 500 PRAGMA encoding = 'UTF-8'; 501 PRAGMA encoding; 502 } 503 } {UTF-8} 504 do_test enc2-9.2 { 505 sqlite3 db test.db 506 execsql { 507 PRAGMA encoding = 'UTF-16le'; 508 PRAGMA encoding; 509 } 510 } {UTF-16le} 511 do_test enc2-9.3 { 512 sqlite3 db test.db 513 execsql { 514 SELECT * FROM sqlite_master; 515 PRAGMA encoding = 'UTF-8'; 516 PRAGMA encoding; 517 } 518 } {UTF-8} 519 do_test enc2-9.4 { 520 sqlite3 db test.db 521 execsql { 522 PRAGMA encoding = 'UTF-16le'; 523 CREATE TABLE abc(a, b, c); 524 PRAGMA encoding; 525 } 526 } {UTF-16le} 527 do_test enc2-9.5 { 528 sqlite3 db test.db 529 execsql { 530 PRAGMA encoding = 'UTF-8'; 531 PRAGMA encoding; 532 } 533 } {UTF-16le} 534 535 # Ticket #1987. 536 # Disallow encoding changes once the encoding has been set. 537 # 538 do_test enc2-10.1 { 539 db close 540 file delete -force test.db test.db-journal 541 sqlite3 db test.db 542 db eval { 543 PRAGMA encoding=UTF16; 544 CREATE TABLE t1(a); 545 PRAGMA encoding=UTF8; 546 CREATE TABLE t2(b); 547 } 548 db close 549 sqlite3 db test.db 550 db eval { 551 SELECT name FROM sqlite_master 552 } 553 } {t1 t2} 554 555 finish_test 556