#!/bin/sh
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 2001-2010, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Steven R. Loomis
# George Rhoten
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Usage: pass the archive filename as the only parameter.
# Exit status: 1 when the archive argument is missing or unreadable.
#
# Set the following variable to the list of binary file suffixes (extensions)

#ICU specific binary files
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE'
data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'

#****************************************************************************
# Function: usage
# Description: Prints out text that describes how to call this script
# Input: None
# Output: None
#****************************************************************************
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}

#****************************************************************************
# first make sure we have at least one arg and it's a file we can read
#****************************************************************************

# check for no arguments
if [ $# -eq 0 ]; then
    usage >&2
    # A bare "exit" would report the status of the echo above (success).
    exit 1
fi
tar_file=$1
# Quoted so that an archive path containing blanks is tested as one word.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read." >&2
    usage >&2
    exit 1
fi

echo ""
echo "Extracting from $tar_file ..."
echo ""
# extract files while converting them to EBCDIC
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag

#****************************************************************************
# Function: first3_hex
# Description: Prints the first three bytes of a file as blank-separated
#              lowercase hex pairs, for example "ef bb bf"
# Input: $1 - path of the file to inspect
# Output: The hex bytes on stdout
#****************************************************************************
first3_hex()
{
    # Field 1 of the od line is the offset; fields 2-4 are the bytes.
    # Runs of blanks are squeezed first so that cut sees exactly one
    # delimiter between fields however od pads its columns.
    head -c 3 "$1" |
        od -t x1 |
        head -n 1 |
        tr -s ' ' |
        cut -f2-4 -d ' ' |
        tr 'A-Z' 'a-z'
}

#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819. Also handle files with special paths. Files
# that match will be added to binary files lists. The lists will in turn
# be processed to restore files as 819.
# NOTE(review): the pax call above converts to IBM-1047, not CCSID 37 -
# confirm which code page this comment should name.
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""

# Start from an empty list so that a binary_files value inherited from the
# environment cannot end up in the rm/pax/chtag calls at the end of the script.
binary_files=""

# When building in ASCII mode, text files are converted as ASCII
# (the :-0 default keeps the numeric test quiet when the variable is unset)
if [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
elif [ -f icu/as_is/bomlist.txt ]; then
    echo 'Using icu/as_is/bomlist.txt'
    binary_files=$(cat icu/as_is/bomlist.txt)
else
    echo "Analyzing files .."
    # The list is split on whitespace on purpose: binary_files itself is a
    # blank-separated list, so paths containing blanks are not supported.
    for file in $(find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'); do
        bom8=$(first3_hex "$file")
        #Find a converted UTF-8 BOM
        if [ "$bom8" = "57 8b ab" ]; then
            binary_files="$binary_files $file"
        fi
    done
fi

echo "Looking for binary suffixes.."

#****************************************************************************
# Function: has_binary_suffix
# Description: Tells whether an archive entry name ends in one of the
#              suffixes listed in $binary_suffixes
# Input: $1 - entry name as listed by pax
# Output: Return status 0 when the suffix matches, 1 otherwise
#****************************************************************************
has_binary_suffix()
{
    case $1 in
        */) return 1 ;;  # then this entry is a directory
        *.*) ;;          # then this entry has a dot in the filename
        *) return 1 ;;   # then this entry does not have a dot in it
    esac
    # Take everything after the LAST dot so that files like
    # NormalizationTest-3.2.0.txt work whatever the number of dots.
    hbs_suffix=${1##*.}
    for hbs_candidate in $binary_suffixes
    do
        if [ "$hbs_suffix" = "$hbs_candidate" ]; then
            return 0
        fi
    done
    return 1
}

# Walk the archive listing; entries are split on whitespace, matching the
# blank-separated format of the binary_files list.
for i in $(pax -f "$tar_file" 2>/dev/null)
do
    if has_binary_suffix "$i"; then
        binary_files="$binary_files $i"
    fi
done

# now see if a re-extract of binary files is necessary
if [ -z "$binary_files" ]; then
    echo ""
    echo "There are no binary files to restore."
else
    echo "Restoring binary files ..."
    echo ""
    # $binary_files is deliberately unquoted below: it is a blank-separated
    # list that must expand to one argument per file.
    rm $binary_files
    # Re-extract without the -o to=/from= options, i.e. without conversion
    pax -rvf "$tar_file" $binary_files
    # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    # environment setting
    chtag -b $binary_files
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file."