#!/usr/bin/qsh
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 2000-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
#   Ami Fixler
#   Barry Novinger
#   Steven R. Loomis
#   George Rhoten
#   Jason Spieth
#
# Unpacks an ICU archive with pax, converting the files to an EBCDIC
# codepage. Once everything has been extracted as EBCDIC, the binary files
# are extracted a second time without conversion so that they end up in
# their original codepage again.

# QSH is 1 when QSH_VERSION is set (running under Qshell) and 0 otherwise;
# it is exported so child processes can see it.
case "$QSH_VERSION" in
  "")
    QSH=0
    echo "QSH not detected (QSH_VERSION not set) - just testing."
    ;;
  *)
    QSH=1
    ;;
esac
export QSH

# "v" makes pax list each file while unpacking (the default); this value is
# spliced into the pax option string.
VERBOSE_UNPACK="v"

#****************************************************************************
# binary_suffixes: file suffixes (extensions) that are treated as binary.
#   A generic, non-ICU example would be:
#     'ico ICO bmp BMP jpg JPG gif GIF brk BRK'
# data_files: pax patterns for the ICU data and test-data directories.
#****************************************************************************
binary_suffixes="brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE"
data_files="icu/source/data/brkitr/* \
icu/source/data/locales/* \
icu/source/data/coll/* \
icu/source/data/rbnf/* \
icu/source/data/mappings/* \
icu/source/data/misc/* \
icu/source/data/translit/* \
icu/source/data/unidata/* \
icu/source/test/testdata/*"

#****************************************************************************
# Function:    usage
# Description: Writes a one-line hint on how to invoke this script.
# Input:       None
# Output:      None (the hint goes to standard output)
#****************************************************************************
usage()
{
  echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}

#****************************************************************************
# First make sure we have at least one argument and that it names a file
# we can read.
#****************************************************************************

# check for no arguments
if [ $# -eq 0 ]; then
  usage
  # Exit non-zero so a caller can tell that nothing was extracted.
  exit 1
fi

# tar file is argument 1
tar_file=$1

# check that the file is valid
if [ ! -r "$tar_file" ]; then
  echo "$tar_file does not exist or cannot be read."
  usage
  exit 1
fi

# treat all data files as ebcdic
ebcdic_data=$data_files

#****************************************************************************
# Extract files. We do this in two passes: one pass for 819 files and a
# second pass for 37 files.
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""

# $ebcdic_data is left unquoted on purpose so that every pattern becomes its
# own pax operand. Pathname expansion is switched off while it is used:
# otherwise, when an icu/ tree already exists (e.g. on a re-run), the shell
# would replace the patterns with the names of the files currently on disk
# before pax ever sees them.
set -f

# extract everything as iso-8859-1 except these directories
pax -C 819 -rc${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

# extract files while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -r${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

set +f

#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819. Also handle files with special paths. Files
# that match will be added to binary files lists. The lists will in turn
# be processed to restore files as 819.
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Start from an empty list so a value inherited from the environment cannot
# leak into the rm/pax calls below.
binary_files=""
# Process BOMs
if [ -f icu/as_is/bomlist.txt ]; then
  echo "Using icu/as_is/bomlist.txt"
  bom_list=$(cat icu/as_is/bomlist.txt)
  # pax without any pattern operand selects every archive member, which
  # would re-extract the whole archive as 819; skip it for an empty list.
  if [ $(echo $bom_list | wc -w) -gt 0 ]; then
    pax -C 819 -rvf "$tar_file" $bom_list
  fi
else
  for file in $(find ./icu \( -name \*.txt -print \)); do
    # Hex dump of the first bytes of the first line: squeeze runs of spaces
    # to a single space, drop the od offset column, keep three byte values.
    bom8=$(head -n 1 "$file" |
      od -t x1 |
      head -n 1 |
      sed 's/  */ /g' |
      cut -f2-4 -d ' ' |
      tr 'A-Z' 'a-z')
    # Find a converted UTF-8 BOM
    if [ "$bom8" = "057 08b 0ab" ] || [ "$bom8" = "57 8b ab" ]; then
      # strip the leading "./" that find put in front of the path
      file=$(echo "$file" | cut -d / -f2-)

      # Restore in batches: once 200 names have been collected, re-extract
      # them and start a new list with the current file.
      if [ $(echo $binary_files | wc -w) -lt 200 ]; then
        bin_count=$(expr $bin_count + 1)
        binary_files="$binary_files $file"
      else
        echo "Restoring binary files by BOM ($bin_count)..."
        rm $binary_files
        pax -C 819 -rvf "$tar_file" $binary_files
        echo "Determining binary files by BOM ($bin_count)..."
        binary_files="$file"
        bin_count=$(expr $bin_count + 1)
      fi
    fi
  done
  # now see if a re-extract of binary files is necessary
  if [ $(echo $binary_files | wc -w) -gt 0 ]; then
    echo "Restoring binary files ($bin_count) ..."
    rm $binary_files
    pax -C 819 -rvf "$tar_file" $binary_files
  fi
fi

echo "# Processing special paths."
# Process special paths
# Turn every suffix into a "-o -name *.suffix" clause for find.
more_bin_opts=$(echo $binary_suffixes | sed -e 's%[a-zA-Z0-9]*%-o -name \*.&%g')
# echo "Looking for additional files: find ... $more_bin_opts"
# $more_bin_opts must be word-split but not pathname-expanded: its "*.suffix"
# words are meant for find, not for files that happen to sit in the current
# directory.
set -f
more_bin_files=$(find icu -type f \( -name '*.zzz' $more_bin_opts \) -print)
set +f
echo "Restoring binary files by special paths ($bin_count) ..."
# Same guard as for the BOM list: rm with no operands fails, and pax with no
# pattern operands would re-extract the entire archive as 819.
if [ -n "$more_bin_files" ]; then
  rm $more_bin_files
  pax -C 819 -rvf "$tar_file" $more_bin_files
fi

#****************************************************************************
# Generate a qsh compatible configure script
#****************************************************************************

echo ""
echo "Generating qsh compatible configure ..."
echo ""

# Run configure through convertConfigure.sed into a temporary file and swap
# it in only when sed succeeded, so that a failed conversion never leaves an
# empty or truncated icu/source/configure behind.
if sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp; then
  rm -f icu/source/configure
  mv icu/source/configureTemp icu/source/configure
  chmod 755 icu/source/configure
else
  echo "Failed to generate icu/source/configure; the original was left unchanged." >&2
  rm -f icu/source/configureTemp
  exit 1
fi

echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."
