Home | History | Annotate | Download | only in os390
      1 #!/bin/sh
      2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
      3 # License & terms of use: http://www.unicode.org/copyright.html
      4 # Copyright (C) 2001-2010, International Business Machines
      5 #   Corporation and others.  All Rights Reserved.
      6 #
      7 # Authors:
      8 # Ami Fixler
      9 # Steven R. Loomis
     10 # George Rhoten
     11 #
     12 # Shell script to unpax ICU and convert the files to an EBCDIC codepage.
     13 # After extracting to EBCDIC, binary files are re-extracted without the
     14 # EBCDIC conversion, thus restoring them to original codepage.
     15 #
     16 # Set the following variable to the list of binary file suffixes (extensions)
     17 
     18 #ICU specific binary files
     19 #****************************************************************************
     20 binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE'
     21 data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'
     22 
     23 #****************************************************************************
     24 # Function:     usage
     25 # Description:  Prints out text that describes how to call this script
     26 # Input:        None
     27 # Output:       None
     28 #****************************************************************************
     29 usage()
     30 {
     31     echo "Enter archive filename as a parameter: $0 icu-archive.tar"
     32 }
     33 
     34 #****************************************************************************
     35 # first make sure we at least one arg and it's a file we can read
     36 #****************************************************************************
     37 
     38 # check for no arguments
     39 if [ $# -eq 0 ]; then
     40     usage
     41     exit
     42 fi
     43 tar_file=$1
     44 if [ ! -r $tar_file ]; then
     45     echo "$tar_file does not exist or cannot be read."
     46     usage
     47     exit
     48 fi
     49 
     50 echo ""
     51 echo "Extracting from $tar_file ..."
     52 echo ""
     53 # extract files while converting them to EBCDIC
     54 pax -rvf $tar_file -o to=IBM-1047,from=ISO8859-1 -o setfiletag
     55 
     56 #****************************************************************************
     57 # For files we have restored as CCSID 37, check the BOM to see if they    
     58 # should be processed as 819.  Also handle files with special paths. Files
     59 # that match will be added to binary files lists.  The lists will in turn
     60 # be processed to restore files as 819.
     61 #****************************************************************************
     62 echo ""
     63 echo "Determining binary files by BOM ..."
     64 echo ""
     65 
     66 # When building in ASCII mode, text files are converted as ASCII
     67 if [ "${ICU_ENABLE_ASCII_STRINGS}" -eq 1 ]; then
     68     binary_suffixes="$binary_suffixes txt TXT ucm UCM"
     69 elif [ -f icu/as_is/bomlist.txt ];
     70 then
     71     echo 'Using icu/as_is/bomlist.txt'
     72     binary_files=$(cat icu/as_is/bomlist.txt)
     73 else
     74     echo "Analyzing files .."
     75 	for file in `find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'`; do
     76 		bom8=`head -c 3 $file|\
     77 			od -t x1|\
     78 			head -n 1|\
     79 			sed 's/  */ /g'|\
     80 			cut -f2-4 -d ' '|\
     81 			tr 'A-Z' 'a-z'`;
     82 		#Find a converted UTF-8 BOM
     83 		if [ "$bom8" = "57 8b ab" ]
     84 		then
     85 			binary_files="$binary_files $file";
     86 		fi
     87 	done
     88 fi
     89 
     90 echo "Looking for binary suffixes.."
     91 
     92 for i in $(pax -f $tar_file 2>/dev/null)
     93 do
     94 	case $i in
     95 	*/) ;;		# then this entry is a directory
     96 	*.*)		# then this entry has a dot in the filename
     97 		for j in $binary_suffixes
     98 		do
     99 			# We substitute the suffix more than once
    100 			# to handle files like NormalizationTest-3.2.0.txt
    101 			suf=${i#*.*}
    102 			suf=${suf#*.*}
    103 			suf=${suf#*.*}
    104 			if [ "$suf" = "$j" ]
    105 			then
    106 				binary_files="$binary_files $i"
    107 				break
    108 			fi
    109 		done
    110 		;;
    111 	*) ;;		# then this entry does not have a dot in it
    112     esac
    113 done
    114 
    115 # now see if a re-extract of binary files is necessary
    116 if [ ${#binary_files} -eq 0 ]; then
    117     echo ""
    118     echo "There are no binary files to restore."
    119 else
    120     echo "Restoring binary files ..."
    121     echo ""
    122     rm $binary_files
    123     pax -rvf $tar_file $binary_files
    124     # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    125     # environment setting
    126     chtag -b $binary_files
    127 fi
    128 echo ""
    129 echo "$0 has completed extracting ICU from $tar_file."
    130