#!/bin/sh
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
     13 
     14 
     15 # do we have ./getscom, ./putscom?
     16 if [ -x ./getscom ] && [ -x ./putscom ]; then
     17 	GETSCOM=./getscom
     18 	PUTSCOM=./putscom
     19 elif which getscom > /dev/null; then
     20 	GETSCOM=$(which getscom)
     21 	PUTSCOM=$(which putscom)
     22 else
     23 	cat <<EOF
     24 Can't find getscom/putscom in . or \$PATH.
     25 See https://github.com/open-power/skiboot.
     26 The tool is in external/xscom-utils
     27 EOF
     28 	exit 1
     29 fi
     30 
     31 # We will get 8 HMI events per injection
     32 # todo: deal with things being offline
     33 expected_hmis=8
     34 COUNT_HMIS() {
     35     dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
     36 }
     37 
     38 # massively expand snooze delay, allowing injection on all cores
     39 ppc64_cpu --smt-snooze-delay=1000000000
     40 
     41 # when we exit, restore it
     42 trap "ppc64_cpu --smt-snooze-delay=100" 0 1
     43 
     44 # for each chip+core combination
     45 # todo - less fragile parsing
     46 egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
     47 while read chipcore; do
     48 	chip=$(echo "$chipcore"|awk '{print $3}')
     49 	core=$(echo "$chipcore"|awk '{print $5}')
     50 	fir="0x1${core}013100"
     51 
     52 	# verify that Core FIR is zero as expected
     53 	if [ "$($GETSCOM -c 0x${chip} $fir)" != 0 ]; then
     54 		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
     55 		echo "Result of $GETSCOM -c 0x${chip} $fir:"
     56 		$GETSCOM -c 0x${chip} $fir
     57 		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
     58 		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
     59 		exit 1
     60 	fi
     61 
     62 	# keep track of the number of HMIs handled
     63 	old_hmis=$(COUNT_HMIS)
     64 
     65 	# do injection, adding a marker to dmesg for clarity
     66 	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
     67 	# inject a RegFile recoverable error
     68 	if ! $PUTSCOM -c 0x${chip} $fir 2000000000000000 > /dev/null; then
     69 		echo "Error injecting. Aborting!"
     70 		exit 1
     71 	fi
     72 
     73 	# now we want to wait for all the HMIs to be processed
     74 	# we expect one per thread on the core
     75 	i=0;
     76 	new_hmis=$(COUNT_HMIS)
     77 	while [ $new_hmis -lt $((old_hmis + expected_hmis)) ] && [ $i -lt 12 ]; do
     78 	    echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
     79 	    sleep 5;
     80 	    i=$((i + 1))
     81 	    new_hmis=$(COUNT_HMIS)
     82 	done
     83 	if [ $i = 12 ]; then
     84 	    echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
     85 	    exit 1
     86 	fi
     87 	echo "Processed $expected_hmis events; presumed success. Check dmesg."
     88 	echo ""
     89 done
     90