#!/bin/sh
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# Purpose: for every chip/core pair listed in the OPAL message log, write
# an error bit into that core's FIR with putscom and then wait until the
# kernel log shows the expected number of "Harmless Hypervisor Maintenance
# interrupt" messages.
#
# Requires: getscom/putscom (in . or $PATH), ppc64_cpu, dmesg, and read
# access to /sys/firmware/opal/msglog.
# Exit status: 0 if every injection was followed by the expected number of
# HMIs, 1 on the first failure.

# Locate getscom and putscom: prefer executables in the current directory,
# otherwise fall back to $PATH.  Both tools must be present; finding only
# one of them is treated as "not found".
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of harmless-HMI messages currently in the kernel log.
# grep -c always prints a count (0 when nothing matches).
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# Put the snooze delay back to the value this script restores on exit.
restore_snooze_delay() {
	ppc64_cpu --smt-snooze-delay=100
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# When we exit, restore it.  The signal traps only call exit so that the
# EXIT trap performs the restore exactly once and the script does not keep
# injecting after it has been asked to stop.
trap restore_snooze_delay EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# for each chip+core combination
# todo - less fragile parsing
# Each matched line has the form "OCC: Chip <hex> Core <hex digit>"; read
# splits it on whitespace so the 3rd field is the chip and the 5th the core.
# The loop is the last stage of the pipeline, so an "exit 1" inside it ends
# the loop's subshell and becomes the exit status of the script.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chip_label chip _core_label core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)
	target_hmis=$((old_hmis + expected_hmis))

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	# Poll at most 12 times, 5 seconds apart (1 minute in total).
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done

	# Decide on the HMI count, not on the poll counter: the events may
	# show up on the very last poll, which must still count as success.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done