Back to home page

OSCL-LXR

 
 

    


0001 #!/bin/sh
0002 # SPDX-License-Identifier: GPL-2.0-only
0003 #
0004 # Copyright 2015, Daniel Axtens, IBM Corporation
0005 #
0006 
0007 
# Locate the getscom/putscom XSCOM utilities and record them in
# GETSCOM/PUTSCOM. Executables in the current directory win; otherwise
# fall back to $PATH. Both tools are needed later, so both must be found
# before either variable is set.
if [ -x ./getscom ] && [ -x ./putscom ]; then
        GETSCOM=./getscom
        PUTSCOM=./putscom
elif command -v getscom > /dev/null 2>&1 &&
     command -v putscom > /dev/null 2>&1; then
        # command -v is the POSIX way to resolve a command name
        GETSCOM=$(command -v getscom)
        PUTSCOM=$(command -v putscom)
else
        # diagnostics go to stderr so they are not mistaken for output
        cat >&2 <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
        exit 1
fi
0023 
# Print the number of harmless-HMI lines currently present in dmesg.
COUNT_HMIS()
{
        dmesg | grep -c -e 'Harmless Hypervisor Maintenance interrupt'
}

# Every injection is expected to produce 8 HMI events.
# todo: deal with things being offline
expected_hmis=8
0030 
# Put the snooze delay back to 100 whenever the script exits. The handler
# is installed before the delay is changed so there is no window in which
# an exit would leave the expanded value behind.
trap 'ppc64_cpu --smt-snooze-delay=100' EXIT

# On HUP/INT/TERM, exit instead of carrying on: exiting runs the EXIT
# handler above, and stops further injections from happening after the
# delay has already been restored.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000
0036 
# Succeed only if the given getscom output spells zero. Any non-empty
# string consisting solely of '0' characters counts, so a bare "0" and a
# zero-padded value are both accepted; empty output is treated as non-zero.
fir_is_clear() {
        case "$1" in
        '' | *[!0]*) return 1 ;;
        esac
        return 0
}

# Poll COUNT_HMIS until $expected_hmis events beyond the baseline count $1
# have been logged. Sleeps 5 seconds between polls and gives up after 12
# polls (one minute). Prints progress to stdout.
# Returns 0 once the events have been seen, non-zero on timeout.
wait_for_hmis() {
        hmi_target=$(($1 + expected_hmis))
        hmi_polls=0
        hmi_seen=$(COUNT_HMIS)
        while [ "$hmi_seen" -lt "$hmi_target" ] && [ "$hmi_polls" -lt 12 ]; do
                echo "Seen $((hmi_seen - $1)) HMI(s) out of $expected_hmis expected, sleeping"
                sleep 5
                hmi_polls=$((hmi_polls + 1))
                hmi_seen=$(COUNT_HMIS)
        done
        # Judge by the final count rather than the poll counter: events that
        # arrive during the last sleep still count as success.
        [ "$hmi_seen" -ge "$hmi_target" ]
}

# for each chip+core combination
# todo - less fragile parsing
# Each matched line reads "OCC: Chip <chip> Core <core>"; read splits it so
# that the third and fifth words land in chip and core.
# The loop is the tail of a pipeline and so runs in a subshell: "exit 1"
# below ends the loop and becomes the pipeline's exit status.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r skip skip chip skip core; do
        fir="0x1${core}013100"

        # verify that Core FIR is zero as expected
        if ! fir_is_clear "$("$GETSCOM" -c "0x${chip}" "$fir")"; then
                {
                        echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
                        echo "Result of $GETSCOM -c 0x${chip} $fir:"
                        "$GETSCOM" -c "0x${chip}" "$fir"
                        echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
                        echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
                } >&2
                exit 1
        fi

        # keep track of the number of HMIs handled
        old_hmis=$(COUNT_HMIS)

        # do injection, adding a marker to dmesg for clarity
        echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
        # inject a RegFile recoverable error
        if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
                echo "Error injecting. Aborting!" >&2
                exit 1
        fi

        # now we want to wait for all the HMIs to be processed
        # we expect one per thread on the core
        if ! wait_for_hmis "$old_hmis"; then
                echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting." >&2
                exit 1
        fi
        echo "Processed $expected_hmis events; presumed success. Check dmesg."
        echo ""
done