ct-bugcheck: Add tools to poll for and report ath10k firmware crashes.
authorBen Greear <greearb@candelatech.com>
Wed, 24 Aug 2016 21:20:58 +0000 (14:20 -0700)
committerFelix Fietkau <nbd@nbd.name>
Sun, 28 Aug 2016 11:51:41 +0000 (13:51 +0200)
This tool can periodically check for ath10k firmware crashes.
If it finds a crash, it will package up the binary crash dump,
some OS level things like dmesg, lspci, etc into a tar file.

It then notifies the user about the crash and asks them to report
the bug to the appropriate email address.

This is most useful when used with ath10k-ct driver and
CT ath10k firmware, but it should also report issues with stock
ath10k driver and firmware in case one has appropriate contacts
to debug them.

This tool could be extended later for other modules/bugs/etc.

Signed-off-by: Ben Greear <greearb@candelatech.com>
package/utils/ct-bugcheck/Makefile [new file with mode: 0644]
package/utils/ct-bugcheck/src/bugcheck.initd [new file with mode: 0644]
package/utils/ct-bugcheck/src/bugcheck.sh [new file with mode: 0755]
package/utils/ct-bugcheck/src/bugchecker.sh [new file with mode: 0755]

diff --git a/package/utils/ct-bugcheck/Makefile b/package/utils/ct-bugcheck/Makefile
new file mode 100644 (file)
index 0000000..5deb1e0
--- /dev/null
@@ -0,0 +1,52 @@
+#
+# Copyright (C) 2016 OpenWrt.org
+#
+# This is free software, licensed under the GNU General Public License v2.
+# See /LICENSE for more information.
+#
+
+include $(TOPDIR)/rules.mk
+include $(INCLUDE_DIR)/kernel.mk
+
+PKG_NAME:=ct-bugcheck
+PKG_RELEASE:=2016-07-21
+
+include $(INCLUDE_DIR)/package.mk
+
+define Package/ct-bugcheck
+  SECTION:=utils
+  CATEGORY:=Utilities
+  TITLE:=Bug checking and reporting utility
+  VERSION:=$(PKG_RELEASE)
+  MAINTAINER:=Ben Greear <greearb@candelatech.com>
+endef
+
+define Package/ct-bugcheck/description
+  Scripts to check for bugs (like firmware crashes) and package them for reporting.
+  Currently this script only checks for ath10k firmware crashes.
+  Once installed, you can enable this tool by creating a file called
+  /etc/config/bugcheck with the following contents:
+ DO_BUGCHECK=1
+ export DO_BUGCHECK
+
+endef
+
+define Build/Prepare
+       $(CP) src/bugcheck.sh $(PKG_BUILD_DIR)/
+       $(CP) src/bugchecker.sh $(PKG_BUILD_DIR)/
+       $(CP) src/bugcheck.initd $(PKG_BUILD_DIR)/
+endef
+
+define Build/Compile
+       true
+endef
+
+define Package/ct-bugcheck/install
+       $(INSTALL_DIR) $(1)/usr/bin
+       $(INSTALL_DIR) $(1)/etc/init.d
+       $(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.sh $(1)/usr/bin/
+       $(INSTALL_BIN) $(PKG_BUILD_DIR)/bugchecker.sh $(1)/usr/bin/
+       $(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.initd $(1)/etc/init.d/bugcheck
+endef
+
+$(eval $(call BuildPackage,ct-bugcheck))
diff --git a/package/utils/ct-bugcheck/src/bugcheck.initd b/package/utils/ct-bugcheck/src/bugcheck.initd
new file mode 100644 (file)
index 0000000..b97a415
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/sh /etc/rc.common
+# Copyright (C) 2016 OpenWrt.org
+
+START=99
+
+USE_PROCD=1
+PROG=/usr/bin/bugchecker.sh
+
+# To actually make bugchecker.sh run, see comments
+# at top of its file.
+
+start_service () {
+        procd_open_instance
+        procd_set_param command "$PROG"
+        procd_close_instance
+}
diff --git a/package/utils/ct-bugcheck/src/bugcheck.sh b/package/utils/ct-bugcheck/src/bugcheck.sh
new file mode 100755 (executable)
index 0000000..85f70c5
--- /dev/null
@@ -0,0 +1,115 @@
+#!/bin/sh
+
+# Check for ath10k (and maybe other) bugs, package them up,
+# and let user know what to do with them.
+
+TMPLOC=/tmp
+CRASHDIR=$TMPLOC/bugcheck
+FOUND_BUG=0
+
+# set -x
+
+bugcheck_generic()
+{
+    echo "LEDE crashlog report" > $CRASHDIR/info.txt
+    date >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "uname" >> $CRASHDIR/info.txt
+    uname -a >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "os-release" >> $CRASHDIR/info.txt
+    cat /etc/os-release >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "os-release" >> $CRASHDIR/info.txt
+    cat /etc/os-release >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "dmesg output" >> $CRASHDIR/info.txt
+    dmesg >> $CRASHDIR/info.txt
+    if [ -x /usr/bin/lspci ]
+       then
+       echo >> $CRASHDIR/info.txt
+       echo "lspci" >> $CRASHDIR/info.txt
+       lspci >> $CRASHDIR/info.txt
+    fi
+    echo >> $CRASHDIR/info.txt
+    echo "cpuinfo" >> $CRASHDIR/info.txt
+    cat /proc/cpuinfo >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "meminfo" >> $CRASHDIR/info.txt
+    cat /proc/cpuinfo >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "cmdline" >> $CRASHDIR/info.txt
+    cat /proc/cmdline >> $CRASHDIR/info.txt
+    echo >> $CRASHDIR/info.txt
+    echo "lsmod" >> $CRASHDIR/info.txt
+    lsmod >> $CRASHDIR/info.txt
+}
+
+roll_crashes()
+{
+    # Roll any existing crashes
+    if [ -d $CRASHDIR ]
+       then
+       if [ -d $CRASHDIR.1 ]
+           then
+           rm -fr $CRASHDIR.1
+       fi
+       mv $CRASHDIR $CRASHDIR.1
+    fi
+
+    # Prepare location
+    mkdir -p $CRASHDIR
+}
+
+# ath10k, check debugfs entries.
+for i in /sys/kernel/debug/ieee80211/*/ath10k/fw_crash_dump
+do
+  #echo "Checking $i"
+  if cat $i > $TMPLOC/ath10k_crash.bin 2>&1
+      then
+      FOUND_BUG=1
+
+      #echo "Found ath10k crash data in $i"
+      roll_crashes
+
+      ADIR=${i/fw_crash_dump/}
+
+      CTFW=0
+      if grep -- -ct- $TMPLOC/ath10k_crash.bin > /dev/null 2>&1
+         then
+         CTFW=1
+      fi
+
+      echo "Send bug reports to:" > $CRASHDIR/report_to.txt
+      if [ -f $ADIR/ct_special -o $CTFW == "1" ]
+         then
+         # Looks like this is CT firmware or driver...
+         echo "greearb@candelatech.com" >> $CRASHDIR/report_to.txt
+         echo "and/or report or check for duplicates here:" >> $CRASHDIR/report_to.txt
+         echo "https://github.com/greearb/ath10k-ct/issues" >> $CRASHDIR/report_to.txt
+      else
+         # Not sure who would want these bug reports for upstream...
+         echo "https://www.lede-project.org/" >> $CRASHDIR/report_to.txt
+      fi
+      echo >> $CRASHDIR/report_to.txt
+      echo "Please attach all files in this directory to bug reports." >> $CRASHDIR/report_to.txt
+
+      mv $TMPLOC/ath10k_crash.bin $CRASHDIR
+
+      # Add any more ath10k specific stuff here.
+
+      # And call generic bug reporting logic
+      bugcheck_generic
+  fi
+done
+
+if [ $FOUND_BUG == "1" ]
+    then
+    # Notify LUCI somehow?
+    echo "bugcheck.sh found an issue to be reported" > /dev/kmsg
+    echo "See $CRASHDIR for details on how to report this" > /dev/kmsg
+    # Let calling code know something was wrong.
+    exit 1
+fi
+
+exit 0
diff --git a/package/utils/ct-bugcheck/src/bugchecker.sh b/package/utils/ct-bugcheck/src/bugchecker.sh
new file mode 100755 (executable)
index 0000000..be305af
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+# Periodically call bugcheck.sh script
+
+CHECKER=bugcheck.sh
+SLEEPFOR=60
+
+DO_BUGCHECK=0
+
+# So, to enable this, you create an /etc/config/bugcheck file
+# with contents like:
+#  DO_BUGCHECK=1
+#  export DO_BUGCHECK
+
+if [ -f /etc/config/bugcheck ]
+    then
+    . /etc/config/bugcheck
+fi
+
+if [ $DO_BUGCHECK == 0 ]
+then
+    exit 0
+fi
+
+while true
+  do
+  $CHECKER
+  sleep $SLEEPFOR
+done