PCI/AER: Add sysfs attributes for rootport cumulative stats
author Rajat Jain <rajatja@google.com>
Thu, 21 Jun 2018 23:48:29 +0000 (16:48 -0700)
committer Bjorn Helgaas <bhelgaas@google.com>
Thu, 19 Jul 2018 21:19:52 +0000 (16:19 -0500)
Add sysfs attributes for rootport statistics (that are cumulative of all
the ERR_* messages seen on this PCI hierarchy).

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
drivers/pci/pcie/aer.c

index 3a784297cfed9de6f726cc2b82d82c70427743dc..4b0318c99507f437455d047163618bd6e0a0f9d6 100644 (file)
@@ -92,3 +92,31 @@ AtomicOp Egress Blocked 0
 TLP Prefix Blocked Error 0
 TOTAL_ERR_NONFATAL 0
 -------------------------------------------------------------------------
+
+============================
+PCIe Rootport AER statistics
+============================
+These attributes show up only under the rootports (or root complex event
+collectors) that are AER capable. They indicate the number of error messages as
+"reported to" the rootport. Please note that the rootports also transmit
+(internally) the ERR_* messages for errors seen by the internal rootport PCI
+device, so these counters include them and are thus cumulative of all the error
+messages on the PCI hierarchy originating at that root port.
+
+Where:         /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
+Date:          July 2018
+Kernel Version: 4.19.0
+Contact:       linux-pci@vger.kernel.org, rajatja@google.com
+Description:   Total number of ERR_COR messages reported to rootport.
+
+Where:     /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
+Date:          July 2018
+Kernel Version: 4.19.0
+Contact:       linux-pci@vger.kernel.org, rajatja@google.com
+Description:   Total number of ERR_FATAL messages reported to rootport.
+
+Where:     /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
+Date:          July 2018
+Kernel Version: 4.19.0
+Contact:       linux-pci@vger.kernel.org, rajatja@google.com
+Description:   Total number of ERR_NONFATAL messages reported to rootport.
index b18c5aca30bdd5179579b5b0e13f65abb4dd99dc..47c67de1ccf15dd5eb8adb09e3b367dc2237872b 100644 (file)
@@ -577,10 +577,30 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
                   aer_uncorrectable_error_string, "ERR_NONFATAL",
                   dev_total_nonfatal_errs);
 
+#define aer_stats_rootport_attr(name, field)                           \
+       static ssize_t                                                  \
+       name##_show(struct device *dev, struct device_attribute *attr,  \
+                    char *buf)                                         \
+{                                                                      \
+       struct pci_dev *pdev = to_pci_dev(dev);                         \
+       return sprintf(buf, "%llu\n", pdev->aer_stats->field);          \
+}                                                                      \
+static DEVICE_ATTR_RO(name)
+
+aer_stats_rootport_attr(aer_rootport_total_err_cor,
+                        rootport_total_cor_errs);       /* ERR_COR total */
+aer_stats_rootport_attr(aer_rootport_total_err_fatal,
+                        rootport_total_fatal_errs);     /* ERR_FATAL total */
+aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
+                        rootport_total_nonfatal_errs);  /* ERR_NONFATAL total */
+
 static struct attribute *aer_stats_attrs[] __ro_after_init = {
        &dev_attr_aer_dev_correctable.attr,
        &dev_attr_aer_dev_fatal.attr,
        &dev_attr_aer_dev_nonfatal.attr,
+       &dev_attr_aer_rootport_total_err_cor.attr,      /* root ports only */
+       &dev_attr_aer_rootport_total_err_fatal.attr,    /* root ports only */
+       &dev_attr_aer_rootport_total_err_nonfatal.attr, /* root ports only */
        NULL
 };
 
@@ -593,6 +613,12 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
        if (!pdev->aer_stats)
                return 0;
 
+       if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
+            a == &dev_attr_aer_rootport_total_err_fatal.attr ||
+            a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
+           pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
+               return 0;
+
        return a->mode;
 }
 
@@ -635,6 +661,25 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
                        counter[i]++;
 }
 
+/*
+ * Bump the cumulative rootport counters in pdev->aer_stats according to the
+ * status bits recorded in e_src->status.  Does nothing when the device has
+ * no aer_stats.
+ */
+static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
+                                struct aer_err_source *e_src)
+{
+       struct aer_stats *stats = pdev->aer_stats;
+
+       if (!stats)
+               return;
+
+       /* Checked independently of the uncorrectable bit: both may count. */
+       if (e_src->status & PCI_ERR_ROOT_COR_RCV)
+               stats->rootport_total_cor_errs++;
+
+       if (!(e_src->status & PCI_ERR_ROOT_UNCOR_RCV))
+               return;
+
+       /*
+        * An uncorrectable report is counted exactly once: as fatal when the
+        * FATAL bit is set, otherwise as non-fatal.
+        */
+       if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
+               stats->rootport_total_fatal_errs++;
+       else
+               stats->rootport_total_nonfatal_errs++;
+}
+
 static void __print_tlp_header(struct pci_dev *dev,
                               struct aer_header_log_regs *t)
 {
@@ -1085,6 +1130,8 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
        struct pci_dev *pdev = rpc->rpd;
        struct aer_err_info *e_info = &rpc->e_info;
 
+       pci_rootport_aer_stats_incr(pdev, e_src);
+
        /*
         * There is a possibility that both correctable error and
         * uncorrectable error being logged. Report correctable error first.