hv_netvsc: Add per-cpu ethtool stats for netvsc
authorYidong Ren <yidren@microsoft.com>
Mon, 30 Jul 2018 17:09:45 +0000 (17:09 +0000)
committerDavid S. Miller <davem@davemloft.net>
Mon, 30 Jul 2018 19:35:04 +0000 (12:35 -0700)
This patch implements following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

Corresponding per-cpu counters already exist in current code. Exposing
these counters will help troubleshooting performance issues.

for_each_present_cpu() was used instead of for_each_possible_cpu():
for_each_possible_cpu() would create very long and mostly useless
output. It is still used for the internal buffer, but not for the
ethtool output.

There could be an overflow if a CPU were added between the ethtool
calls netvsc_get_sset_count(), netvsc_get_ethtool_stats() and
netvsc_get_strings() (it is still safe if a CPU is removed), because
ethtool makes these three function calls separately.
As long as we use ethtool, I can't see any clean solution.

Currently, and for the foreseeable short term, Hyper-V doesn't support
CPU hot-plug. In addition, ethtool is for admin use; it is unlikely
that an admin would perform such a combination of operations.

Changes in v2:
  - Remove cpp style comment
  - Resubmit after freeze

Changes in v3:
  - Reimplemented with kvmalloc instead of alloc_percpu

Changes in v4:
  - Fixed inconsistent array size
  - Use kvmalloc_array instead of kvmalloc

Signed-off-by: Yidong Ren <yidren@microsoft.com>
Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c

index 4b6e308199d270cd455b7df0de20a8458f6b7941..a32ded5b4f416f662e2a820356f16c9bfbef00db 100644 (file)
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
        unsigned long wake_queue;
 };
 
+/* Per-cpu totals reported via ethtool -S as cpu<n>_* counters.
+ * The rx_*/tx_* fields aggregate both netvsc channel traffic and VF
+ * traffic for that cpu; the vf_* fields count VF traffic only.
+ */
+struct netvsc_ethtool_pcpu_stats {
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	u64	vf_rx_packets;
+	u64	vf_rx_bytes;
+	u64	vf_tx_packets;
+	u64	vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
        u64     rx_packets;
        u64     rx_bytes;
index cf4f40a04194a7fd31e619f41b56a0878186dab1..20275d1e6f9a969bed77561e516277bef5d3289b 100644 (file)
@@ -1118,6 +1118,64 @@ static void netvsc_get_vf_stats(struct net_device *net,
        }
 }
 
+/* Fill pcpu_tot (one entry per possible cpu, indexed by cpu id) with
+ * consistent per-cpu snapshots of both the VF counters and the netvsc
+ * channel counters. Channel stats are attributed to the channel's
+ * target_cpu, so pcpu_tot must be sized for all possible cpus.
+ *
+ * NOTE(review): nvdev is dereferenced below without a NULL check —
+ * assumes the caller verified ndev_ctx->nvdev under rtnl; confirm at
+ * call sites.
+ */
+static void netvsc_get_pcpu_stats(struct net_device *net,
+                                 struct netvsc_ethtool_pcpu_stats *pcpu_tot)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(net);
+       struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+       int i;
+
+       /* fetch percpu stats of vf */
+       for_each_possible_cpu(i) {
+               const struct netvsc_vf_pcpu_stats *stats =
+                       per_cpu_ptr(ndev_ctx->vf_stats, i);
+               struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
+               unsigned int start;
+
+               /* seqcount retry loop: reread if a writer updated the
+                * counters while we were copying them.
+                */
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       this_tot->vf_rx_packets = stats->rx_packets;
+                       this_tot->vf_tx_packets = stats->tx_packets;
+                       this_tot->vf_rx_bytes = stats->rx_bytes;
+                       this_tot->vf_tx_bytes = stats->tx_bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+               /* Seed the combined totals with the VF portion; this also
+                * initializes every entry, since pcpu_tot may come from a
+                * non-zeroing allocator.
+                */
+               this_tot->rx_packets = this_tot->vf_rx_packets;
+               this_tot->tx_packets = this_tot->vf_tx_packets;
+               this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+               this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+       }
+
+       /* fetch percpu stats of netvsc */
+       for (i = 0; i < nvdev->num_chn; i++) {
+               const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+               const struct netvsc_stats *stats;
+               /* channel traffic is accounted to its target cpu */
+               struct netvsc_ethtool_pcpu_stats *this_tot =
+                       &pcpu_tot[nvchan->channel->target_cpu];
+               u64 packets, bytes;
+               unsigned int start;
+
+               stats = &nvchan->tx_stats;
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       packets = stats->packets;
+                       bytes = stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+               this_tot->tx_bytes      += bytes;
+               this_tot->tx_packets    += packets;
+
+               stats = &nvchan->rx_stats;
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       packets = stats->packets;
+                       bytes = stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+               this_tot->rx_bytes      += bytes;
+               this_tot->rx_packets    += packets;
+       }
+}
+
 static void netvsc_get_stats64(struct net_device *net,
                               struct rtnl_link_stats64 *t)
 {
@@ -1215,6 +1273,23 @@ static const struct {
        { "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
        { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
        { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+       { "cpu%u_rx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+       { "cpu%u_rx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+       { "cpu%u_tx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+       { "cpu%u_tx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+       { "cpu%u_vf_rx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+       { "cpu%u_vf_rx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+       { "cpu%u_vf_tx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+       { "cpu%u_vf_tx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
        { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
        { "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1226,6 +1301,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN        ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN    ARRAY_SIZE(vf_stats)
 
+/* statistics per present cpu (rx/tx and vf rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1241,7 +1319,8 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
        case ETH_SS_STATS:
                return NETVSC_GLOBAL_STATS_LEN
                        + NETVSC_VF_STATS_LEN
-                       + NETVSC_QUEUE_STATS_LEN(nvdev);
+                       + NETVSC_QUEUE_STATS_LEN(nvdev)
+                       + NETVSC_PCPU_STATS_LEN;
        default:
                return -EINVAL;
        }
@@ -1255,9 +1334,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
        const void *nds = &ndc->eth_stats;
        const struct netvsc_stats *qstats;
        struct netvsc_vf_pcpu_stats sum;
+       struct netvsc_ethtool_pcpu_stats *pcpu_sum;
        unsigned int start;
        u64 packets, bytes;
-       int i, j;
+       int i, j, cpu;
 
        if (!nvdev)
                return;
@@ -1289,6 +1369,19 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
                data[i++] = packets;
                data[i++] = bytes;
        }
+
+	/* Per-cpu buffer sized for all possible cpus: channel target_cpu
+	 * values index it directly in netvsc_get_pcpu_stats().
+	 */
+	pcpu_sum = kvmalloc_array(num_possible_cpus(),
+				  sizeof(struct netvsc_ethtool_pcpu_stats),
+				  GFP_KERNEL);
+	/* Fix: kvmalloc_array() can fail; the original passed a NULL
+	 * pcpu_sum straight into netvsc_get_pcpu_stats(), which writes
+	 * through it for every possible cpu.
+	 */
+	if (!pcpu_sum)
+		return;
+
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	/* Emit only present cpus; possible-but-absent cpus would bloat
+	 * the ethtool output (see commit message).
+	 */
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	kvfree(pcpu_sum);
 }
 
 static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -1296,7 +1389,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
        u8 *p = data;
-       int i;
+       int i, cpu;
 
        if (!nvdev)
                return;
@@ -1324,6 +1417,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
                        p += ETH_GSTRING_LEN;
                }
 
+               for_each_present_cpu(cpu) {
+                       for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+                               sprintf(p, pcpu_stats[i].name, cpu);
+                               p += ETH_GSTRING_LEN;
+                       }
+               }
+
                break;
        }
 }