bonding: Add tlb_dynamic_lb parameter for tlb mode
authorMahesh Bandewar <maheshb@google.com>
Tue, 22 Apr 2014 23:30:22 +0000 (16:30 -0700)
committerDavid S. Miller <davem@davemloft.net>
Thu, 24 Apr 2014 17:04:34 +0000 (13:04 -0400)
The aggresive load balancing causes packet re-ordering as active
flows are moved from a slave to another within the group. Sometime
this aggresive lb is not necessary if the preference is for less
re-ordering. This parameter if used with value "0" disables
this dynamic flow shuffling minimizing packet re-ordering. Of course
the side effect is that it has to live with the static load balancing
that the hashing distribution provides. This impact is less severe if
the correct xmit-hashing-policy is used for the tlb setup.

The default value of the parameter is set to "1" mimicing the earlier
behavior.

Ran the netperf test with 200 stream for 1 min between two hosts with
4x1G trunk (xmit-lb mode with xmit-policy L3+4) before and after these
changes. Following was the command used for those 200 instances -

    netperf -t TCP_RR -l 60 -s 5 -H <host> -- -r81920,81920

Transactions per second:
    Before change: 1,367.11
    After  change: 1,470.65

Change-Id: Ie3f75c77282cf602e83a6e833c6eb164e72a0990
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/bonding.txt
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_options.c
drivers/net/bonding/bond_options.h
drivers/net/bonding/bond_sysfs.c
drivers/net/bonding/bonding.h

index a97c567f24e866e6c6f64e69b4c12443e9fbda38..9c723ecd00251534a0b011c4e0ffbd053242454e 100644 (file)
@@ -585,13 +585,19 @@ mode
        balance-tlb or 5
 
                Adaptive transmit load balancing: channel bonding that
-               does not require any special switch support.  The
-               outgoing traffic is distributed according to the
-               current load (computed relative to the speed) on each
-               slave.  Incoming traffic is received by the current
-               slave.  If the receiving slave fails, another slave
-               takes over the MAC address of the failed receiving
-               slave.
+               does not require any special switch support.
+
+               In tlb_dynamic_lb=1 mode; the outgoing traffic is
+               distributed according to the current load (computed
+               relative to the speed) on each slave.
+
+               In tlb_dynamic_lb=0 mode; the load balancing based on
+               current load is disabled and the load is distributed
+               only using the hash distribution.
+
+               Incoming traffic is received by the current slave.
+               If the receiving slave fails, another slave takes over
+               the MAC address of the failed receiving slave.
 
                Prerequisite:
 
@@ -736,6 +742,28 @@ primary_reselect
 
        This option was added for bonding version 3.6.0.
 
+tlb_dynamic_lb
+
+       Specifies if dynamic shuffling of flows is enabled in tlb
+       mode. The value has no effect on any other modes.
+
+       The default behavior of tlb mode is to shuffle active flows across
+       slaves based on the load in that interval. This gives nice lb
+       characteristics but can cause packet reordering. If re-ordering is
+       a concern use this variable to disable flow shuffling and rely on
+       load balancing provided solely by the hash distribution.
+       xmit-hash-policy can be used to select the appropriate hashing for
+       the setup.
+
+       The sysfs entry can be used to change the setting per bond device
+       and the initial value is derived from the module parameter. The
+       sysfs entry is allowed to be changed only if the bond device is
+       down.
+
+       The default value is "1" that enables flow shuffling while value "0"
+       disables it. This option was added in bonding driver 3.7.1
+
+
 updelay
 
        Specifies the time, in milliseconds, to wait before enabling a
index 153232ed4b3f47f63b2c3031715d50517caabcb5..70de039dad2e09783887ec1b5e82265c26db993f 100644 (file)
@@ -1356,7 +1356,8 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
        if (!tx_slave) {
                /* unbalanced or unassigned, send through primary */
                tx_slave = rcu_dereference(bond->curr_active_slave);
-               bond_info->unbalanced_load += skb->len;
+               if (bond->params.tlb_dynamic_lb)
+                       bond_info->unbalanced_load += skb->len;
        }
 
        if (tx_slave && SLAVE_IS_OK(tx_slave)) {
@@ -1369,7 +1370,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
                goto out;
        }
 
-       if (tx_slave) {
+       if (tx_slave && bond->params.tlb_dynamic_lb) {
                _lock_tx_hashtbl(bond);
                __tlb_clear_slave(bond, tx_slave, 0);
                _unlock_tx_hashtbl(bond);
@@ -1399,11 +1400,21 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
                    /* In case of IPX, it will falback to L2 hash */
                case htons(ETH_P_IPV6):
                        hash_index = bond_xmit_hash(bond, skb);
-                       tx_slave = tlb_choose_channel(bond, hash_index & 0xFF, skb->len);
+                       if (bond->params.tlb_dynamic_lb) {
+                               tx_slave = tlb_choose_channel(bond,
+                                                             hash_index & 0xFF,
+                                                             skb->len);
+                       } else {
+                               struct list_head *iter;
+                               int idx = hash_index % bond->slave_cnt;
+
+                               bond_for_each_slave_rcu(bond, tx_slave, iter)
+                                       if (--idx < 0)
+                                               break;
+                       }
                        break;
                }
        }
-
        return bond_do_alb_xmit(skb, bond, tx_slave);
 }
 
index 1fd32a16cbc5c02d65a8d49955603828fede44f8..9d08e007d85309a86be139e8e9b39cc36cfcd7b0 100644 (file)
@@ -3096,7 +3096,8 @@ static int bond_open(struct net_device *bond_dev)
                 */
                if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB)))
                        return -ENOMEM;
-               queue_delayed_work(bond->wq, &bond->alb_work, 0);
+               if (bond->params.tlb_dynamic_lb)
+                       queue_delayed_work(bond->wq, &bond->alb_work, 0);
        }
 
        if (bond->params.miimon)  /* link check interval, in milliseconds. */
@@ -4304,6 +4305,7 @@ static int bond_check_params(struct bond_params *params)
        params->min_links = min_links;
        params->lp_interval = lp_interval;
        params->packets_per_slave = packets_per_slave;
+       params->tlb_dynamic_lb = 1; /* Default value */
        if (packets_per_slave > 0) {
                params->reciprocal_packets_per_slave =
                        reciprocal_value(packets_per_slave);
index dc389384175251a4eb367172ec26d94abcff2dd7..9fba7a1e6d51ead234de9d1468fe6c094971ccfb 100644 (file)
@@ -70,6 +70,8 @@ static int bond_option_mode_set(struct bonding *bond,
                                const struct bond_opt_value *newval);
 static int bond_option_slaves_set(struct bonding *bond,
                                  const struct bond_opt_value *newval);
+static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
+                                 const struct bond_opt_value *newval);
 
 
 static const struct bond_opt_value bond_mode_tbl[] = {
@@ -179,6 +181,12 @@ static const struct bond_opt_value bond_lp_interval_tbl[] = {
        { NULL,      -1,      0},
 };
 
+static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = {
+       { "off", 0,  0},
+       { "on",  1,  BOND_VALFLAG_DEFAULT},
+       { NULL,  -1, 0}
+};
+
 static const struct bond_option bond_opts[] = {
        [BOND_OPT_MODE] = {
                .id = BOND_OPT_MODE,
@@ -364,6 +372,15 @@ static const struct bond_option bond_opts[] = {
                .flags = BOND_OPTFLAG_RAWVAL,
                .set = bond_option_slaves_set
        },
+       [BOND_OPT_TLB_DYNAMIC_LB] = {
+               .id = BOND_OPT_TLB_DYNAMIC_LB,
+               .name = "dynamic_lb",
+               .desc = "Enable dynamic flow shuffling",
+               .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)),
+               .values = bond_tlb_dynamic_lb_tbl,
+               .flags = BOND_OPTFLAG_IFDOWN,
+               .set = bond_option_tlb_dynamic_lb_set,
+       },
        { }
 };
 
@@ -1337,3 +1354,13 @@ err_no_cmd:
        ret = -EPERM;
        goto out;
 }
+
+static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
+                                         const struct bond_opt_value *newval)
+{
+       pr_info("%s: Setting dynamic-lb to %s (%llu)\n",
+               bond->dev->name, newval->string, newval->value);
+       bond->params.tlb_dynamic_lb = newval->value;
+
+       return 0;
+}
index 12be9e1bfb0c0d048229a1698c2b384847fbd794..c1860f06145aca73e4c358f20d00fb96aacc2143 100644 (file)
@@ -62,6 +62,7 @@ enum {
        BOND_OPT_RESEND_IGMP,
        BOND_OPT_LP_INTERVAL,
        BOND_OPT_SLAVES,
+       BOND_OPT_TLB_DYNAMIC_LB,
        BOND_OPT_LAST
 };
 
index 0e8b268da0a08f58c4443c6c36aa62bb9ef0f071..431892f1a4cee811cc0061cb204ad2199fb55dca 100644 (file)
@@ -1039,6 +1039,34 @@ static ssize_t bonding_store_lp_interval(struct device *d,
 static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
                   bonding_show_lp_interval, bonding_store_lp_interval);
 
+static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
+                                          struct device_attribute *attr,
+                                          char *buf)
+{
+       struct bonding *bond = to_bond(d);
+       return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);
+}
+
+static ssize_t bonding_store_tlb_dynamic_lb(struct device *d,
+                                           struct device_attribute *attr,
+                                           const char *buf,
+                                           size_t count)
+{
+       struct bonding *bond = to_bond(d);
+       int ret;
+
+       ret = bond_opt_tryset_rtnl(bond, BOND_OPT_TLB_DYNAMIC_LB,
+                                  (char *)buf);
+       if (!ret)
+               ret = count;
+
+       return ret;
+}
+
+static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR,
+                  bonding_show_tlb_dynamic_lb,
+                  bonding_store_tlb_dynamic_lb);
+
 static ssize_t bonding_show_packets_per_slave(struct device *d,
                                              struct device_attribute *attr,
                                              char *buf)
@@ -1099,6 +1127,7 @@ static struct attribute *per_bond_attrs[] = {
        &dev_attr_min_links.attr,
        &dev_attr_lp_interval.attr,
        &dev_attr_packets_per_slave.attr,
+       &dev_attr_tlb_dynamic_lb.attr,
        NULL,
 };
 
index c0948ca26389e6106802201ee67950cfff782059..c1c7c2f12ac44618648dc5395d2fe935bdaf2208 100644 (file)
@@ -174,6 +174,7 @@ struct bond_params {
        int resend_igmp;
        int lp_interval;
        int packets_per_slave;
+       int tlb_dynamic_lb;
        struct reciprocal_value reciprocal_packets_per_slave;
 };