From 268210cec8662ab6d6f8744a888c3a41bb27227c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 25 Nov 2020 19:51:31 +0100 Subject: [PATCH] mac80211: add fq performace improvements Improves performance under load Signed-off-by: Felix Fietkau --- ...-free-packets-from-a-flow-on-overmem.patch | 95 ++++++ ...-get_default_func-move-default-flow-.patch | 144 ++++++++ ...ot-maintain-a-backlog-sorted-list-of.patch | 314 ++++++++++++++++++ .../500-mac80211_configure_antenna_gain.patch | 2 +- 4 files changed, 554 insertions(+), 1 deletion(-) create mode 100644 package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch create mode 100644 package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch create mode 100644 package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch diff --git a/package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch b/package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch new file mode 100644 index 0000000000..05a888006e --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch @@ -0,0 +1,95 @@ +From: Felix Fietkau +Date: Wed, 25 Nov 2020 18:03:46 +0100 +Subject: [PATCH] net/fq_impl: bulk-free packets from a flow on overmemory + +This is similar to what sch_fq_codel does. It also amortizes the worst +case cost of a follow-up patch that changes the selection of the biggest +flow for dropping packets + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/fq_impl.h ++++ b/include/net/fq_impl.h +@@ -11,17 +11,25 @@ + + /* functions that are embedded into includer */ + ++ ++static void ++__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets, ++ unsigned int bytes, unsigned int truesize) ++{ ++ struct fq_tin *tin = flow->tin; ++ ++ tin->backlog_bytes -= bytes; ++ tin->backlog_packets -= packets; ++ flow->backlog -= bytes; ++ fq->backlog -= packets; ++ fq->memory_usage -= truesize; ++} ++ + static void fq_adjust_removal(struct fq *fq, + struct fq_flow *flow, + struct sk_buff *skb) + { +- struct fq_tin *tin = flow->tin; +- +- tin->backlog_bytes -= skb->len; +- tin->backlog_packets--; +- flow->backlog -= skb->len; +- fq->backlog--; +- fq->memory_usage -= skb->truesize; ++ __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); + } + + static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) +@@ -59,6 +67,34 @@ static struct sk_buff *fq_flow_dequeue(s + return skb; + } + ++static int fq_flow_drop(struct fq *fq, struct fq_flow *flow, ++ fq_skb_free_t free_func) ++{ ++ unsigned int packets = 0, bytes = 0, truesize = 0; ++ struct fq_tin *tin = flow->tin; ++ struct sk_buff *skb; ++ int pending; ++ ++ lockdep_assert_held(&fq->lock); ++ ++ pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2); ++ do { ++ skb = __skb_dequeue(&flow->queue); ++ if (!skb) ++ break; ++ ++ packets++; ++ bytes += skb->len; ++ truesize += skb->truesize; ++ free_func(fq, tin, flow, skb); ++ } while (packets < pending); ++ ++ __fq_adjust_removal(fq, flow, packets, bytes, truesize); ++ fq_rejigger_backlog(fq, flow); ++ ++ return packets; ++} ++ + static struct sk_buff *fq_tin_dequeue(struct fq *fq, + struct fq_tin *tin, + fq_tin_dequeue_t dequeue_func) +@@ -190,12 +226,9 @@ static void fq_tin_enqueue(struct fq *fq + if (!flow) + return; + +- skb = fq_flow_dequeue(fq, flow); +- if (!skb) ++ if (!fq_flow_drop(fq, flow, free_func)) + return; + +- free_func(fq, flow->tin, flow, skb); +- + flow->tin->overlimit++; + fq->overlimit++; + if (oom) { diff --git a/package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch b/package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch new file mode 100644 index 0000000000..de1f1bb7f5 --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch @@ -0,0 +1,144 @@ +From: Felix Fietkau +Date: Wed, 25 Nov 2020 18:09:10 +0100 +Subject: [PATCH] net/fq_impl: drop get_default_func, move default flow to + fq_tin + +Simplifies the code and prepares for a rework of scanning for flows on +overmemory drop. + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/fq.h ++++ b/include/net/fq.h +@@ -47,6 +47,7 @@ struct fq_flow { + struct fq_tin { + struct list_head new_flows; + struct list_head old_flows; ++ struct fq_flow default_flow; + u32 backlog_bytes; + u32 backlog_packets; + u32 overlimit; +--- a/include/net/fq_impl.h ++++ b/include/net/fq_impl.h +@@ -151,8 +151,7 @@ static u32 fq_flow_idx(struct fq *fq, st + + static struct fq_flow *fq_flow_classify(struct fq *fq, + struct fq_tin *tin, u32 idx, +- struct sk_buff *skb, +- fq_flow_get_default_t get_default_func) ++ struct sk_buff *skb) + { + struct fq_flow *flow; + +@@ -160,7 +159,7 @@ static struct fq_flow *fq_flow_classify( + + flow = &fq->flows[idx]; + if (flow->tin && flow->tin != tin) { +- flow = get_default_func(fq, tin, idx, skb); ++ flow = &tin->default_flow; + tin->collisions++; + fq->collisions++; + } +@@ -192,15 +191,14 @@ static void fq_recalc_backlog(struct fq + static void fq_tin_enqueue(struct fq *fq, + struct fq_tin *tin, u32 idx, + struct sk_buff *skb, +- fq_skb_free_t free_func, +- fq_flow_get_default_t get_default_func) ++ fq_skb_free_t free_func) + { + struct fq_flow *flow; + bool oom; + + lockdep_assert_held(&fq->lock); + +- flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); ++ flow = fq_flow_classify(fq, tin, idx, skb); + + flow->tin = tin; + flow->backlog += skb->len; +@@ -331,6 +329,7 @@ static void fq_tin_init(struct fq_tin *t + { + INIT_LIST_HEAD(&tin->new_flows); + INIT_LIST_HEAD(&tin->old_flows); ++ fq_flow_init(&tin->default_flow); + } + + static int fq_init(struct fq *fq, int flows_cnt) +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -855,7 +855,6 @@ enum txq_info_flags { + */ + struct txq_info { + struct fq_tin tin; +- struct fq_flow def_flow; + struct codel_vars def_cvars; + struct codel_stats cstats; + struct sk_buff_head frags; +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -1322,7 +1322,7 @@ static struct sk_buff *codel_dequeue_fun + fq = &local->fq; + + if (cvars == &txqi->def_cvars) +- flow = &txqi->def_flow; ++ flow = &txqi->tin.default_flow; + else + flow = &fq->flows[cvars - local->cvars]; + +@@ -1365,7 +1365,7 @@ static struct sk_buff *fq_tin_dequeue_fu + cparams = &local->cparams; + } + +- if (flow == &txqi->def_flow) ++ if (flow == &tin->default_flow) + cvars = &txqi->def_cvars; + else + cvars = &local->cvars[flow - fq->flows]; +@@ -1392,17 +1392,6 @@ static void fq_skb_free_func(struct fq * + ieee80211_free_txskb(&local->hw, skb); + } + +-static struct fq_flow *fq_flow_get_default_func(struct fq *fq, +- struct fq_tin *tin, +- int idx, +- struct sk_buff *skb) +-{ +- struct txq_info *txqi; +- +- txqi = container_of(tin, struct txq_info, tin); +- return &txqi->def_flow; +-} +- + static void ieee80211_txq_enqueue(struct ieee80211_local *local, + struct txq_info *txqi, + struct sk_buff *skb) +@@ -1415,8 +1404,7 @@ static void ieee80211_txq_enqueue(struct + + spin_lock_bh(&fq->lock); + fq_tin_enqueue(fq, tin, flow_idx, skb, +- fq_skb_free_func, +- fq_flow_get_default_func); ++ fq_skb_free_func); + spin_unlock_bh(&fq->lock); + } + +@@ -1459,7 +1447,6 @@ void ieee80211_txq_init(struct ieee80211 + struct txq_info *txqi, int tid) + { + fq_tin_init(&txqi->tin); +- fq_flow_init(&txqi->def_flow); + codel_vars_init(&txqi->def_cvars); + codel_stats_init(&txqi->cstats); + __skb_queue_head_init(&txqi->frags); +@@ -3310,8 +3297,7 @@ static bool ieee80211_amsdu_aggregate(st + */ + + tin = &txqi->tin; +- flow = fq_flow_classify(fq, tin, flow_idx, skb, +- fq_flow_get_default_func); ++ flow = fq_flow_classify(fq, tin, flow_idx, skb); + head = skb_peek_tail(&flow->queue); + if (!head || skb_is_gso(head)) + goto out; diff --git a/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch b/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch new file mode 100644 index 0000000000..0e923991db --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch @@ -0,0 +1,314 @@ +From: Felix Fietkau +Date: Wed, 25 Nov 2020 18:10:34 +0100 +Subject: [PATCH] net/fq_impl: do not maintain a backlog-sorted list of + flows + +A sorted flow list is only needed to drop packets in the biggest flow when +hitting the overmemory condition. +By scanning flows only when needed, we can avoid paying the cost of +maintaining the list under normal conditions +In order to avoid scanning lots of empty flows and touching too many cold +cache lines, a bitmap of flows with backlog is maintained + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/fq.h ++++ b/include/net/fq.h +@@ -19,8 +19,6 @@ struct fq_tin; + * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ + * (deficit round robin) based round robin queuing similar to the one + * found in net/sched/sch_fq_codel.c +- * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of +- * fat flows and efficient head-dropping if packet limit is reached + * @queue: sk_buff queue to hold packets + * @backlog: number of bytes pending in the queue. The number of packets can be + * found in @queue.qlen +@@ -29,7 +27,6 @@ struct fq_tin; + struct fq_flow { + struct fq_tin *tin; + struct list_head flowchain; +- struct list_head backlogchain; + struct sk_buff_head queue; + u32 backlog; + int deficit; +@@ -47,6 +44,7 @@ struct fq_flow { + struct fq_tin { + struct list_head new_flows; + struct list_head old_flows; ++ struct list_head tin_list; + struct fq_flow default_flow; + u32 backlog_bytes; + u32 backlog_packets; +@@ -60,14 +58,14 @@ struct fq_tin { + /** + * struct fq - main container for fair queuing purposes + * +- * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient +- * head-dropping when @backlog reaches @limit + * @limit: max number of packets that can be queued across all flows + * @backlog: number of packets queued across all flows + */ + struct fq { + struct fq_flow *flows; +- struct list_head backlogs; ++ unsigned long *flows_bitmap; ++ ++ struct list_head tin_backlog; + spinlock_t lock; + u32 flows_cnt; + u32 limit; +--- a/include/net/fq_impl.h ++++ b/include/net/fq_impl.h +@@ -17,12 +17,24 @@ __fq_adjust_removal(struct fq *fq, struc + unsigned int bytes, unsigned int truesize) + { + struct fq_tin *tin = flow->tin; ++ int idx; + + tin->backlog_bytes -= bytes; + tin->backlog_packets -= packets; + flow->backlog -= bytes; + fq->backlog -= packets; + fq->memory_usage -= truesize; ++ ++ if (flow->backlog) ++ return; ++ ++ if (flow == &tin->default_flow) { ++ list_del_init(&tin->tin_list); ++ return; ++ } ++ ++ idx = flow - fq->flows; ++ __clear_bit(idx, fq->flows_bitmap); + } + + static void fq_adjust_removal(struct fq *fq, +@@ -32,24 +44,6 @@ static void fq_adjust_removal(struct fq + __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); + } + +-static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) +-{ +- struct fq_flow *i; +- +- if (flow->backlog == 0) { +- list_del_init(&flow->backlogchain); +- } else { +- i = flow; +- +- list_for_each_entry_continue(i, &fq->backlogs, backlogchain) +- if (i->backlog < flow->backlog) +- break; +- +- list_move_tail(&flow->backlogchain, +- &i->backlogchain); +- } +-} +- + static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) + { +@@ -62,7 +56,6 @@ static struct sk_buff *fq_flow_dequeue(s + return NULL; + + fq_adjust_removal(fq, flow, skb); +- fq_rejigger_backlog(fq, flow); + + return skb; + } +@@ -90,7 +83,6 @@ static int fq_flow_drop(struct fq *fq, s + } while (packets < pending); + + __fq_adjust_removal(fq, flow, packets, bytes, truesize); +- fq_rejigger_backlog(fq, flow); + + return packets; + } +@@ -170,22 +162,36 @@ static struct fq_flow *fq_flow_classify( + return flow; + } + +-static void fq_recalc_backlog(struct fq *fq, +- struct fq_tin *tin, +- struct fq_flow *flow) ++static struct fq_flow *fq_find_fattest_flow(struct fq *fq) + { +- struct fq_flow *i; ++ struct fq_tin *tin; ++ struct fq_flow *flow = NULL; ++ u32 len = 0; ++ int i; + +- if (list_empty(&flow->backlogchain)) +- list_add_tail(&flow->backlogchain, &fq->backlogs); ++ for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) { ++ struct fq_flow *cur = &fq->flows[i]; ++ unsigned int cur_len; + +- i = flow; +- list_for_each_entry_continue_reverse(i, &fq->backlogs, +- backlogchain) +- if (i->backlog > flow->backlog) +- break; ++ cur_len = cur->backlog; ++ if (cur_len <= len) ++ continue; + +- list_move(&flow->backlogchain, &i->backlogchain); ++ flow = cur; ++ len = cur_len; ++ } ++ ++ list_for_each_entry(tin, &fq->tin_backlog, tin_list) { ++ unsigned int cur_len = tin->default_flow.backlog; ++ ++ if (cur_len <= len) ++ continue; ++ ++ flow = &tin->default_flow; ++ len = cur_len; ++ } ++ ++ return flow; + } + + static void fq_tin_enqueue(struct fq *fq, +@@ -200,6 +206,13 @@ static void fq_tin_enqueue(struct fq *fq + + flow = fq_flow_classify(fq, tin, idx, skb); + ++ if (!flow->backlog) { ++ if (flow != &tin->default_flow) ++ __set_bit(idx, fq->flows_bitmap); ++ else if (list_empty(&tin->tin_list)) ++ list_add(&tin->tin_list, &fq->tin_backlog); ++ } ++ + flow->tin = tin; + flow->backlog += skb->len; + tin->backlog_bytes += skb->len; +@@ -207,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq + fq->memory_usage += skb->truesize; + fq->backlog++; + +- fq_recalc_backlog(fq, tin, flow); +- + if (list_empty(&flow->flowchain)) { + flow->deficit = fq->quantum; + list_add_tail(&flow->flowchain, +@@ -218,9 +229,7 @@ static void fq_tin_enqueue(struct fq *fq + __skb_queue_tail(&flow->queue, skb); + oom = (fq->memory_usage > fq->memory_limit); + while (fq->backlog > fq->limit || oom) { +- flow = list_first_entry_or_null(&fq->backlogs, +- struct fq_flow, +- backlogchain); ++ flow = fq_find_fattest_flow(fq); + if (!flow) + return; + +@@ -255,8 +264,6 @@ static void fq_flow_filter(struct fq *fq + fq_adjust_removal(fq, flow, skb); + free_func(fq, tin, flow, skb); + } +- +- fq_rejigger_backlog(fq, flow); + } + + static void fq_tin_filter(struct fq *fq, +@@ -279,16 +286,18 @@ static void fq_flow_reset(struct fq *fq, + struct fq_flow *flow, + fq_skb_free_t free_func) + { ++ struct fq_tin *tin = flow->tin; + struct sk_buff *skb; + + while ((skb = fq_flow_dequeue(fq, flow))) +- free_func(fq, flow->tin, flow, skb); ++ free_func(fq, tin, flow, skb); + +- if (!list_empty(&flow->flowchain)) ++ if (!list_empty(&flow->flowchain)) { + list_del_init(&flow->flowchain); +- +- if (!list_empty(&flow->backlogchain)) +- list_del_init(&flow->backlogchain); ++ if (list_empty(&tin->new_flows) && ++ list_empty(&tin->old_flows)) ++ list_del_init(&tin->tin_list); ++ } + + flow->tin = NULL; + +@@ -314,6 +323,7 @@ static void fq_tin_reset(struct fq *fq, + fq_flow_reset(fq, flow, free_func); + } + ++ WARN_ON_ONCE(!list_empty(&tin->tin_list)); + WARN_ON_ONCE(tin->backlog_bytes); + WARN_ON_ONCE(tin->backlog_packets); + } +@@ -321,7 +331,6 @@ static void fq_tin_reset(struct fq *fq, + static void fq_flow_init(struct fq_flow *flow) + { + INIT_LIST_HEAD(&flow->flowchain); +- INIT_LIST_HEAD(&flow->backlogchain); + __skb_queue_head_init(&flow->queue); + } + +@@ -329,6 +338,7 @@ static void fq_tin_init(struct fq_tin *t + { + INIT_LIST_HEAD(&tin->new_flows); + INIT_LIST_HEAD(&tin->old_flows); ++ INIT_LIST_HEAD(&tin->tin_list); + fq_flow_init(&tin->default_flow); + } + +@@ -337,8 +347,8 @@ static int fq_init(struct fq *fq, int fl + int i; + + memset(fq, 0, sizeof(fq[0])); +- INIT_LIST_HEAD(&fq->backlogs); + spin_lock_init(&fq->lock); ++ INIT_LIST_HEAD(&fq->tin_backlog); + fq->flows_cnt = max_t(u32, flows_cnt, 1); + fq->quantum = 300; + fq->limit = 8192; +@@ -348,6 +358,14 @@ static int fq_init(struct fq *fq, int fl + if (!fq->flows) + return -ENOMEM; + ++ fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long), ++ GFP_KERNEL); ++ if (!fq->flows_bitmap) { ++ kvfree(fq->flows); ++ fq->flows = NULL; ++ return -ENOMEM; ++ } ++ + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_init(&fq->flows[i]); + +@@ -364,6 +382,9 @@ static void fq_reset(struct fq *fq, + + kvfree(fq->flows); + fq->flows = NULL; ++ ++ kfree(fq->flows_bitmap); ++ fq->flows_bitmap = NULL; + } + + #endif +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -3364,8 +3364,6 @@ out_recalc: + if (head->len != orig_len) { + flow->backlog += head->len - orig_len; + tin->backlog_bytes += head->len - orig_len; +- +- fq_recalc_backlog(fq, tin, flow); + } + out: + spin_unlock_bh(&fq->lock); diff --git a/package/kernel/mac80211/patches/subsys/500-mac80211_configure_antenna_gain.patch b/package/kernel/mac80211/patches/subsys/500-mac80211_configure_antenna_gain.patch index d76fc88238..cd2a3b424f 100644 --- a/package/kernel/mac80211/patches/subsys/500-mac80211_configure_antenna_gain.patch +++ b/package/kernel/mac80211/patches/subsys/500-mac80211_configure_antenna_gain.patch @@ -87,7 +87,7 @@ CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h -@@ -1401,6 +1401,7 @@ struct ieee80211_local { +@@ -1400,6 +1400,7 @@ struct ieee80211_local { int dynamic_ps_forced_timeout; int user_power_level; /* in dBm, for all interfaces */ -- 2.30.2