From: Felix Fietkau Date: Sun, 26 Jul 2020 13:12:32 +0000 (+0200) Subject: mac80211: merge performance improvement patches X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=3d731fc9030655e18d86f81ca8aed3341ab2bc1e;p=openwrt%2Fstaging%2Fthess.git mac80211: merge performance improvement patches Fix fq_codel performance issues Add a new rx function for batch processing Signed-off-by: Felix Fietkau --- diff --git a/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch b/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch new file mode 100644 index 0000000000..5837a7b651 --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch @@ -0,0 +1,186 @@ +From: Felix Fietkau +Date: Sat, 25 Jul 2020 20:53:23 +0200 +Subject: [PATCH] mac80211: add a function for running rx without passing skbs + to the stack + +This can be used to run mac80211 rx processing on a batch of frames in NAPI +poll before passing them to the network stack in a large batch. +This can improve icache footprint, or it can be used to pass frames via +netif_receive_skb_list. + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/mac80211.h ++++ b/include/net/mac80211.h +@@ -4358,6 +4358,31 @@ void ieee80211_free_hw(struct ieee80211_ + void ieee80211_restart_hw(struct ieee80211_hw *hw); + + /** ++ * ieee80211_rx_list - receive frame and store processed skbs in a list ++ * ++ * Use this function to hand received frames to mac80211. The receive ++ * buffer in @skb must start with an IEEE 802.11 header. In case of a ++ * paged @skb is used, the driver is recommended to put the ieee80211 ++ * header of the frame on the linear part of the @skb to avoid memory ++ * allocation and/or memcpy by the stack. ++ * ++ * This function may not be called in IRQ context. Calls to this function ++ * for a single hardware must be synchronized against each other. Calls to ++ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be ++ * mixed for a single hardware. Must not run concurrently with ++ * ieee80211_tx_status() or ieee80211_tx_status_ni(). ++ * ++ * This function must be called with BHs disabled and RCU read lock ++ * ++ * @hw: the hardware this frame came in on ++ * @sta: the station the frame was received from, or %NULL ++ * @skb: the buffer to receive, owned by mac80211 after this call ++ * @list: the destination list ++ */ ++void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta, ++ struct sk_buff *skb, struct list_head *list); ++ ++/** + * ieee80211_rx_napi - receive frame from NAPI context + * + * Use this function to hand received frames to mac80211. The receive +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -218,7 +218,7 @@ enum ieee80211_rx_flags { + }; + + struct ieee80211_rx_data { +- struct napi_struct *napi; ++ struct list_head *list; + struct sk_buff *skb; + struct ieee80211_local *local; + struct ieee80211_sub_if_data *sdata; +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -2552,8 +2552,8 @@ static void ieee80211_deliver_skb_to_loc + memset(skb->cb, 0, sizeof(skb->cb)); + + /* deliver to local stack */ +- if (rx->napi) +- napi_gro_receive(rx->napi, skb); ++ if (rx->list) ++ list_add_tail(&skb->list, rx->list); + else + netif_receive_skb(skb); + } +@@ -3843,7 +3843,6 @@ void ieee80211_release_reorder_timeout(s + /* This is OK -- must be QoS data frame */ + .security_idx = tid, + .seqno_idx = tid, +- .napi = NULL, /* must be NULL to not have races */ + }; + struct tid_ampdu_rx *tid_agg_rx; + +@@ -4453,8 +4452,8 @@ static bool ieee80211_invoke_fast_rx(str + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, fast_rx->dev); + memset(skb->cb, 0, sizeof(skb->cb)); +- if (rx->napi) +- napi_gro_receive(rx->napi, skb); ++ if (rx->list) ++ list_add_tail(&skb->list, rx->list); + else + netif_receive_skb(skb); + +@@ -4521,7 +4520,7 @@ static bool ieee80211_prepare_and_rx_han + static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct sk_buff *skb, +- struct napi_struct *napi) ++ struct list_head *list) + { + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; +@@ -4536,7 +4535,7 @@ static void __ieee80211_rx_handle_packet + memset(&rx, 0, sizeof(rx)); + rx.skb = skb; + rx.local = local; +- rx.napi = napi; ++ rx.list = list; + + if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) + I802_DEBUG_INC(local->dot11ReceivedFragmentCount); +@@ -4644,8 +4643,8 @@ static void __ieee80211_rx_handle_packet + * This is the receive path handler. It is called by a low level driver when an + * 802.11 MPDU is received from the hardware. + */ +-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, +- struct sk_buff *skb, struct napi_struct *napi) ++void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, ++ struct sk_buff *skb, struct list_head *list) + { + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rate *rate = NULL; +@@ -4737,36 +4736,53 @@ void ieee80211_rx_napi(struct ieee80211_ + status->rx_flags = 0; + + /* +- * key references and virtual interfaces are protected using RCU +- * and this requires that we are in a read-side RCU section during +- * receive processing +- */ +- rcu_read_lock(); +- +- /* + * Frames with failed FCS/PLCP checksum are not returned, + * all other frames are returned without radiotap header + * if it was previously present. + * Also, frames with less than 16 bytes are dropped. + */ + skb = ieee80211_rx_monitor(local, skb, rate); +- if (!skb) { +- rcu_read_unlock(); ++ if (!skb) + return; +- } + + ieee80211_tpt_led_trig_rx(local, + ((struct ieee80211_hdr *)skb->data)->frame_control, + skb->len); + +- __ieee80211_rx_handle_packet(hw, pubsta, skb, napi); +- +- rcu_read_unlock(); ++ __ieee80211_rx_handle_packet(hw, pubsta, skb, list); + + return; + drop: + kfree_skb(skb); + } ++EXPORT_SYMBOL(ieee80211_rx_list); ++ ++void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, ++ struct sk_buff *skb, struct napi_struct *napi) ++{ ++ struct sk_buff *tmp; ++ LIST_HEAD(list); ++ ++ ++ /* ++ * key references and virtual interfaces are protected using RCU ++ * and this requires that we are in a read-side RCU section during ++ * receive processing ++ */ ++ rcu_read_lock(); ++ ieee80211_rx_list(hw, pubsta, skb, &list); ++ rcu_read_unlock(); ++ ++ if (!napi) { ++ netif_receive_skb_list(&list); ++ return; ++ } ++ ++ list_for_each_entry_safe(skb, tmp, &list, list) { ++ skb_list_del_init(skb); ++ napi_gro_receive(napi, skb); ++ } ++} + EXPORT_SYMBOL(ieee80211_rx_napi); + + /* This is a version of the rx handler that can be called from hard irq diff --git a/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch b/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch new file mode 100644 index 0000000000..77ecc82302 --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch @@ -0,0 +1,55 @@ +From: Felix Fietkau +Date: Sun, 26 Jul 2020 14:37:02 +0200 +Subject: [PATCH] net/fq_impl: use skb_get_hash instead of + skb_get_hash_perturb + +This avoids unnecessary regenerating of the skb flow hash + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/fq.h ++++ b/include/net/fq.h +@@ -69,15 +69,6 @@ struct fq { + struct list_head backlogs; + spinlock_t lock; + u32 flows_cnt; +-#if LINUX_VERSION_IS_GEQ(5,3,10) || \ +- LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \ +- LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \ +- LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \ +- LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0) +- siphash_key_t perturbation; +-#else +- u32 perturbation; +-#endif + u32 limit; + u32 memory_limit; + u32 memory_usage; +--- a/include/net/fq_impl.h ++++ b/include/net/fq_impl.h +@@ -108,15 +108,7 @@ begin: + + static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) + { +-#if LINUX_VERSION_IS_GEQ(5,3,10) || \ +- LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \ +- LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \ +- LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \ +- LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0) +- u32 hash = skb_get_hash_perturb(skb, &fq->perturbation); +-#else +- u32 hash = skb_get_hash_perturb(skb, fq->perturbation); +-#endif ++ u32 hash = skb_get_hash(skb); + + return reciprocal_scale(hash, fq->flows_cnt); + } +@@ -316,7 +308,6 @@ static int fq_init(struct fq *fq, int fl + INIT_LIST_HEAD(&fq->backlogs); + spin_lock_init(&fq->lock); + fq->flows_cnt = max_t(u32, flows_cnt, 1); +- get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); + fq->quantum = 300; + fq->limit = 8192; + fq->memory_limit = 16 << 20; /* 16 MBytes */ diff --git a/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch b/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch new file mode 100644 index 0000000000..92b136279a --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch @@ -0,0 +1,19 @@ +From: Felix Fietkau +Date: Sun, 26 Jul 2020 14:42:58 +0200 +Subject: [PATCH] mac80211: calculcate skb hash early when using itxq + +This avoids flow separation issues when using software encryption + +Signed-off-by: Felix Fietkau +--- + +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -3937,6 +3937,7 @@ void __ieee80211_subif_start_xmit(struct + if (local->ops->wake_tx_queue) { + u16 queue = __ieee80211_select_queue(sdata, sta, skb); + skb_set_queue_mapping(skb, queue); ++ skb_get_hash(skb); + } + + if (sta) {