From: Felix Fietkau
Date: Wed, 22 Jan 2025 11:11:56 +0000 (+0100)
Subject: kernel: backport improvement to page pool fragment handling from 6.7
X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=9508ca44eb577a13e795e0a6c5683689efc61475;p=openwrt%2Fopenwrt.git

kernel: backport improvement to page pool fragment handling from 6.7

Makes it easier to keep drivers like mt76 in sync with newer versions

Signed-off-by: Felix Fietkau
---

diff --git a/target/linux/generic/backport-6.6/620-01-page_pool-fragment-API-support-for-32-bit-arch-with-.patch b/target/linux/generic/backport-6.6/620-01-page_pool-fragment-API-support-for-32-bit-arch-with-.patch
new file mode 100644
index 0000000000..0f57e2ec66
--- /dev/null
+++ b/target/linux/generic/backport-6.6/620-01-page_pool-fragment-API-support-for-32-bit-arch-with-.patch
@@ -0,0 +1,139 @@
+From: Yunsheng Lin
+Date: Fri, 13 Oct 2023 14:48:21 +0800
+Subject: [PATCH] page_pool: fragment API support for 32-bit arch with 64-bit
+ DMA
+
+Currently page_pool_alloc_frag() is not supported in 32-bit
+arch with 64-bit DMA because of the overlap issue between
+pp_frag_count and dma_addr_upper in 'struct page' for those
+arches, which seems to be quite common, see [1], which means
+driver may need to handle it when using fragment API.
+
+It is assumed that the combination of the above arch with an
+address space >16TB does not exist, as all those arches have
+64b equivalent, it seems logical to use the 64b version for a
+system with a large address space. It is also assumed that dma
+address is page aligned when we are dma mapping a page aligned
+buffer, see [2].
+
+That means we're storing 12 bits of 0 at the lower end for a
+dma address, we can reuse those bits for the above arches to
+support 32b+12b, which is 16TB of memory.
+
+If we make a wrong assumption, a warning is emitted so that
+user can report to us.
+
+1. https://lore.kernel.org/all/20211117075652.58299-1-linyunsheng@huawei.com/
+2. https://lore.kernel.org/all/20230818145145.4b357c89@kernel.org/
+
+Tested-by: Alexander Lobakin
+Signed-off-by: Yunsheng Lin
+CC: Lorenzo Bianconi
+CC: Alexander Duyck
+CC: Liang Chen
+CC: Guillaume Tucker
+CC: Matthew Wilcox
+CC: Linux-MM
+Link: https://lore.kernel.org/r/20231013064827.61135-2-linyunsheng@huawei.com
+Signed-off-by: Jakub Kicinski
+---
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -125,18 +125,7 @@ struct page {
+ struct page_pool *pp;
+ unsigned long _pp_mapping_pad;
+ unsigned long dma_addr;
+- union {
+- /**
+- * dma_addr_upper: might require a 64-bit
+- * value on 32-bit architectures.
+- */
+- unsigned long dma_addr_upper;
+- /**
+- * For frag page support, not supported in
+- * 32-bit architectures with 64-bit DMA.
+- */
+- atomic_long_t pp_frag_count;
+- };
++ atomic_long_t pp_frag_count;
+ };
+ struct { /* Tail pages of compound page */
+ unsigned long compound_head; /* Bit zero is set */
+--- a/include/net/page_pool/helpers.h
++++ b/include/net/page_pool/helpers.h
+@@ -197,7 +197,7 @@ static inline void page_pool_recycle_dir
+ page_pool_put_full_page(pool, page, true);
+ }
+
+-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
++#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
+ /**
+@@ -211,17 +211,25 @@ static inline dma_addr_t page_pool_get_d
+ {
+ dma_addr_t ret = page->dma_addr;
+
+- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+- ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
++ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
++ ret <<= PAGE_SHIFT;
+
+ return ret;
+ }
+
+-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
++static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+ {
++ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
++ page->dma_addr = addr >> PAGE_SHIFT;
++
++ /* We assume page alignment to shave off bottom bits,
++ * if this "compression" doesn't work we need to drop.
++ */
++ return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
++ }
++
+ page->dma_addr = addr;
+- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+- page->dma_addr_upper = upper_32_bits(addr);
++ return false;
+ }
+
+ static inline bool page_pool_put(struct page_pool *pool)
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -211,10 +211,6 @@ static int page_pool_init(struct page_po
+ */
+ }
+
+- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+- pool->p.flags & PP_FLAG_PAGE_FRAG)
+- return -EINVAL;
+-
+ #ifdef CONFIG_PAGE_POOL_STATS
+ pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+ if (!pool->recycle_stats)
+@@ -363,12 +359,20 @@ static bool page_pool_dma_map(struct pag
+ if (dma_mapping_error(pool->p.dev, dma))
+ return false;
+
+- page_pool_set_dma_addr(page, dma);
++ if (page_pool_set_dma_addr(page, dma))
++ goto unmap_failed;
+
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+
+ return true;
+
++unmap_failed:
++ WARN_ON_ONCE("unexpected DMA address, please report to netdev@");
++ dma_unmap_page_attrs(pool->p.dev, dma,
++ PAGE_SIZE << pool->p.order, pool->p.dma_dir,
++ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
++ return false;
+ }
+
+ static void page_pool_set_pp_info(struct page_pool *pool,
diff --git a/target/linux/generic/backport-6.6/620-02-v6.7-page_pool-unify-frag_count-handling-in-page_pool_is_.patch b/target/linux/generic/backport-6.6/620-02-v6.7-page_pool-unify-frag_count-handling-in-page_pool_is_.patch
new file mode 100644
index 0000000000..1ad0eb449d
--- /dev/null
+++ b/target/linux/generic/backport-6.6/620-02-v6.7-page_pool-unify-frag_count-handling-in-page_pool_is_.patch
@@ -0,0 +1,183 @@
+From: Yunsheng Lin
+Date: Fri, 20 Oct 2023 17:59:48 +0800
+Subject: [PATCH] page_pool: unify frag_count handling in
+ page_pool_is_last_frag()
+
+Currently when page_pool_create() is called with
+PP_FLAG_PAGE_FRAG flag, page_pool_alloc_pages() is only
+allowed to be called under the below constraints:
+1. page_pool_fragment_page() need to be called to setup
+ page->pp_frag_count immediately.
+2. page_pool_defrag_page() often need to be called to drain
+ the page->pp_frag_count when there is no more user will
+ be holding on to that page.
+
+Those constraints exist in order to support a page to be
+split into multi fragments.
+
+And those constraints have some overhead because of the
+cache line dirtying/bouncing and atomic update.
+
+Those constraints are unavoidable for case when we need a
+page to be split into more than one fragment, but there is
+also case that we want to avoid the above constraints and
+their overhead when a page can't be split as it can only
+hold a fragment as requested by user, depending on different
+use cases:
+use case 1: allocate page without page splitting.
+use case 2: allocate page with page splitting.
+use case 3: allocate page with or without page splitting
+ depending on the fragment size.
+
+Currently page pool only provide page_pool_alloc_pages() and
+page_pool_alloc_frag() API to enable the 1 & 2 separately,
+so we can not use a combination of 1 & 2 to enable 3, it is
+not possible yet because of the per page_pool flag
+PP_FLAG_PAGE_FRAG.
+
+So in order to allow allocating unsplit page without the
+overhead of split page while still allow allocating split
+page we need to remove the per page_pool flag in
+page_pool_is_last_frag(), as best as I can think of, it seems
+there are two methods as below:
+1. Add per page flag/bit to indicate a page is split or
+ not, which means we might need to update that flag/bit
+ everytime the page is recycled, dirtying the cache line
+ of 'struct page' for use case 1.
+2. Unify the page->pp_frag_count handling for both split and
+ unsplit page by assuming all pages in the page pool is split
+ into a big fragment initially.
+
+As page pool already supports use case 1 without dirtying the
+cache line of 'struct page' whenever a page is recyclable, we
+need to support the above use case 3 with minimal overhead,
+especially not adding any noticeable overhead for use case 1,
+and we are already doing an optimization by not updating
+pp_frag_count in page_pool_defrag_page() for the last fragment
+user, this patch chooses to unify the pp_frag_count handling
+to support the above use case 3.
+
+There is no noticeable performance degradation and some
+justification for unifying the frag_count handling with this
+patch applied using a micro-benchmark testing in [1].
+
+1. https://lore.kernel.org/all/bf2591f8-7b3c-4480-bb2c-31dc9da1d6ac@huawei.com/
+
+Signed-off-by: Yunsheng Lin
+CC: Lorenzo Bianconi
+CC: Alexander Duyck
+CC: Liang Chen
+CC: Alexander Lobakin
+Link: https://lore.kernel.org/r/20231020095952.11055-2-linyunsheng@huawei.com
+Signed-off-by: Jakub Kicinski
+---
+
+--- a/include/net/page_pool/helpers.h
++++ b/include/net/page_pool/helpers.h
+@@ -115,28 +115,49 @@ static inline long page_pool_defrag_page
+ long ret;
+
+ /* If nr == pp_frag_count then we have cleared all remaining
+- * references to the page. No need to actually overwrite it, instead
+- * we can leave this to be overwritten by the calling function.
++ * references to the page:
++ * 1. 'n == 1': no need to actually overwrite it.
++ * 2. 'n != 1': overwrite it with one, which is the rare case
++ * for pp_frag_count draining.
+ *
+- * The main advantage to doing this is that an atomic_read is
+- * generally a much cheaper operation than an atomic update,
+- * especially when dealing with a page that may be partitioned
+- * into only 2 or 3 pieces.
++ * The main advantage to doing this is that not only we avoid a atomic
++ * update, as an atomic_read is generally a much cheaper operation than
++ * an atomic update, especially when dealing with a page that may be
++ * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count
++ * handling by ensuring all pages have partitioned into only 1 piece
++ * initially, and only overwrite it when the page is partitioned into
++ * more than one piece.
+ */
+- if (atomic_long_read(&page->pp_frag_count) == nr)
++ if (atomic_long_read(&page->pp_frag_count) == nr) {
++ /* As we have ensured nr is always one for constant case using
++ * the BUILD_BUG_ON(), only need to handle the non-constant case
++ * here for pp_frag_count draining, which is a rare case.
++ */
++ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
++ if (!__builtin_constant_p(nr))
++ atomic_long_set(&page->pp_frag_count, 1);
++
+ return 0;
++ }
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
++
++ /* We are the last user here too, reset pp_frag_count back to 1 to
++ * ensure all pages have been partitioned into 1 piece initially,
++ * this should be the rare case when the last two fragment users call
++ * page_pool_defrag_page() currently.
++ */
++ if (unlikely(!ret))
++ atomic_long_set(&page->pp_frag_count, 1);
++
+ return ret;
+ }
+
+-static inline bool page_pool_is_last_frag(struct page_pool *pool,
+- struct page *page)
++static inline bool page_pool_is_last_frag(struct page *page)
+ {
+- /* If fragments aren't enabled or count is 0 we were the last user */
+- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+- (page_pool_defrag_page(page, 1) == 0);
++ /* If page_pool_defrag_page() returns 0, we were the last user */
++ return page_pool_defrag_page(page, 1) == 0;
+ }
+
+ /**
+@@ -161,7 +182,7 @@ static inline void page_pool_put_page(st
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+ #ifdef CONFIG_PAGE_POOL
+- if (!page_pool_is_last_frag(pool, page))
++ if (!page_pool_is_last_frag(page))
+ return;
+
+ page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -380,6 +380,14 @@ static void page_pool_set_pp_info(struct
+ {
+ page->pp = pool;
+ page->pp_magic |= PP_SIGNATURE;
++
++ /* Ensuring all pages have been split into one fragment initially:
++ * page_pool_set_pp_info() is only called once for every page when it
++ * is allocated from the page allocator and page_pool_fragment_page()
++ * is dirtying the same cache line as the page->pp_magic above, so
++ * the overhead is negligible.
++ */
++ page_pool_fragment_page(page, 1);
+ if (pool->p.init_callback)
+ pool->p.init_callback(page, pool->p.init_arg);
+ }
+@@ -676,7 +684,7 @@ void page_pool_put_page_bulk(struct page
+ struct page *page = virt_to_head_page(data[i]);
+
+ /* It is not the last user for the page frag case */
+- if (!page_pool_is_last_frag(pool, page))
++ if (!page_pool_is_last_frag(page))
+ continue;
+
+ page = __page_pool_put_page(pool, page, -1, false);
+@@ -752,8 +760,7 @@ struct page *page_pool_alloc_frag(struct
+ unsigned int max_size = PAGE_SIZE << pool->p.order;
+ struct page *page = pool->frag_page;
+
+- if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+- size > max_size))
++ if (WARN_ON(size > max_size))
+ return NULL;
+
+ size = ALIGN(size, dma_get_cache_alignment());
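Note: the address "compression" used by 620-01 can be illustrated outside the kernel. The sketch below is a minimal userspace model, not the kernel code: the model_* names and the fixed MODEL_PAGE_SHIFT of 12 are invented for the example. It stores a page-aligned 64-bit DMA address in an unsigned long by shifting out the page-offset bits, and reports failure when the address cannot be recovered losslessly, mirroring the bool return added to page_pool_set_dma_addr().

/* Illustrative userspace model of the shift-based DMA address storage
 * in 620-01; names and widths are simplified stand-ins, not the kernel's.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12     /* assume 4 KiB pages */

struct model_page {
        unsigned long dma_addr; /* 32 bits wide on a 32-bit arch */
};

/* Returns true on failure, like the patched page_pool_set_dma_addr(). */
static bool model_set_dma_addr(struct model_page *page, uint64_t addr)
{
        page->dma_addr = addr >> MODEL_PAGE_SHIFT;
        /* Page alignment is assumed; if shifting lost information,
         * report failure so the caller can unmap and drop the page.
         */
        return addr != ((uint64_t)page->dma_addr << MODEL_PAGE_SHIFT);
}

static uint64_t model_get_dma_addr(const struct model_page *page)
{
        return (uint64_t)page->dma_addr << MODEL_PAGE_SHIFT;
}

int main(void)
{
        struct model_page page;
        uint64_t addr = 0x1234567000ULL;        /* page aligned, above 4 GiB */

        if (model_set_dma_addr(&page, addr))
                printf("address cannot be stored compressed\n");
        else
                printf("stored 0x%llx, read back 0x%llx\n",
                       (unsigned long long)addr,
                       (unsigned long long)model_get_dma_addr(&page));
        return 0;
}

When dma_addr is only 32 bits wide, the round trip is lossless for page-aligned addresses up to 32b+12b, the 16TB limit mentioned in the commit message; a misaligned or larger address makes the check fail, which is what triggers the WARN_ON_ONCE() and unmap path added to page_pool_dma_map().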
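Note: the invariant introduced by 620-02 (every pool page always carries pp_frag_count and starts life as a single fragment) can likewise be sketched with plain C11 atomics. This is a hypothetical model, not the kernel API: the model_* names are invented here and the BUILD_BUG_ON()/__builtin_constant_p() fast-path trick is replaced by a runtime check. It shows why page_pool_is_last_frag() no longer needs the PP_FLAG_PAGE_FRAG test: an unsplit page is simply a page split into one piece, and the count is reset to 1 whenever the last user drops it.

/* Illustrative userspace model of the unified pp_frag_count handling in
 * 620-02; the logic follows the patched page_pool_defrag_page() and
 * page_pool_is_last_frag(), with simplified names.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct model_page {
        atomic_long frag_count;
};

/* Every page leaves the allocator as "one big fragment". */
static void model_fragment_page(struct model_page *page, long nr)
{
        atomic_store(&page->frag_count, nr);
}

static long model_defrag_page(struct model_page *page, long nr)
{
        long ret;

        /* Cheap read-only fast path for the last (or only) user. */
        if (atomic_load(&page->frag_count) == nr) {
                if (nr != 1)    /* rare draining case: reset for reuse */
                        atomic_store(&page->frag_count, 1);
                return 0;
        }

        /* atomic_fetch_sub() returns the old value, so subtract nr again. */
        ret = atomic_fetch_sub(&page->frag_count, nr) - nr;
        if (ret == 0)           /* last user via the slow path: reset too */
                atomic_store(&page->frag_count, 1);
        return ret;
}

static bool model_is_last_frag(struct model_page *page)
{
        /* No per-pool flag needed: unsplit pages just have count == 1. */
        return model_defrag_page(page, 1) == 0;
}

int main(void)
{
        struct model_page page;

        model_fragment_page(&page, 1);          /* unsplit page */
        printf("unsplit: last=%d\n", model_is_last_frag(&page));

        model_fragment_page(&page, 3);          /* split into 3 fragments */
        for (int i = 0; i < 3; i++)
                printf("split user %d: last=%d\n", i, model_is_last_frag(&page));
        return 0;
}

Running this prints last=1 for the unsplit page and 0, 0, 1 for the three users of the split page, and in every case the count ends back at 1, which is the recycling invariant the patch relies on to drop the per-pool flag.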