From: Felix Fietkau Date: Fri, 12 Apr 2024 19:51:50 +0000 (+0200) Subject: netifd: rewrite packet steering script X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=a205a5734eda4604a19b6cb0f65909ef69730699;p=openwrt%2Fopenwrt.git netifd: rewrite packet steering script The new script uses a different strategy compared to the previous one. Instead of trying to split flows by hash and spread them to all CPUs, use RPS to redirect packets to a single core only. Try to spread NAPI thread and RPS target CPUs across available CPUs and try to ensure that the NAPI thread is on a different CPU than the RPS target. This significantly reduces cycles wasted on the scheduler. Signed-off-by: Felix Fietkau --- diff --git a/package/network/config/netifd/Makefile b/package/network/config/netifd/Makefile index 35b5c0b277..d80c2eeed6 100644 --- a/package/network/config/netifd/Makefile +++ b/package/network/config/netifd/Makefile @@ -21,7 +21,7 @@ include $(INCLUDE_DIR)/cmake.mk define Package/netifd SECTION:=base CATEGORY:=Base system - DEPENDS:=+libuci +libnl-tiny +libubus +ubus +ubusd +jshn +libubox +libudebug + DEPENDS:=+libuci +libnl-tiny +libubus +ubus +ubusd +jshn +libubox +libudebug +ucode +ucode-mod-fs TITLE:=OpenWrt Network Interface Configuration Daemon endef diff --git a/package/network/config/netifd/files/etc/init.d/packet_steering b/package/network/config/netifd/files/etc/init.d/packet_steering index 9d8f791e23..d6f6afc2e1 100755 --- a/package/network/config/netifd/files/etc/init.d/packet_steering +++ b/package/network/config/netifd/files/etc/init.d/packet_steering @@ -14,5 +14,10 @@ service_triggers() { } reload_service() { - /usr/libexec/network/packet-steering.sh + packet_steering="$(uci get "network.@globals[0].packet_steering")" + if [ -e "/usr/libexec/platform/packet-steering.sh" ]; then + /usr/libexec/platform/packet-steering.sh "$packet_steering" + else + /usr/libexec/network/packet-steering.uc "$packet_steering" + fi } diff --git 
a/package/network/config/netifd/files/usr/libexec/network/packet-steering.sh b/package/network/config/netifd/files/usr/libexec/network/packet-steering.sh deleted file mode 100755 index 799c080805..0000000000 --- a/package/network/config/netifd/files/usr/libexec/network/packet-steering.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh -NPROCS="$(grep -c "^processor.*:" /proc/cpuinfo)" -[ "$NPROCS" -gt 1 ] || exit - -PROC_MASK="$(( (1 << $NPROCS) - 1 ))" - -find_irq_cpu() { - local dev="$1" - local match="$(grep -m 1 "$dev\$" /proc/interrupts)" - local cpu=0 - - [ -n "$match" ] && { - set -- $match - shift - for cur in $(seq 1 $NPROCS); do - [ "$1" -gt 0 ] && { - cpu=$(($cur - 1)) - break - } - shift - done - } - - echo "$cpu" -} - -set_hex_val() { - local file="$1" - local val="$2" - val="$(printf %x "$val")" - [ -n "$DEBUG" ] && echo "$file = $val" - echo "$val" > "$file" -} - -packet_steering="$(uci get "network.@globals[0].packet_steering")" -[ "$packet_steering" != 1 ] && exit 0 - -exec 512>/var/lock/smp_tune.lock -flock 512 || exit 1 - -[ -e "/usr/libexec/platform/packet-steering.sh" ] && { - /usr/libexec/platform/packet-steering.sh - exit 0 -} - -for dev in /sys/class/net/*; do - [ -d "$dev" ] || continue - - # ignore virtual interfaces - [ -n "$(ls "${dev}/" | grep '^lower_')" ] && continue - [ -d "${dev}/device" ] || continue - - device="$(readlink "${dev}/device")" - device="$(basename "$device")" - irq_cpu="$(find_irq_cpu "$device")" - irq_cpu_mask="$((1 << $irq_cpu))" - - for q in ${dev}/queues/tx-*; do - set_hex_val "$q/xps_cpus" "$PROC_MASK" - done - - # ignore dsa slave ports for RPS - subsys="$(readlink "${dev}/device/subsystem")" - subsys="$(basename "$subsys")" - [ "$subsys" = "mdio_bus" ] && continue - - for q in ${dev}/queues/rx-*; do - set_hex_val "$q/rps_cpus" "$PROC_MASK" - done -done diff --git a/package/network/config/netifd/files/usr/libexec/network/packet-steering.uc b/package/network/config/netifd/files/usr/libexec/network/packet-steering.uc 
new file mode 100755
index 0000000000..f146a96e41
--- /dev/null
+++ b/package/network/config/netifd/files/usr/libexec/network/packet-steering.uc
@@ -0,0 +1,248 @@
+#!/usr/bin/env ucode
+'use strict';
+import { glob, basename, dirname, readlink, readfile, realpath, writefile, error, open } from "fs";
+
+// Load charged to the CPU that receives a device's matched tasks
+let napi_weight = 1.0;
+// Share of any added load that is also charged to CPUs with the same core_id
+let cpu_thread_weight = 0.75;
+// Load charged to the CPU chosen as a device's RPS target
+let rx_weight = 0.75;
+// Extra load charged to CPUs running ethernet tasks before WLAN is placed
+let eth_bias = 2.0;
+let debug = 0, do_nothing = 0;
+let disable;
+let cpus;
+
+// Arguments: "-d" raises verbosity (repeatable), "-n" only prints what would
+// be done, "0" undoes steering (tasks may use all CPUs, rps_cpus becomes 0).
+for (let arg in ARGV) {
+	switch (arg) {
+	case "-d":
+		debug++;
+		break;
+	case "-n":
+		do_nothing++;
+		break;
+	case '0':
+		disable = true;
+		break;
+	}
+}
+
+// Return the text after the first tab on the first line of /proc/<pid>/status,
+// or null when the file cannot be opened or read, e.g. because the task exited.
+function task_name(pid)
+{
+	let stat = open(`/proc/${pid}/status`, "r");
+	if (!stat)
+		return null;
+
+	let line = stat.read("line");
+	stat.close();
+	if (!line)
+		return null;
+
+	return trim(split(line, "\t", 2)[1]);
+}
+
+// Pin task `pid` to `cpu` via taskset; when disabling, allow every known CPU.
+function set_task_cpu(pid, cpu) {
+	if (disable)
+		cpu = join(",", map(cpus, (cpu) => cpu.id));
+	if (debug || do_nothing)
+		warn(`taskset -p -c ${cpu} ${task_name(pid) || pid}\n`);
+	if (!do_nothing)
+		system(`taskset -p -c ${cpu} ${pid}`);
+}
+
+// Write a single-CPU mask (or 0 when disabling) to rps_cpus of every rx queue
+// of network device `dev`.
+function set_netdev_cpu(dev, cpu) {
+	let queues = glob(`/sys/class/net/${dev}/queues/rx-*/rps_cpus`);
+	let val = sprintf("%x", (1 << int(cpu)));
+	if (disable)
+		val = 0;
+	for (let queue in queues) {
+		if (debug || do_nothing)
+			warn(`echo ${val} > ${queue}\n`);
+		if (!do_nothing)
+			writefile(queue, `${val}`);
+	}
+}
+
+// Return true if the task called `name` belongs to `device`: either the name
+// embeds one of the device's netdev/phy names ("napi/<dev>-<id>",
+// "mt76-tx phy<N>"), or it is a "napi/mtk_eth-" task and the driver is
+// mtk_soc_eth.
+function task_device_match(name, device)
+{
+	if (!name)
+		return false;
+
+	// The device name is everything between "napi/" and the "-<id>" suffix
+	let napi_match = match(name, /napi\/([^-]+)-\d+/);
+	if (!napi_match)
+		napi_match = match(name, /mt76-tx (phy\d+)/);
+	if (napi_match &&
+	    (index(device.phy, napi_match[1]) >= 0 ||
+	     index(device.netdev, napi_match[1]) >= 0))
+		return true;
+
+	if (device.driver == "mtk_soc_eth" && match(name, /napi\/mtk_eth-/))
+		return true;
+
+	return false;
+}
+
+// One entry per CPU found in sysfs: id, core_id and the load assigned so far
+cpus = map(glob("/sys/bus/cpu/devices/*"), (path) => {
+	return {
+		id: int(match(path, /.*cpu(\d+)/)[1]),
+		core: int(trim(readfile(`${path}/topology/core_id`))),
+		load: 0.0,
+	};
+});
+
+// glob() may list cpu10 before cpu2; order by id so the 64 CPU limit keeps the
+// lowest ids. NOTE(review): the limit presumably matches the mask width
+// available to "1 << cpu" - confirm.
+cpus = slice(sort(cpus, (a, b) => a.id - b.id), 0, 64);
+if (length(cpus) < 2)
+	exit(0);
+
+// Charge `weight` to the CPU with id `cpu_id` and a reduced share of it to all
+// other CPUs reporting the same core_id.
+function cpu_add_weight(cpu_id, weight)
+{
+	// Look the CPU up by id: the array position differs from the id whenever
+	// ids are not contiguous from 0
+	let cpu = filter(cpus, (c) => c.id == cpu_id)[0];
+	if (!cpu)
+		return;
+
+	cpu.load += weight;
+	for (let sibling in cpus) {
+		if (sibling == cpu || sibling.core != cpu.core)
+			continue;
+		sibling.load += weight * cpu_thread_weight;
+	}
+}
+
+// Pick the least loaded CPU, skipping `prev_cpu` if it ranks first, charge
+// `weight` to it and return its id. Always returns 0 when disabling.
+function get_next_cpu(weight, prev_cpu)
+{
+	if (disable)
+		return 0;
+
+	let sort_cpus = sort(slice(cpus), (a, b) => a.load - b.load);
+	let idx = 0;
+
+	if (prev_cpu != null && sort_cpus[idx].id == prev_cpu)
+		idx++;
+
+	let cpu = sort_cpus[idx].id;
+	cpu_add_weight(cpu, weight);
+	return cpu;
+}
+
+let phys_devs = {};
+let netdev_phys = {};
+let netdevs = map(glob("/sys/class/net/*"), (dev) => basename(dev));
+
+// Group network devices by the device their sysfs "device" link resolves to
+for (let dev in netdevs) {
+	let pdev_path = realpath(`/sys/class/net/${dev}/device`);
+	if (!pdev_path)
+		continue;
+
+	// Ignore virtual interfaces (those exposing lower_* links)
+	if (length(glob(`/sys/class/net/${dev}/lower_*`)) > 0)
+		continue;
+
+	let pdev = phys_devs[pdev_path];
+	if (!pdev) {
+		pdev = phys_devs[pdev_path] = {
+			path: pdev_path,
+			driver: basename(readlink(`${pdev_path}/driver`)),
+			netdev: [],
+			phy: [],
+			tasks: [],
+		};
+	}
+
+	let phyidx = trim(readfile(`/sys/class/net/${dev}/phy80211/index`));
+	if (phyidx != null) {
+		let phy = `phy${phyidx}`;
+		if (index(pdev.phy, phy) < 0)
+			push(pdev.phy, phy);
+	}
+
+	push(pdev.netdev, dev);
+	netdev_phys[dev] = pdev;
+}
+
+// Attach tasks to devices. Only tasks whose exe link fails to resolve with
+// ENOENT are considered. NOTE(review): presumably this selects kernel threads
+// such as the NAPI threads - confirm.
+for (let path in glob("/proc/*/exe")) {
+	// Test the result first so an error left behind by an earlier fs call is
+	// not mistaken for a failure of this readlink
+	if (readlink(path) != null || error() != "No such file or directory")
+		continue;
+
+	let pid = basename(dirname(path));
+	let name = task_name(pid);
+	if (!name)
+		continue;
+
+	for (let devname in phys_devs) {
+		let dev = phys_devs[devname];
+		if (!task_device_match(name, dev))
+			continue;
+
+		push(dev.tasks, pid);
+		break;
+	}
+}
+
+// Pin the device's tasks to one CPU and point RPS of its netdevs at another
+function assign_dev_cpu(dev) {
+	if (length(dev.tasks) > 0) {
+		let cpu = dev.napi_cpu = get_next_cpu(napi_weight);
+		for (let task in dev.tasks)
+			set_task_cpu(task, cpu);
+	}
+
+	if (length(dev.netdev) > 0) {
+		let cpu = dev.rx_cpu = get_next_cpu(rx_weight, dev.napi_cpu);
+		for (let netdev in dev.netdev)
+			set_netdev_cpu(netdev, cpu);
+	}
+}
+
+// Assign ethernet devices first
+for (let devname in phys_devs) {
+	let dev = phys_devs[devname];
+	if (!length(dev.phy))
+		assign_dev_cpu(dev);
+}
+
+// Add bias to avoid assigning other tasks to CPUs with ethernet NAPI
+for (let devname in phys_devs) {
+	let dev = phys_devs[devname];
+	if (!length(dev.tasks) || dev.napi_cpu == null)
+		continue;
+	cpu_add_weight(dev.napi_cpu, eth_bias);
+}
+
+// Assign WLAN devices
+for (let devname in phys_devs) {
+	let dev = phys_devs[devname];
+	if (length(dev.phy) > 0)
+		assign_dev_cpu(dev);
+}
+
+if (debug > 1)
+	warn(sprintf("devices: %.J\ncpus: %.J\n", phys_devs, cpus));