From: Kevin Darbyshire-Bryant Date: Thu, 19 Jul 2018 08:16:42 +0000 (+0100) Subject: Revert "iproute2: tc: bump to support kmod-sched-cake" X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=54b91c85e786c2a9fbe082befb26130f4ed2a426;p=openwrt%2Fstaging%2Fhauke.git Revert "iproute2: tc: bump to support kmod-sched-cake" This reverts commit 8d4da3c5898ae3b594530b16c6f2ab79a2b7095b. 17.01.5 encountered mismatch between kmod version ABI & iproute2/tc version ABI. Revert for now, revisit for 17.01.6 Signed-off-by: Kevin Darbyshire-Bryant --- diff --git a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch index c2a9bdef1a..882db8af19 100644 --- a/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch +++ b/package/network/utils/iproute2/patches/950-add-cake-to-tc.patch @@ -1,12 +1,14 @@ --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h -@@ -852,2 +852,116 @@ enum { +@@ -850,4 +850,63 @@ struct tc_pie_xstats { + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ }; ++ +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, -+ TCA_CAKE_PAD, -+ TCA_CAKE_BASE_RATE64, ++ TCA_CAKE_BASE_RATE, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, @@ -16,761 +18,100 @@ + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, -+ TCA_CAKE_RAW, // was _ETHERNET ++ TCA_CAKE_ETHERNET, + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, -+ TCA_CAKE_SPLIT_GSO, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + -+enum { -+ __TCA_CAKE_STATS_INVALID, -+ TCA_CAKE_STATS_PAD, -+ TCA_CAKE_STATS_CAPACITY_ESTIMATE64, -+ TCA_CAKE_STATS_MEMORY_LIMIT, -+ TCA_CAKE_STATS_MEMORY_USED, -+ TCA_CAKE_STATS_AVG_NETOFF, -+ TCA_CAKE_STATS_MIN_NETLEN, -+ TCA_CAKE_STATS_MAX_NETLEN, -+ TCA_CAKE_STATS_MIN_ADJLEN, -+ TCA_CAKE_STATS_MAX_ADJLEN, -+ TCA_CAKE_STATS_TIN_STATS, -+ TCA_CAKE_STATS_DEFICIT, -+ TCA_CAKE_STATS_COBALT_COUNT, -+ TCA_CAKE_STATS_DROPPING, -+ TCA_CAKE_STATS_DROP_NEXT_US, -+ TCA_CAKE_STATS_P_DROP, -+ TCA_CAKE_STATS_BLUE_TIMER_US, -+ __TCA_CAKE_STATS_MAX ++struct tc_cake_traffic_stats { ++ __u32 packets; ++ __u32 link_ms; ++ __u64 bytes; +}; -+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + -+enum { -+ __TCA_CAKE_TIN_STATS_INVALID, -+ TCA_CAKE_TIN_STATS_PAD, -+ TCA_CAKE_TIN_STATS_SENT_PACKETS, -+ TCA_CAKE_TIN_STATS_SENT_BYTES64, -+ TCA_CAKE_TIN_STATS_DROPPED_PACKETS, -+ TCA_CAKE_TIN_STATS_DROPPED_BYTES64, -+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, -+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, -+ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, -+ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, -+ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, -+ TCA_CAKE_TIN_STATS_BACKLOG_BYTES, -+ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, -+ TCA_CAKE_TIN_STATS_TARGET_US, -+ TCA_CAKE_TIN_STATS_INTERVAL_US, -+ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, -+ TCA_CAKE_TIN_STATS_WAY_MISSES, -+ TCA_CAKE_TIN_STATS_WAY_COLLISIONS, -+ TCA_CAKE_TIN_STATS_PEAK_DELAY_US, -+ TCA_CAKE_TIN_STATS_AVG_DELAY_US, -+ TCA_CAKE_TIN_STATS_BASE_DELAY_US, -+ TCA_CAKE_TIN_STATS_SPARSE_FLOWS, -+ TCA_CAKE_TIN_STATS_BULK_FLOWS, -+ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, -+ TCA_CAKE_TIN_STATS_MAX_SKBLEN, -+ TCA_CAKE_TIN_STATS_FLOW_QUANTUM, -+ __TCA_CAKE_TIN_STATS_MAX -+}; -+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) -+ -+enum { -+ CAKE_FLOW_NONE = 0, -+ CAKE_FLOW_SRC_IP, -+ CAKE_FLOW_DST_IP, -+ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ -+ CAKE_FLOW_FLOWS, -+ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ -+ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ -+ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ -+ CAKE_FLOW_MAX, ++struct tc_cake_xstats { ++ __u16 version; /* == 5, increments when struct extended */ ++ __u8 max_tins; /* == TC_CAKE_MAX_TINS */ ++ __u8 tin_cnt; /* <= TC_CAKE_MAX_TINS */ ++ ++ __u32 threshold_rate [TC_CAKE_MAX_TINS]; ++ __u32 target_us [TC_CAKE_MAX_TINS]; ++ struct tc_cake_traffic_stats sent [TC_CAKE_MAX_TINS]; ++ struct tc_cake_traffic_stats dropped [TC_CAKE_MAX_TINS]; ++ struct tc_cake_traffic_stats ecn_marked[TC_CAKE_MAX_TINS]; ++ struct tc_cake_traffic_stats backlog [TC_CAKE_MAX_TINS]; ++ __u32 interval_us [TC_CAKE_MAX_TINS]; ++ __u32 way_indirect_hits[TC_CAKE_MAX_TINS]; ++ __u32 way_misses [TC_CAKE_MAX_TINS]; ++ __u32 way_collisions [TC_CAKE_MAX_TINS]; ++ __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= bulk flow delay */ ++ __u32 avge_delay_us [TC_CAKE_MAX_TINS]; ++ __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= sparse flows delay */ ++ __u16 sparse_flows [TC_CAKE_MAX_TINS]; ++ __u16 bulk_flows [TC_CAKE_MAX_TINS]; ++ __u16 unresponse_flows [TC_CAKE_MAX_TINS]; /* v4 - was u32 last_len */ ++ __u16 spare [TC_CAKE_MAX_TINS]; /* v4 - split last_len */ ++ __u32 max_skblen [TC_CAKE_MAX_TINS]; ++ __u32 capacity_estimate; /* version 2 */ ++ __u32 memory_limit; /* version 3 */ ++ __u32 memory_used; /* version 3 */ ++ struct tc_cake_traffic_stats ack_drops [TC_CAKE_MAX_TINS]; /* v5 */ +}; -+ -+enum { -+ CAKE_DIFFSERV_DIFFSERV3 = 0, -+ CAKE_DIFFSERV_DIFFSERV4, -+ CAKE_DIFFSERV_DIFFSERV8, -+ CAKE_DIFFSERV_BESTEFFORT, -+ CAKE_DIFFSERV_PRECEDENCE, -+ CAKE_DIFFSERV_MAX -+}; -+ -+enum { -+ CAKE_ACK_NONE = 0, -+ CAKE_ACK_FILTER, -+ CAKE_ACK_AGGRESSIVE, -+ CAKE_ACK_MAX -+}; -+ -+enum { -+ CAKE_ATM_NONE = 0, -+ CAKE_ATM_ATM, -+ CAKE_ATM_PTM, -+ CAKE_ATM_MAX -+}; -+ + #endif ---- /dev/null -+++ b/man/man8/tc-cake.8 -@@ -0,0 +1,632 @@ -+.TH CAKE 8 "23 November 2017" "iproute2" "Linux" -+.SH NAME -+CAKE \- Common Applications Kept Enhanced (CAKE) -+.SH SYNOPSIS -+.B tc qdisc ... cake -+.br -+[ -+.BR bandwidth -+RATE | -+.BR unlimited* -+| -+.BR autorate_ingress -+] -+.br -+[ -+.BR rtt -+TIME | -+.BR datacentre -+| -+.BR lan -+| -+.BR metro -+| -+.BR regional -+| -+.BR internet* -+| -+.BR oceanic -+| -+.BR satellite -+| -+.BR interplanetary -+] -+.br -+[ -+.BR besteffort -+| -+.BR diffserv8 -+| -+.BR diffserv4 -+| -+.BR diffserv3* -+] -+.br -+[ -+.BR flowblind -+| -+.BR srchost -+| -+.BR dsthost -+| -+.BR hosts -+| -+.BR flows -+| -+.BR dual-srchost -+| -+.BR dual-dsthost -+| -+.BR triple-isolate* -+] -+.br -+[ -+.BR nat -+| -+.BR nonat* -+] -+.br -+[ -+.BR wash -+| -+.BR nowash* -+] -+.br -+[ -+.BR ack-filter -+| -+.BR ack-filter-aggressive -+| -+.BR no-ack-filter* -+] -+.br -+[ -+.BR memlimit -+LIMIT ] -+.br -+[ -+.BR ptm -+| -+.BR atm -+| -+.BR noatm* -+] -+.br -+[ -+.BR overhead -+N | -+.BR conservative -+| -+.BR raw* -+] -+.br -+[ -+.BR mpu -+N ] -+.br -+[ -+.BR ingress -+| -+.BR egress* -+] -+.br -+(* marks defaults) -+ -+ -+.SH DESCRIPTION -+CAKE (Common Applications Kept Enhanced) is a shaping-capable queue discipline -+which uses both AQM and FQ. It combines COBALT, which is an AQM algorithm -+combining Codel and BLUE, a shaper which operates in deficit mode, and a variant -+of DRR++ for flow isolation. 8-way set-associative hashing is used to virtually -+eliminate hash collisions. Priority queuing is available through a simplified -+diffserv implementation. Overhead compensation for various encapsulation -+schemes is tightly integrated. -+ -+All settings are optional; the default settings are chosen to be sensible in -+most common deployments. Most people will only need to set the -+.B bandwidth -+parameter to get useful results, but reading the -+.B Overhead Compensation -+and -+.B Round Trip Time -+sections is strongly encouraged. -+ -+.SH SHAPER PARAMETERS -+CAKE uses a deficit-mode shaper, which does not exhibit the initial burst -+typical of token-bucket shapers. It will automatically burst precisely as much -+as required to maintain the configured throughput. As such, it is very -+straightforward to configure. -+.PP -+.B unlimited -+(default) -+.br -+ No limit on the bandwidth. -+.PP -+.B bandwidth -+RATE -+.br -+ Set the shaper bandwidth. See -+.BR tc(8) -+or examples below for details of the RATE value. -+.PP -+.B autorate_ingress -+.br -+ Automatic capacity estimation based on traffic arriving at this qdisc. -+This is most likely to be useful with cellular links, which tend to change -+quality randomly. A -+.B bandwidth -+parameter can be used in conjunction to specify an initial estimate. The shaper -+will periodically be set to a bandwidth slightly below the estimated rate. This -+estimator cannot estimate the bandwidth of links downstream of itself. -+ -+.SH OVERHEAD COMPENSATION PARAMETERS -+The size of each packet on the wire may differ from that seen by Linux. The -+following parameters allow CAKE to compensate for this difference by internally -+considering each packet to be bigger than Linux informs it. To assist users who -+are not expert network engineers, keywords have been provided to represent a -+number of common link technologies. -+ -+.SS Manual Overhead Specification -+.B overhead -+BYTES -+.br -+ Adds BYTES to the size of each packet. BYTES may be negative; values -+between -64 and 256 (inclusive) are accepted. -+.PP -+.B mpu -+BYTES -+.br -+ Rounds each packet (including overhead) up to a minimum length -+BYTES. BYTES may not be negative; values between 0 and 256 (inclusive) -+are accepted. -+.PP -+.B atm -+.br -+ Compensates for ATM cell framing, which is normally found on ADSL links. -+This is performed after the -+.B overhead -+parameter above. ATM uses fixed 53-byte cells, each of which can carry 48 bytes -+payload. -+.PP -+.B ptm -+.br -+ Compensates for PTM encoding, which is normally found on VDSL2 links and -+uses a 64b/65b encoding scheme. It is even more efficient to simply -+derate the specified shaper bandwidth by a factor of 64/65 or 0.984. See -+ITU G.992.3 Annex N and IEEE 802.3 Section 61.3 for details. -+.PP -+.B noatm -+.br -+ Disables ATM and PTM compensation. -+ -+.SS Failsafe Overhead Keywords -+These two keywords are provided for quick-and-dirty setup. Use them if you -+can't be bothered to read the rest of this section. -+.PP -+.B raw -+(default) -+.br -+ Turns off all overhead compensation in CAKE. The packet size reported -+by Linux will be used directly. -+.PP -+ Other overhead keywords may be added after "raw". The effect of this is -+to make the overhead compensation operate relative to the reported packet size, -+not the underlying IP packet size. -+.PP -+.B conservative -+.br -+ Compensates for more overhead than is likely to occur on any -+widely-deployed link technology. -+.br -+ Equivalent to -+.B overhead 48 atm. -+ -+.SS ADSL Overhead Keywords -+Most ADSL modems have a way to check which framing scheme is in use. Often this -+is also specified in the settings document provided by the ISP. The keywords in -+this section are intended to correspond with these sources of information. All -+of them implicitly set the -+.B atm -+flag. -+.PP -+.B pppoa-vcmux -+.br -+ Equivalent to -+.B overhead 10 atm -+.PP -+.B pppoa-llc -+.br -+ Equivalent to -+.B overhead 14 atm -+.PP -+.B pppoe-vcmux -+.br -+ Equivalent to -+.B overhead 32 atm -+.PP -+.B pppoe-llcsnap -+.br -+ Equivalent to -+.B overhead 40 atm -+.PP -+.B bridged-vcmux -+.br -+ Equivalent to -+.B overhead 24 atm -+.PP -+.B bridged-llcsnap -+.br -+ Equivalent to -+.B overhead 32 atm -+.PP -+.B ipoa-vcmux -+.br -+ Equivalent to -+.B overhead 8 atm -+.PP -+.B ipoa-llcsnap -+.br -+ Equivalent to -+.B overhead 16 atm -+.PP -+See also the Ethernet Correction Factors section below. -+ -+.SS VDSL2 Overhead Keywords -+ATM was dropped from VDSL2 in favour of PTM, which is a much more -+straightforward framing scheme. Some ISPs retained PPPoE for compatibility with -+their existing back-end systems. -+.PP -+.B pppoe-ptm -+.br -+ Equivalent to -+.B overhead 30 ptm -+ -+.br -+ PPPoE: 2B PPP + 6B PPPoE + -+.br -+ ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence + -+.br -+ PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS) -+.br -+.PP -+.B bridged-ptm -+.br -+ Equivalent to -+.B overhead 22 ptm -+.br -+ ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence + -+.br -+ PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS) -+.br -+.PP -+See also the Ethernet Correction Factors section below. -+ -+.SS DOCSIS Cable Overhead Keyword -+DOCSIS is the universal standard for providing Internet service over cable-TV -+infrastructure. -+ -+In this case, the actual on-wire overhead is less important than the packet size -+the head-end equipment uses for shaping and metering. This is specified to be -+an Ethernet frame including the CRC (aka FCS). -+.PP -+.B docsis -+.br -+ Equivalent to -+.B overhead 18 mpu 64 noatm -+ -+.SS Ethernet Overhead Keywords -+.PP -+.B ethernet -+.br -+ Accounts for Ethernet's preamble, inter-frame gap, and Frame Check -+Sequence. Use this keyword when the bottleneck being shaped for is an -+actual Ethernet cable. -+.br -+ Equivalent to -+.B overhead 38 mpu 84 noatm -+.PP -+.B ether-vlan -+.br -+ Adds 4 bytes to the overhead compensation, accounting for an IEEE 802.1Q -+VLAN header appended to the Ethernet frame header. NB: Some ISPs use one or -+even two of these within PPPoE; this keyword may be repeated as necessary to -+express this. -+ -+.SH ROUND TRIP TIME PARAMETERS -+Active Queue Management (AQM) consists of embedding congestion signals in the -+packet flow, which receivers use to instruct senders to slow down when the queue -+is persistently occupied. CAKE uses ECN signalling when available, and packet -+drops otherwise, according to a combination of the Codel and BLUE AQM algorithms -+called COBALT. -+ -+Very short latencies require a very rapid AQM response to adequately control -+latency. However, such a rapid response tends to impair throughput when the -+actual RTT is relatively long. CAKE allows specifying the RTT it assumes for -+tuning various parameters. Actual RTTs within an order of magnitude of this -+will generally work well for both throughput and latency management. -+ -+At the 'lan' setting and below, the time constants are similar in magnitude to -+the jitter in the Linux kernel itself, so congestion might be signalled -+prematurely. The flows will then become sparse and total throughput reduced, -+leaving little or no back-pressure for the fairness logic to work against. Use -+the "metro" setting for local lans unless you have a custom kernel. -+.PP -+.B rtt -+TIME -+.br -+ Manually specify an RTT. -+.PP -+.B datacentre -+.br -+ For extremely high-performance 10GigE+ networks only. Equivalent to -+.B rtt 100us. -+.PP -+.B lan -+.br -+ For pure Ethernet (not Wi-Fi) networks, at home or in the office. Don't -+use this when shaping for an Internet access link. Equivalent to -+.B rtt 1ms. -+.PP -+.B metro -+.br -+ For traffic mostly within a single city. Equivalent to -+.B rtt 10ms. -+.PP -+.B regional -+.br -+ For traffic mostly within a European-sized country. Equivalent to -+.B rtt 30ms. -+.PP -+.B internet -+(default) -+.br -+ This is suitable for most Internet traffic. Equivalent to -+.B rtt 100ms. -+.PP -+.B oceanic -+.br -+ For Internet traffic with generally above-average latency, such as that -+suffered by Australasian residents. Equivalent to -+.B rtt 300ms. -+.PP -+.B satellite -+.br -+ For traffic via geostationary satellites. Equivalent to -+.B rtt 1000ms. -+.PP -+.B interplanetary -+.br -+ So named because Jupiter is about 1 light-hour from Earth. Use this to -+(almost) completely disable AQM actions. Equivalent to -+.B rtt 1000s. -+ -+.SH FLOW ISOLATION PARAMETERS -+With flow isolation enabled, CAKE places packets from different flows into -+different queues, each of which carries its own AQM state. Packets from each -+queue are then delivered fairly, according to a DRR++ algorithm which minimises -+latency for "sparse" flows. CAKE uses a set-associative hashing algorithm to -+minimise flow collisions. -+ -+These keywords specify whether fairness based on source address, destination -+address, individual flows, or any combination of those is desired. -+.PP -+.B flowblind -+.br -+ Disables flow isolation; all traffic passes through a single queue for -+each tin. -+.PP -+.B srchost -+.br -+ Flows are defined only by source address. Could be useful on the egress -+path of an ISP backhaul. -+.PP -+.B dsthost -+.br -+ Flows are defined only by destination address. Could be useful on the -+ingress path of an ISP backhaul. -+.PP -+.B hosts -+.br -+ Flows are defined by source-destination host pairs. This is host -+isolation, rather than flow isolation. -+.PP -+.B flows -+.br -+ Flows are defined by the entire 5-tuple of source address, destination -+address, transport protocol, source port and destination port. This is the type -+of flow isolation performed by SFQ and fq_codel. -+.PP -+.B dual-srchost -+.br -+ Flows are defined by the 5-tuple, and fairness is applied first over -+source addresses, then over individual flows. Good for use on egress traffic -+from a LAN to the internet, where it'll prevent any one LAN host from -+monopolising the uplink, regardless of the number of flows they use. -+.PP -+.B dual-dsthost -+.br -+ Flows are defined by the 5-tuple, and fairness is applied first over -+destination addresses, then over individual flows. Good for use on ingress -+traffic to a LAN from the internet, where it'll prevent any one LAN host from -+monopolising the downlink, regardless of the number of flows they use. -+.PP -+.B triple-isolate -+(default) -+.br -+ Flows are defined by the 5-tuple, and fairness is applied over source -+*and* destination addresses intelligently (ie. not merely by host-pairs), and -+also over individual flows. Use this if you're not certain whether to use -+dual-srchost or dual-dsthost; it'll do both jobs at once, preventing any one -+host on *either* side of the link from monopolising it with a large number of -+flows. -+.PP -+.B nat -+.br -+ Instructs Cake to perform a NAT lookup before applying flow-isolation -+rules, to determine the true addresses and port numbers of the packet, to -+improve fairness between hosts "inside" the NAT. This has no practical effect -+in "flowblind" or "flows" modes, or if NAT is performed on a different host. -+.PP -+.B nonat -+(default) -+.br -+ Cake will not perform a NAT lookup. Flow isolation will be performed -+using the addresses and port numbers directly visible to the interface Cake is -+attached to. -+ -+.SH PRIORITY QUEUE PARAMETERS -+CAKE can divide traffic into "tins" based on the Diffserv field. Each tin has -+its own independent set of flow-isolation queues, and is serviced based on a WRR -+algorithm. To avoid perverse Diffserv marking incentives, tin weights have a -+"priority sharing" value when bandwidth used by that tin is below a threshold, -+and a lower "bandwidth sharing" value when above. Bandwidth is compared against -+the threshold using the same algorithm as the deficit-mode shaper. -+ -+Detailed customisation of tin parameters is not provided. The following presets -+perform all necessary tuning, relative to the current shaper bandwidth and RTT -+settings. -+.PP -+.B besteffort -+.br -+ Disables priority queuing by placing all traffic in one tin. -+.PP -+.B precedence -+.br -+ Enables legacy interpretation of TOS "Precedence" field. Use of this -+preset on the modern Internet is firmly discouraged. -+.PP -+.B diffserv4 -+.br -+ Provides a general-purpose Diffserv implementation with four tins: -+.br -+ Bulk (CS1), 6.25% threshold, generally low priority. -+.br -+ Best Effort (general), 100% threshold. -+.br -+ Video (AF4x, AF3x, CS3, AF2x, CS2, TOS4, TOS1), 50% threshold. -+.br -+ Voice (CS7, CS6, EF, VA, CS5, CS4), 25% threshold. -+.PP -+.B diffserv3 -+(default) -+.br -+ Provides a simple, general-purpose Diffserv implementation with three tins: -+.br -+ Bulk (CS1), 6.25% threshold, generally low priority. -+.br -+ Best Effort (general), 100% threshold. -+.br -+ Voice (CS7, CS6, EF, VA, TOS4), 25% threshold, reduced Codel interval. -+ -+.SH OTHER PARAMETERS -+.B memlimit -+LIMIT -+.br -+ Limit the memory consumed by Cake to LIMIT bytes. Note that this does -+not translate directly to queue size (so do not size this based on bandwidth -+delay product considerations, but rather on worst case acceptable memory -+consumption), as there is some overhead in the data structures containing the -+packets, especially for small packets. -+ -+ By default, the limit is calculated based on the bandwidth and RTT -+settings. -+ -+.PP -+.B wash -+ -+.br -+ Traffic entering your diffserv domain is frequently mis-marked in -+transit from the perspective of your network, and traffic exiting yours may be -+mis-marked from the perspective of the transiting provider. -+ -+Apply the wash option to clear all extra diffserv (but not ECN bits), after -+priority queuing has taken place. -+ -+If you are shaping inbound, and cannot trust the diffserv markings (as is the -+case for Comcast Cable, among others), it is best to use a single queue -+"besteffort" mode with wash. -+ -+.SH EXAMPLES -+# tc qdisc delete root dev eth0 -+.br -+# tc qdisc add root dev eth0 cake bandwidth 100Mbit ethernet -+.br -+# tc -s qdisc show dev eth0 -+.br -+qdisc cake 1: dev eth0 root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84 -+ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) -+ backlog 0b 0p requeues 0 -+ memory used: 0b of 5000000b -+ capacity estimate: 100Mbit -+ min/max network layer size: 65535 / 0 -+ min/max overhead-adjusted size: 65535 / 0 -+ average network hdr offset: 0 -+ -+ Bulk Best Effort Voice -+ thresh 6250Kbit 100Mbit 25Mbit -+ target 5.0ms 5.0ms 5.0ms -+ interval 100.0ms 100.0ms 100.0ms -+ pk_delay 0us 0us 0us -+ av_delay 0us 0us 0us -+ sp_delay 0us 0us 0us -+ pkts 0 0 0 -+ bytes 0 0 0 -+ way_inds 0 0 0 -+ way_miss 0 0 0 -+ way_cols 0 0 0 -+ drops 0 0 0 -+ marks 0 0 0 -+ ack_drop 0 0 0 -+ sp_flows 0 0 0 -+ bk_flows 0 0 0 -+ un_flows 0 0 0 -+ max_len 0 0 0 -+ quantum 300 1514 762 -+ -+After some use: -+.br -+# tc -s qdisc show dev eth0 -+ -+qdisc cake 1: root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84 -+ Sent 44709231 bytes 31931 pkt (dropped 45, overlimits 93782 requeues 0) -+ backlog 33308b 22p requeues 0 -+ memory used: 292352b of 5000000b -+ capacity estimate: 100Mbit -+ min/max network layer size: 28 / 1500 -+ min/max overhead-adjusted size: 84 / 1538 -+ average network hdr offset: 14 -+ -+ Bulk Best Effort Voice -+ thresh 6250Kbit 100Mbit 25Mbit -+ target 5.0ms 5.0ms 5.0ms -+ interval 100.0ms 100.0ms 100.0ms -+ pk_delay 8.7ms 6.9ms 5.0ms -+ av_delay 4.9ms 5.3ms 3.8ms -+ sp_delay 727us 1.4ms 511us -+ pkts 2590 21271 8137 -+ bytes 3081804 30302659 11426206 -+ way_inds 0 46 0 -+ way_miss 3 17 4 -+ way_cols 0 0 0 -+ drops 20 15 10 -+ marks 0 0 0 -+ ack_drop 0 0 0 -+ sp_flows 2 4 1 -+ bk_flows 1 2 1 -+ un_flows 0 0 0 -+ max_len 1514 1514 1514 -+ quantum 300 1514 762 -+ -+.SH SEE ALSO -+.BR tc (8), -+.BR tc-codel (8), -+.BR tc-fq_codel (8), -+.BR tc-red (8) -+ -+.SH AUTHORS -+Cake's principal author is Jonathan Morton, with contributions from -+Tony Ambardar, Kevin Darbyshire-Bryant, Toke Høiland-Jørgensen, -+Sebastian Moeller, Ryan Mounce, Dean Scarff, Nils Andreas Svee, and Dave Täht. -+ -+This manual page was written by Loganaden Velvindron. Please report corrections -+to the Linux Networking mailing list . --- a/tc/Makefile +++ b/tc/Makefile -@@ -64,6 +64,7 @@ TCMODULES += em_meta.o - TCMODULES += q_mqprio.o - TCMODULES += q_codel.o +@@ -63,6 +63,7 @@ TCMODULES += q_codel.o TCMODULES += q_fq_codel.o -+TCMODULES += q_cake.o TCMODULES += q_fq.o TCMODULES += q_pie.o ++TCMODULES += q_cake.o TCMODULES += q_hhf.o + TCMODULES += e_bpf.o + --- /dev/null +++ b/tc/q_cake.c -@@ -0,0 +1,730 @@ -+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +@@ -0,0 +1,771 @@ +/* + * Common Applications Kept Enhanced -- CAKE + * -+ * Copyright (C) 2014-2018 Jonathan Morton -+ * Copyright (C) 2017-2018 Toke Høiland-Jørgensen ++ * Copyright (C) 2014-2015 Jonathan Morton ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions, and the following disclaimer, ++ * without modification. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. The names of the authors may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * Alternatively, provided that this notice is retained in full, this ++ * software may be distributed under the terms of the GNU General ++ * Public License ("GPL") version 2, in which case the provisions of the ++ * GPL apply INSTEAD OF those given above. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH ++ * DAMAGE. ++ * + */ + +#include @@ -787,46 +128,19 @@ +#include "utils.h" +#include "tc_util.h" + -+struct cake_preset { -+ char *name; -+ unsigned int target; -+ unsigned int interval; -+}; -+ -+static struct cake_preset presets[] = { -+ {"datacentre", 5, 100}, -+ {"lan", 50, 1000}, -+ {"metro", 500, 10000}, -+ {"regional", 1500, 30000}, -+ {"internet", 5000, 100000}, -+ {"oceanic", 15000, 300000}, -+ {"satellite", 50000, 1000000}, -+ {"interplanetary", 50000000, 1000000000}, -+}; -+ -+ -+static struct cake_preset *find_preset(char *argv) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(presets); i++) -+ if (!strcmp(argv, presets[i].name)) -+ return &presets[i]; -+ return NULL; -+} -+ +static void explain(void) +{ + fprintf(stderr, +"Usage: ... cake [ bandwidth RATE | unlimited* | autorate_ingress ]\n" +" [ rtt TIME | datacentre | lan | metro | regional |\n" +" internet* | oceanic | satellite | interplanetary ]\n" -+" [ besteffort | diffserv8 | diffserv4 | diffserv3* ]\n" ++" [ besteffort | diffserv8 | diffserv4 | diffserv-llt |\n" ++" diffserv3* ]\n" +" [ flowblind | srchost | dsthost | hosts | flows |\n" +" dual-srchost | dual-dsthost | triple-isolate* ]\n" +" [ nat | nonat* ]\n" -+" [ wash | nowash* ]\n" -+" [ ack-filter | ack-filter-aggressive | no-ack-filter* ]\n" ++" [ wash | nowash * ]\n" ++" [ ack-filter | ack-filter-aggressive | no-ack-filter * ]\n" +" [ memlimit LIMIT ]\n" +" [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n" +" [ mpu N ] [ ingress | egress* ]\n" @@ -834,10 +148,10 @@ +} + +static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv, -+ struct nlmsghdr *n, const char *dev) ++ struct nlmsghdr *n) +{ + int unlimited = 0; -+ __u64 bandwidth = 0; ++ unsigned bandwidth = 0; + unsigned interval = 0; + unsigned target = 0; + unsigned diffserv = 0; @@ -854,12 +168,11 @@ + int ingress = -1; + int ack_filter = -1; + struct rtattr *tail; -+ struct cake_preset *preset, *preset_set = NULL; + + while (argc > 0) { + if (strcmp(*argv, "bandwidth") == 0) { + NEXT_ARG(); -+ if (get_rate64(&bandwidth, *argv)) { ++ if (get_rate(&bandwidth, *argv)) { + fprintf(stderr, "Illegal \"bandwidth\"\n"); + return -1; + } @@ -881,25 +194,45 @@ + target = interval / 20; + if(!target) + target = 1; -+ } else if ((preset = find_preset(*argv))) { -+ if (preset_set) -+ duparg(*argv, preset_set->name); -+ preset_set = preset; -+ target = preset->target; -+ interval = preset->interval; ++ } else if (strcmp(*argv, "datacentre") == 0) { ++ interval = 100; ++ target = 5; ++ } else if (strcmp(*argv, "lan") == 0) { ++ interval = 1000; ++ target = 50; ++ } else if (strcmp(*argv, "metro") == 0) { ++ interval = 10000; ++ target = 500; ++ } else if (strcmp(*argv, "regional") == 0) { ++ interval = 30000; ++ target = 1500; ++ } else if (strcmp(*argv, "internet") == 0) { ++ interval = 100000; ++ target = 5000; ++ } else if (strcmp(*argv, "oceanic") == 0) { ++ interval = 300000; ++ target = 15000; ++ } else if (strcmp(*argv, "satellite") == 0) { ++ interval = 1000000; ++ target = 50000; ++ } else if (strcmp(*argv, "interplanetary") == 0) { ++ interval = 3600000000U; ++ target = 5000; + + } else if (strcmp(*argv, "besteffort") == 0) { -+ diffserv = CAKE_DIFFSERV_BESTEFFORT; ++ diffserv = 1; + } else if (strcmp(*argv, "precedence") == 0) { -+ diffserv = CAKE_DIFFSERV_PRECEDENCE; ++ diffserv = 2; + } else if (strcmp(*argv, "diffserv8") == 0) { -+ diffserv = CAKE_DIFFSERV_DIFFSERV8; ++ diffserv = 3; + } else if (strcmp(*argv, "diffserv4") == 0) { -+ diffserv = CAKE_DIFFSERV_DIFFSERV4; ++ diffserv = 4; + } else if (strcmp(*argv, "diffserv") == 0) { -+ diffserv = CAKE_DIFFSERV_DIFFSERV4; ++ diffserv = 4; ++ } else if (strcmp(*argv, "diffserv-llt") == 0) { ++ diffserv = 5; + } else if (strcmp(*argv, "diffserv3") == 0) { -+ diffserv = CAKE_DIFFSERV_DIFFSERV3; ++ diffserv = 6; + + } else if (strcmp(*argv, "nowash") == 0) { + wash = 0; @@ -907,21 +240,21 @@ + wash = 1; + + } else if (strcmp(*argv, "flowblind") == 0) { -+ flowmode = CAKE_FLOW_NONE; ++ flowmode = 0; + } else if (strcmp(*argv, "srchost") == 0) { -+ flowmode = CAKE_FLOW_SRC_IP; ++ flowmode = 1; + } else if (strcmp(*argv, "dsthost") == 0) { -+ flowmode = CAKE_FLOW_DST_IP; ++ flowmode = 2; + } else if (strcmp(*argv, "hosts") == 0) { -+ flowmode = CAKE_FLOW_HOSTS; ++ flowmode = 3; + } else if (strcmp(*argv, "flows") == 0) { -+ flowmode = CAKE_FLOW_FLOWS; ++ flowmode = 4; + } else if (strcmp(*argv, "dual-srchost") == 0) { -+ flowmode = CAKE_FLOW_DUAL_SRC; ++ flowmode = 5; + } else if (strcmp(*argv, "dual-dsthost") == 0) { -+ flowmode = CAKE_FLOW_DUAL_DST; ++ flowmode = 6; + } else if (strcmp(*argv, "triple-isolate") == 0) { -+ flowmode = CAKE_FLOW_TRIPLE; ++ flowmode = 7; + + } else if (strcmp(*argv, "nat") == 0) { + nat = 1; @@ -929,14 +262,14 @@ + nat = 0; + + } else if (strcmp(*argv, "ptm") == 0) { -+ atm = CAKE_ATM_PTM; ++ atm = 2; + } else if (strcmp(*argv, "atm") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + } else if (strcmp(*argv, "noatm") == 0) { -+ atm = CAKE_ATM_NONE; ++ atm = 0; + + } else if (strcmp(*argv, "raw") == 0) { -+ atm = CAKE_ATM_NONE; ++ atm = 0; + overhead = 0; + overhead_set = true; + overhead_override = true; @@ -946,41 +279,41 @@ + * one whole ATM cell plus ATM framing. + * A safe choice if the actual overhead is unknown. + */ -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead = 48; + overhead_set = true; + + /* Various ADSL framing schemes, all over ATM cells */ + } else if (strcmp(*argv, "ipoa-vcmux") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 8; + overhead_set = true; + } else if (strcmp(*argv, "ipoa-llcsnap") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 16; + overhead_set = true; + } else if (strcmp(*argv, "bridged-vcmux") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 24; + overhead_set = true; + } else if (strcmp(*argv, "bridged-llcsnap") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 32; + overhead_set = true; + } else if (strcmp(*argv, "pppoa-vcmux") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 10; + overhead_set = true; + } else if (strcmp(*argv, "pppoa-llc") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 14; + overhead_set = true; + } else if (strcmp(*argv, "pppoe-vcmux") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 32; + overhead_set = true; + } else if (strcmp(*argv, "pppoe-llcsnap") == 0) { -+ atm = CAKE_ATM_ATM; ++ atm = 1; + overhead += 40; + overhead_set = true; + @@ -992,7 +325,7 @@ + * + 1B Start of Frame (S) + 1B End of Frame (Ck) + * + 2B TC-CRC (PTM-FCS) = 30B + */ -+ atm = CAKE_ATM_PTM; ++ atm = 2; + overhead += 30; + overhead_set = true; + } else if (strcmp(*argv, "bridged-ptm") == 0) { @@ -1001,7 +334,7 @@ + * + 1B Start of Frame (S) + 1B End of Frame (Ck) + * + 2B TC-CRC (PTM-FCS) = 22B + */ -+ atm = CAKE_ATM_PTM; ++ atm = 2; + overhead += 22; + overhead_set = true; + @@ -1019,6 +352,23 @@ + * active. + */ + ++ } else if (strcmp(*argv, "total_overhead") == 0) { ++ /* ++ * This is the overhead cake accounts for; added here so ++ * that cake's "tc -s qdisc" output can be directly ++ * pasted into the tc command to instantate a new cake.. ++ */ ++ NEXT_ARG(); ++ ++ } else if (strcmp(*argv, "hard_header_len") == 0) { ++ /* ++ * This is the overhead the kernel automatically ++ * accounted for; added here so that cake's "tc -s ++ * qdisc" output can be directly pasted into the tc ++ * command to instantiate a new cake.. ++ */ ++ NEXT_ARG(); ++ + } else if (strcmp(*argv, "ethernet") == 0) { + /* ethernet pre-amble & interframe gap & FCS + * you may need to add vlan tag */ @@ -1037,7 +387,7 @@ + * but not interframe gap or preamble. + */ + } else if (strcmp(*argv, "docsis") == 0) { -+ atm = CAKE_ATM_NONE; ++ atm = 0; + overhead += 18; + overhead_set = true; + mpu = 64; @@ -1067,11 +417,11 @@ + ingress = 0; + + } else if (strcmp(*argv, "no-ack-filter") == 0) { -+ ack_filter = CAKE_ACK_NONE; ++ ack_filter = 0; + } else if (strcmp(*argv, "ack-filter") == 0) { -+ ack_filter = CAKE_ACK_FILTER; ++ ack_filter = 0x0200; + } else if (strcmp(*argv, "ack-filter-aggressive") == 0) { -+ ack_filter = CAKE_ACK_AGGRESSIVE; ++ ack_filter = 0x0600; + + } else if (strcmp(*argv, "memlimit") == 0) { + NEXT_ARG(); @@ -1094,7 +444,7 @@ + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + if (bandwidth || unlimited) -+ addattr_l(n, 1024, TCA_CAKE_BASE_RATE64, &bandwidth, sizeof(bandwidth)); ++ addattr_l(n, 1024, TCA_CAKE_BASE_RATE, &bandwidth, sizeof(bandwidth)); + if (diffserv) + addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv, sizeof(diffserv)); + if (atm != -1) @@ -1105,7 +455,7 @@ + addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead)); + if (overhead_override) { + unsigned zero = 0; -+ addattr_l(n, 1024, TCA_CAKE_RAW, &zero, sizeof(zero)); ++ addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero)); + } + if (mpu > 0) + addattr_l(n, 1024, TCA_CAKE_MPU, &mpu, sizeof(mpu)); @@ -1134,13 +484,13 @@ +static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_CAKE_MAX + 1]; -+ __u64 bandwidth = 0; ++ unsigned bandwidth = 0; + unsigned diffserv = 0; + unsigned flowmode = 0; + unsigned interval = 0; + unsigned memlimit = 0; + int overhead = 0; -+ int raw = 0; ++ int ethernet = 0; + int mpu = 0; + int atm = 0; + int nat = 0; @@ -1148,7 +498,6 @@ + int wash = 0; + int ingress = 0; + int ack_filter = 0; -+ int split_gso = 0; + SPRINT_BUF(b1); + SPRINT_BUF(b2); + @@ -1157,89 +506,87 @@ + + parse_rtattr_nested(tb, TCA_CAKE_MAX, opt); + -+ if (tb[TCA_CAKE_BASE_RATE64] && -+ RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE64]) >= sizeof(bandwidth)) { -+ bandwidth = rta_getattr_u64(tb[TCA_CAKE_BASE_RATE64]); -+ if(bandwidth) { ++ if (tb[TCA_CAKE_BASE_RATE] && ++ RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE]) >= sizeof(__u32)) { ++ bandwidth = rta_getattr_u32(tb[TCA_CAKE_BASE_RATE]); ++ if(bandwidth) + fprintf(f, "bandwidth %s ", sprint_rate(bandwidth, b1)); -+ } else ++ else + fprintf(f, "unlimited "); + } + if (tb[TCA_CAKE_AUTORATE] && + RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) { + autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]); + if(autorate == 1) -+ fprintf(f, "ingress"); ++ fprintf(f, "autorate_ingress "); + else if(autorate) -+ fprintf(f, "unknown"); ++ fprintf(f, "(?autorate?) "); + } + if (tb[TCA_CAKE_DIFFSERV_MODE] && + RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) { + diffserv = rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]); + switch(diffserv) { -+ case CAKE_DIFFSERV_DIFFSERV3: -+ fprintf(f, "diffserv3 "); ++ case 1: ++ fprintf(f, "besteffort "); + break; -+ case CAKE_DIFFSERV_DIFFSERV4: -+ fprintf(f, "diffserv4 "); ++ case 2: ++ fprintf(f, "precedence "); + break; -+ case CAKE_DIFFSERV_DIFFSERV8: ++ case 3: + fprintf(f, "diffserv8 "); + break; -+ case CAKE_DIFFSERV_BESTEFFORT: -+ fprintf(f, "besteffort "); ++ case 4: ++ fprintf(f, "diffserv4 "); + break; -+ case CAKE_DIFFSERV_PRECEDENCE: -+ fprintf(f, "precedence "); ++ case 5: ++ fprintf(f, "diffserv-llt "); ++ break; ++ case 6: ++ fprintf(f, "diffserv3 "); + break; + default: -+ fprintf(f, "unknown "); ++ fprintf(f, "(?diffserv?) "); + break; + }; + } + if (tb[TCA_CAKE_FLOW_MODE] && + RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) { + flowmode = rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]); ++ nat = !!(flowmode & 64); ++ flowmode &= ~64; + switch(flowmode) { -+ case CAKE_FLOW_NONE: ++ case 0: + fprintf(f, "flowblind "); + break; -+ case CAKE_FLOW_SRC_IP: ++ case 1: + fprintf(f, "srchost "); + break; -+ case CAKE_FLOW_DST_IP: ++ case 2: + fprintf(f, "dsthost "); + break; -+ case CAKE_FLOW_HOSTS: ++ case 3: + fprintf(f, "hosts "); + break; -+ case CAKE_FLOW_FLOWS: ++ case 4: + fprintf(f, "flows "); + break; -+ case CAKE_FLOW_DUAL_SRC: ++ case 5: + fprintf(f, "dual-srchost "); + break; -+ case CAKE_FLOW_DUAL_DST: ++ case 6: + fprintf(f, "dual-dsthost "); + break; -+ case CAKE_FLOW_TRIPLE: ++ case 7: + fprintf(f, "triple-isolate "); + break; + default: -+ fprintf(f, "unknown "); ++ fprintf(f, "(?flowmode?) "); + break; + }; + ++ if(nat) ++ fprintf(f, "nat "); + } -+ -+ if (tb[TCA_CAKE_NAT] && -+ RTA_PAYLOAD(tb[TCA_CAKE_NAT]) >= sizeof(__u32)) { -+ nat = rta_getattr_u32(tb[TCA_CAKE_NAT]); -+ } -+ -+ if(nat) -+ fprintf(f, "nat "); -+ + if (tb[TCA_CAKE_WASH] && + RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) { + wash = rta_getattr_u32(tb[TCA_CAKE_WASH]); @@ -1249,8 +596,8 @@ + atm = rta_getattr_u32(tb[TCA_CAKE_ATM]); + } + if (tb[TCA_CAKE_OVERHEAD] && -+ RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__s32)) { -+ overhead = *(__s32 *) RTA_DATA(tb[TCA_CAKE_OVERHEAD]); ++ RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) { ++ overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]); + } + if (tb[TCA_CAKE_MPU] && + RTA_PAYLOAD(tb[TCA_CAKE_MPU]) >= sizeof(__u32)) { @@ -1264,12 +611,9 @@ + RTA_PAYLOAD(tb[TCA_CAKE_ACK_FILTER]) >= sizeof(__u32)) { + ack_filter = rta_getattr_u32(tb[TCA_CAKE_ACK_FILTER]); + } -+ if (tb[TCA_CAKE_SPLIT_GSO] && -+ RTA_PAYLOAD(tb[TCA_CAKE_SPLIT_GSO]) >= sizeof(__u32)) { -+ split_gso = rta_getattr_u32(tb[TCA_CAKE_SPLIT_GSO]); -+ } -+ if (tb[TCA_CAKE_RAW]) { -+ raw = 1; ++ if (tb[TCA_CAKE_ETHERNET] && ++ RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) { ++ ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]); + } + if (tb[TCA_CAKE_RTT] && + RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) { @@ -1277,42 +621,50 @@ + } + + if (wash) -+ fprintf(f, "wash "); ++ fprintf(f,"wash "); + + if (ingress) -+ fprintf(f, "ingress "); ++ fprintf(f,"ingress "); + -+ if (ack_filter == CAKE_ACK_AGGRESSIVE) -+ fprintf(f, "ack-filter-aggresssive "); -+ else if (ack_filter == CAKE_ACK_FILTER) -+ fprintf(f, "ack-filter "); -+ else -+ fprintf(f, "no-ack-filter "); -+ -+ if (split_gso) -+ fprintf(f, "split-gso "); ++ if (ack_filter == 0x0600) ++ fprintf(f,"ack-filter-aggressive "); ++ else if (ack_filter) ++ fprintf(f,"ack-filter "); + + if (interval) + fprintf(f, "rtt %s ", sprint_time(interval, b2)); + -+ if (raw) ++ if (!atm && overhead == ethernet) { + fprintf(f, "raw "); ++ } else { ++ if (atm == 1) ++ fprintf(f, "atm "); ++ else if (atm == 2) ++ fprintf(f, "ptm "); ++ else ++ fprintf(f, "noatm "); ++ ++ fprintf(f, "overhead %d ", overhead); ++ ++ /* This is actually the *amount* of automatic compensation, but ++ * we only report its presence as a boolean for now. ++ */ ++ if (ethernet) ++ fprintf(f, "via-ethernet "); ++ } + -+ if (atm == CAKE_ATM_ATM) -+ fprintf(f, "atm "); -+ else if (atm == CAKE_ATM_PTM) -+ fprintf(f, "ptm "); -+ else if (!raw) -+ fprintf(f, "noatm "); -+ -+ fprintf(f, "overhead %d ", overhead); ++ /* unconditionally report the overhead and hard_header_len overhead the ++ * kernel added automatically ++ */ ++ fprintf(f, "total_overhead %d ", overhead); ++ fprintf(f, "hard_header_len %d ", ethernet); + -+ if (mpu) -+ fprintf(f, "mpu %u ", mpu); ++ if (mpu) { ++ fprintf(f, "mpu %d ", mpu); ++ } + -+ if (memlimit) { ++ if (memlimit) + fprintf(f, "memlimit %s", sprint_size(memlimit, b1)); -+ } + + return 0; +} @@ -1320,108 +672,58 @@ +static int cake_print_xstats(struct qdisc_util *qu, FILE *f, + struct rtattr *xstats) +{ ++ /* fq_codel stats format borrowed */ ++ struct tc_fq_codel_xstats *st; ++ struct tc_cake_xstats *stnc; + SPRINT_BUF(b1); -+ struct rtattr *st[TCA_CAKE_STATS_MAX + 1]; -+ int i; ++ SPRINT_BUF(b2); + + if (xstats == NULL) + return 0; + -+#define GET_STAT_U32(attr) rta_getattr_u32(st[TCA_CAKE_STATS_ ## attr]) -+#define GET_STAT_S32(attr) (*(__s32*)RTA_DATA(st[TCA_CAKE_STATS_ ## attr])) -+#define GET_STAT_U64(attr) rta_getattr_u64(st[TCA_CAKE_STATS_ ## attr]) -+ -+ parse_rtattr_nested(st, TCA_CAKE_STATS_MAX, xstats); -+ -+ if (st[TCA_CAKE_STATS_MEMORY_USED] && -+ st[TCA_CAKE_STATS_MEMORY_LIMIT]) { -+ fprintf(f, " memory used: %s", -+ sprint_size(GET_STAT_U32(MEMORY_USED), b1)); -+ -+ fprintf(f, " of %s\n", -+ sprint_size(GET_STAT_U32(MEMORY_LIMIT), b1)); -+ } -+ -+ if (st[TCA_CAKE_STATS_CAPACITY_ESTIMATE64]) { -+ fprintf(f, " capacity estimate: %s\n", -+ sprint_rate(GET_STAT_U64(CAPACITY_ESTIMATE64), b1)); -+ } -+ -+ if (st[TCA_CAKE_STATS_MIN_NETLEN] && -+ st[TCA_CAKE_STATS_MAX_NETLEN]) { -+ fprintf(f, " min/max network layer size: %8u", -+ GET_STAT_U32(MIN_NETLEN)); -+ fprintf(f, " /%8u\n", GET_STAT_U32(MAX_NETLEN)); -+ } -+ -+ if (st[TCA_CAKE_STATS_MIN_ADJLEN] && -+ st[TCA_CAKE_STATS_MAX_ADJLEN]) { -+ fprintf(f, " min/max overhead-adjusted size: %8u", -+ GET_STAT_U32(MIN_ADJLEN)); -+ fprintf(f, " /%8u\n", GET_STAT_U32(MAX_ADJLEN)); -+ } -+ -+ if (st[TCA_CAKE_STATS_AVG_NETOFF]) -+ fprintf(f, " average network hdr offset: %8u\n\n", -+ GET_STAT_U32(AVG_NETOFF)); -+ -+ /* class stats */ -+ if (st[TCA_CAKE_STATS_DEFICIT]) -+ fprintf(f, "deficit %u", -+ GET_STAT_S32(DEFICIT)); -+ if (st[TCA_CAKE_STATS_COBALT_COUNT]) -+ fprintf(f, "count %u", -+ GET_STAT_U32(COBALT_COUNT)); -+ -+ if (st[TCA_CAKE_STATS_DROPPING] && GET_STAT_U32(DROPPING)) { -+ fprintf(f, " dropping"); -+ if (st[TCA_CAKE_STATS_DROP_NEXT_US]) { -+ int drop_next = GET_STAT_S32(DROP_NEXT_US); -+ if (drop_next < 0) { ++ if (RTA_PAYLOAD(xstats) < sizeof(st->type)) ++ return -1; ++ ++ st = RTA_DATA(xstats); ++ stnc = RTA_DATA(xstats); ++ ++ if (st->type == TCA_FQ_CODEL_XSTATS_QDISC && RTA_PAYLOAD(xstats) >= sizeof(*st)) { ++ fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u", ++ st->qdisc_stats.maxpacket, ++ st->qdisc_stats.drop_overlimit, ++ st->qdisc_stats.new_flow_count, ++ st->qdisc_stats.ecn_mark); ++ fprintf(f, "\n new_flows_len %u old_flows_len %u", ++ st->qdisc_stats.new_flows_len, ++ st->qdisc_stats.old_flows_len); ++ } else if (st->type == TCA_FQ_CODEL_XSTATS_CLASS && RTA_PAYLOAD(xstats) >= sizeof(*st)) { ++ fprintf(f, " deficit %d count %u lastcount %u ldelay %s", ++ st->class_stats.deficit, ++ st->class_stats.count, ++ st->class_stats.lastcount, ++ sprint_time(st->class_stats.ldelay, b1)); ++ if (st->class_stats.dropping) { ++ fprintf(f, " dropping"); ++ if (st->class_stats.drop_next < 0) + fprintf(f, " drop_next -%s", -+ sprint_time(drop_next, b1)); -+ } else { ++ sprint_time(-st->class_stats.drop_next, b1)); ++ else + fprintf(f, " drop_next %s", -+ sprint_time(drop_next, b1)); -+ } -+ } -+ } -+ -+ if (st[TCA_CAKE_STATS_P_DROP]) { -+ fprintf(f, " blue_prob %u", -+ GET_STAT_U32(P_DROP)); -+ if (st[TCA_CAKE_STATS_BLUE_TIMER_US]) { -+ int blue_timer = GET_STAT_S32(BLUE_TIMER_US); -+ if (blue_timer < 0) { -+ fprintf(f, " blue_timer -%s", -+ sprint_time(blue_timer, b1)); -+ } else { -+ fprintf(f, " blue_timer %s", -+ sprint_time(blue_timer, b1)); -+ } ++ sprint_time(st->class_stats.drop_next, b1)); + } -+ } -+ -+#undef GET_STAT_U32 -+#undef GET_STAT_S32 -+#undef GET_STAT_U64 -+ -+ if (st[TCA_CAKE_STATS_TIN_STATS]) { -+ struct rtattr *tins[TC_CAKE_MAX_TINS + 1]; -+ struct rtattr *tstat[TC_CAKE_MAX_TINS][TCA_CAKE_TIN_STATS_MAX + 1]; -+ int num_tins = 0; ++ } else if (stnc->version >= 1 && stnc->version < 0xFF ++ && stnc->max_tins == TC_CAKE_MAX_TINS ++ && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate)) ++ { ++ int i; + -+ parse_rtattr_nested(tins, TC_CAKE_MAX_TINS, st[TCA_CAKE_STATS_TIN_STATS]); ++ if(stnc->version >= 3) ++ fprintf(f, " memory used: %s of %s\n", sprint_size(stnc->memory_used, b1), sprint_size(stnc->memory_limit, b2)); + -+ for (i = 1; i <= TC_CAKE_MAX_TINS && tins[i]; i++) { -+ parse_rtattr_nested(tstat[i-1], TCA_CAKE_TIN_STATS_MAX, tins[i]); -+ num_tins++; -+ } -+ -+ if (!num_tins) -+ return 0; ++ if(stnc->version >= 2) ++ fprintf(f, " capacity estimate: %s\n", sprint_rate(stnc->capacity_estimate, b1)); + -+ switch(num_tins) { ++ switch(stnc->tin_cnt) { + case 3: + fprintf(f, " Bulk Best Effort Voice\n"); + break; @@ -1430,61 +732,112 @@ + fprintf(f, " Bulk Best Effort Video Voice\n"); + break; + ++ case 5: ++ fprintf(f, " Low Loss Best Effort Low Delay Bulk Net Control\n"); ++ break; ++ + default: + fprintf(f, " "); -+ for(i=0; i < num_tins; i++) ++ for(i=0; i < stnc->tin_cnt; i++) + fprintf(f, " Tin %u", i); + fprintf(f, "\n"); + }; + -+#define GET_TSTAT(i, attr) (tstat[i][TCA_CAKE_TIN_STATS_ ## attr]) -+#define PRINT_TSTAT(name, attr, fmts, val) do { \ -+ if (GET_TSTAT(0, attr)) { \ -+ fprintf(f, name); \ -+ for (i = 0; i < num_tins; i++) \ -+ fprintf(f, " %12" fmts, val); \ -+ fprintf(f, "\n"); \ -+ } \ -+ } while (0) -+ -+#define SPRINT_TSTAT(pfunc, type, name, attr) PRINT_TSTAT( \ -+ name, attr, "s", sprint_ ## pfunc( \ -+ rta_getattr_ ## type(GET_TSTAT(i, attr)), b1)) -+ -+#define PRINT_TSTAT_U32(name, attr) PRINT_TSTAT( \ -+ name, attr, "u", rta_getattr_u32(GET_TSTAT(i, attr))) -+ -+#define PRINT_TSTAT_U64(name, attr) PRINT_TSTAT( \ -+ name, attr, "llu", rta_getattr_u64(GET_TSTAT(i, attr))) -+ -+ SPRINT_TSTAT(rate, u64, " thresh ", THRESHOLD_RATE64); -+ SPRINT_TSTAT(time, u32, " target ", TARGET_US); -+ SPRINT_TSTAT(time, u32, " interval", INTERVAL_US); -+ SPRINT_TSTAT(time, u32, " pk_delay", PEAK_DELAY_US); -+ SPRINT_TSTAT(time, u32, " av_delay", AVG_DELAY_US); -+ SPRINT_TSTAT(time, u32, " sp_delay", BASE_DELAY_US); -+ SPRINT_TSTAT(size, u32, " backlog ", BACKLOG_BYTES); -+ -+ PRINT_TSTAT_U32(" pkts ", SENT_PACKETS); -+ PRINT_TSTAT_U64(" bytes ", SENT_BYTES64); -+ -+ PRINT_TSTAT_U32(" way_inds", WAY_INDIRECT_HITS); -+ PRINT_TSTAT_U32(" way_miss", WAY_MISSES); -+ PRINT_TSTAT_U32(" way_cols", WAY_COLLISIONS); -+ PRINT_TSTAT_U32(" drops ", DROPPED_PACKETS); -+ PRINT_TSTAT_U32(" marks ", ECN_MARKED_PACKETS); -+ PRINT_TSTAT_U32(" ack_drop", ACKS_DROPPED_PACKETS); -+ PRINT_TSTAT_U32(" sp_flows", SPARSE_FLOWS); -+ PRINT_TSTAT_U32(" bk_flows", BULK_FLOWS); -+ PRINT_TSTAT_U32(" un_flows", UNRESPONSIVE_FLOWS); -+ PRINT_TSTAT_U32(" max_len ", MAX_SKBLEN); -+ PRINT_TSTAT_U32(" quantum ", FLOW_QUANTUM); -+ -+#undef GET_STAT -+#undef PRINT_TSTAT -+#undef SPRINT_TSTAT -+#undef PRINT_TSTAT_U32 -+#undef PRINT_TSTAT_U64 ++ fprintf(f, " thresh "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12s", sprint_rate(stnc->threshold_rate[i], b1)); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " target "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12s", sprint_time(stnc->target_us[i], b1)); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " interval"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12s", sprint_time(stnc->interval_us[i], b1)); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " pk_delay"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12s", sprint_time(stnc->peak_delay_us[i], b1)); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " av_delay"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12s", sprint_time(stnc->avge_delay_us[i], b1)); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " sp_delay"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12s", sprint_time(stnc->base_delay_us[i], b1)); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " pkts "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->sent[i].packets); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " bytes "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12llu", stnc->sent[i].bytes); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " way_inds"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->way_indirect_hits[i]); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " way_miss"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->way_misses[i]); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " way_cols"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->way_collisions[i]); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " drops "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->dropped[i].packets); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " marks "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->ecn_marked[i].packets); ++ fprintf(f, "\n"); ++ ++ if(stnc->version >= 5) { ++ fprintf(f, " ack_drop"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->ack_drops[i].packets); ++ fprintf(f, "\n"); ++ } ++ ++ fprintf(f, " sp_flows"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->sparse_flows[i]); ++ fprintf(f, "\n"); ++ ++ fprintf(f, " bk_flows"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->bulk_flows[i]); ++ fprintf(f, "\n"); ++ ++ if(stnc->version >= 4) { ++ fprintf(f, " un_flows"); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->unresponse_flows[i]); ++ fprintf(f, "\n"); ++ } ++ ++ fprintf(f, " max_len "); ++ for(i=0; i < stnc->tin_cnt; i++) ++ fprintf(f, " %12u", stnc->max_skblen[i]); ++ fprintf(f, "\n"); ++ } else { ++ return -1; + } + return 0; +}