From: Florian Fainelli Date: Wed, 13 Jun 2007 19:12:44 +0000 (+0000) Subject: Really apply the esfq patch X-Git-Tag: reboot~28939 X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=1c7ab50199366c5b455faccd93c7907e30a78742;p=openwrt%2Fstaging%2Fxback.git Really apply the esfq patch SVN-Revision: 7623 --- diff --git a/target/linux/brcm-2.4/config/default b/target/linux/brcm-2.4/config/default index 504acb66d4..ff3d183975 100644 --- a/target/linux/brcm-2.4/config/default +++ b/target/linux/brcm-2.4/config/default @@ -182,6 +182,7 @@ CONFIG_MTD_SFLASH=y # CONFIG_NETROM is not set CONFIG_NET_PCI=y # CONFIG_NET_PCMCIA is not set +CONFIG_NET_SCH_ESFQ=m CONFIG_NET_WIRELESS=y CONFIG_NEW_IRQ=y CONFIG_NEW_TIME_C=y diff --git a/target/linux/generic-2.4/patches/621-tc_esfq.patch b/target/linux/generic-2.4/patches/621-tc_esfq.patch index d342042c4c..f4b9a6edda 100644 --- a/target/linux/generic-2.4/patches/621-tc_esfq.patch +++ b/target/linux/generic-2.4/patches/621-tc_esfq.patch @@ -1,747 +1,743 @@ -diff -urN target.old/linux/generic-2.4/patches/232-esfq_kmod.patch target/linux/generic-2.4/patches/232-esfq_kmod.patch ---- target.old/linux/generic-2.4/patches/232-esfq_kmod.patch 1969-12-31 19:00:00.000000000 -0500 -+++ target/linux/generic-2.4/patches/232-esfq_kmod.patch 2007-06-08 02:35:06.000000000 -0400 -@@ -0,0 +1,743 @@ -+diff -urN linux-2.4.34/Documentation/Configure.help linux-2.4.34/Documentation/Configure.help -+--- linux-2.4.34/Documentation/Configure.help 2007-05-10 19:37:42.000000000 -0400 -++++ linux-2.4.34/Documentation/Configure.help 2007-05-10 19:49:49.000000000 -0400 -+@@ -11127,6 +11127,24 @@ -+ whenever you want). If you want to compile it as a module, say M -+ here and read . 
-+ -++ESFQ queue -++CONFIG_NET_SCH_ESFQ -++ Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) -++ packet scheduling algorithm for some of your network devices or as a -++ leaf discipline for the CBQ scheduling algorithm (see the top of -++ for details and references about the SFQ -++ algorithm). -++ -++ This is an enchanced SFQ version which allows you to control the -++ hardcoded values in the SFQ scheduler: queue depth, hash table size, -++ queues limit. Also adds control to the hash function used to identify -++ packet flows. Hash by src or dst ip and original sfq hash. -++ -++ This code is also available as a module called sch_esfq.o ( = code -++ which can be inserted in and removed from the running kernel -++ whenever you want). If you want to compile it as a module, say M -++ here and read . -++ -+ CSZ packet scheduler -+ CONFIG_NET_SCH_CSZ -+ Say Y here if you want to use the Clark-Shenker-Zhang (CSZ) packet -+diff -urN linux-2.4.34/include/linux/pkt_sched.h linux-2.4.34/include/linux/pkt_sched.h -+--- linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:38:19.000000000 -0400 -++++ linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:53:59.000000000 -0400 -+@@ -173,8 +173,36 @@ -+ * -+ * The only reason for this is efficiency, it is possible -+ * to change these parameters in compile time. -++ * -++ * If you need to play with these values use esfq instead. 
-+ */ -+ -++/* ESFQ section */ -++ -++enum -++{ -++ /* traditional */ -++ TCA_SFQ_HASH_CLASSIC, -++ TCA_SFQ_HASH_DST, -++ TCA_SFQ_HASH_SRC, -++ /* conntrack */ -++ TCA_SFQ_HASH_CTORIGDST, -++ TCA_SFQ_HASH_CTORIGSRC, -++ TCA_SFQ_HASH_CTREPLDST, -++ TCA_SFQ_HASH_CTREPLSRC, -++ TCA_SFQ_HASH_CTNATCHG, -++}; -++ -++struct tc_esfq_qopt -++{ -++ unsigned quantum; /* Bytes per round allocated to flow */ -++ int perturb_period; /* Period of hash perturbation */ -++ __u32 limit; /* Maximal packets in queue */ -++ unsigned divisor; /* Hash divisor */ -++ unsigned flows; /* Maximal number of flows */ -++ unsigned hash_kind; /* Hash function to use for flow identification */ -++}; -++ -+ /* RED section */ -+ -+ enum -+diff -urN linux-2.4.34/net/sched/Config.in linux-2.4.34/net/sched/Config.in -+--- linux-2.4.34/net/sched/Config.in 2007-05-10 19:38:31.000000000 -0400 -++++ linux-2.4.34/net/sched/Config.in 2007-05-10 19:54:45.000000000 -0400 -+@@ -12,6 +12,7 @@ -+ tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO -+ tristate ' RED queue' CONFIG_NET_SCH_RED -+ tristate ' SFQ queue' CONFIG_NET_SCH_SFQ -++tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ -+ tristate ' TEQL queue' CONFIG_NET_SCH_TEQL -+ tristate ' TBF queue' CONFIG_NET_SCH_TBF -+ tristate ' GRED queue' CONFIG_NET_SCH_GRED -+diff -urN linux-2.4.34/net/sched/Makefile linux-2.4.34/net/sched/Makefile -+--- linux-2.4.34/net/sched/Makefile 2007-05-10 19:38:31.000000000 -0400 -++++ linux-2.4.34/net/sched/Makefile 2007-05-10 19:55:13.000000000 -0400 -+@@ -19,6 +19,7 @@ -+ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o -+ obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o -+ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o -++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o -+ obj-$(CONFIG_NET_SCH_RED) += sch_red.o -+ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o -+ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o -+diff -urN linux-2.4.34/net/sched/sch_esfq.c linux-2.4.34/net/sched/sch_esfq.c -+--- linux-2.4.34/net/sched/sch_esfq.c 1969-12-31 19:00:00.000000000 -0500 
-++++ linux-2.4.34/net/sched/sch_esfq.c 2007-05-10 19:57:15.000000000 -0400 -+@@ -0,0 +1,649 @@ -++/* -++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. -++ * -++ * This program is free software; you can redistribute it and/or -++ * modify it under the terms of the GNU General Public License -++ * as published by the Free Software Foundation; either version -++ * 2 of the License, or (at your option) any later version. -++ * -++ * Authors: Alexey Kuznetsov, -++ * -++ * Changes: Alexander Atanasov, -++ * Added dynamic depth,limit,divisor,hash_kind options. -++ * Added dst and src hashes. -++ * -++ * Alexander Clouter, -++ * Ported ESFQ to Linux 2.6. -++ * -++ * Corey Hickey, -++ * Maintenance of the Linux 2.6 port. -++ * Added fwmark hash (thanks to Robert Kurjata). -++ * Added usage of jhash. -++ * -++ */ -++ -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++#include -++ -++#define IPPROTO_DCCP 33 -++#define qdisc_priv(q) ((void *)(q->data)) -++ -++#ifdef CONFIG_IP_NF_CONNTRACK -++/* #include */ -++#include -++#endif -++ -++/* Stochastic Fairness Queuing algorithm. -++ For more comments look at sch_sfq.c. -++ The difference is that you can change limit, depth, -++ hash table size and choose alternate hash types. 
-++ -++ classic: same as in sch_sfq.c -++ dst: destination IP address -++ src: source IP address -++ ctorigdst: original destination IP address -++ ctorigsrc: original source IP address -++ ctrepldst: reply destination IP address -++ ctreplsrc: reply source IP -++ ctnatchg: use the address which changed via nat -++ -++*/ -++ -++ -++/* This type should contain at least SFQ_DEPTH*2 values */ -++typedef unsigned int esfq_index; -++ -++struct esfq_head -++{ -++ esfq_index next; -++ esfq_index prev; -++}; -++ -++struct esfq_sched_data -++{ -++/* Parameters */ -++ int perturb_period; -++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ -++ int limit; -++ unsigned depth; -++ unsigned hash_divisor; -++ unsigned hash_kind; -++/* Variables */ -++ struct timer_list perturb_timer; -++ int perturbation; -++ esfq_index tail; /* Index of current slot in round */ -++ esfq_index max_depth; /* Maximal depth */ -++ -++ esfq_index *ht; /* Hash table */ -++ esfq_index *next; /* Active slots link */ -++ short *allot; /* Current allotment per slot */ -++ unsigned short *hash; /* Hash value indexed by slots */ -++ struct sk_buff_head *qs; /* Slot queue */ -++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ -++ unsigned dyn_min; /* For dynamic divisor adjustment; minimum value seen */ -++ unsigned dyn_max; /* maximum value seen */ -++ unsigned dyn_range; /* saved range */ -++}; -++ -++/* This contains the info we will hash. 
*/ -++struct esfq_packet_info -++{ -++ u32 proto; /* protocol or port */ -++ u32 src; /* source from packet header */ -++ u32 dst; /* destination from packet header */ -++ u32 ctorigsrc; /* original source from conntrack */ -++ u32 ctorigdst; /* original destination from conntrack */ -++ u32 ctreplsrc; /* reply source from conntrack */ -++ u32 ctrepldst; /* reply destination from conntrack */ -++}; -++ -++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) -++{ -++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); -++} -++ -++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) -++{ -++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); -++} -++ -++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) -++{ -++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); -++} -++ -++ -++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) -++{ -++ struct esfq_packet_info info; -++#ifdef CONFIG_IP_NF_CONNTRACK -++ enum ip_conntrack_info ctinfo; -++ struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo); -++#endif -++ -++ switch (skb->protocol) { -++ case __constant_htons(ETH_P_IP): -++ { -++ struct iphdr *iph = skb->nh.iph; -++ info.dst = iph->daddr; -++ info.src = iph->saddr; -++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && -++ (iph->protocol == IPPROTO_TCP || -++ iph->protocol == IPPROTO_UDP || -++ iph->protocol == IPPROTO_SCTP || -++ iph->protocol == IPPROTO_DCCP || -++ iph->protocol == IPPROTO_ESP)) -++ info.proto = *(((u32*)iph) + iph->ihl); -++ else -++ info.proto = iph->protocol; -++ break; -++ } -++ default: -++ info.dst = (u32)(unsigned long)skb->dst; -++ info.src = (u32)(unsigned long)skb->sk; -++ info.proto = skb->protocol; -++ } -++ -++#ifdef CONFIG_IP_NF_CONNTRACK -++ /* defaults if there is no conntrack info */ -++ info.ctorigsrc = info.src; -++ info.ctorigdst = info.dst; -++ info.ctreplsrc = 
info.dst; -++ info.ctrepldst = info.src; -++ /* collect conntrack info */ -++ IP_NF_ASSERT(ct); -++ if (ct) { -++ if (skb->protocol == __constant_htons(ETH_P_IP)) { -++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; -++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; -++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; -++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; -++ } -++ } -++#endif -++ -++ switch(q->hash_kind) -++ { -++ case TCA_SFQ_HASH_CLASSIC: -++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); -++ case TCA_SFQ_HASH_DST: -++ return esfq_jhash_1word(q, info.dst); -++ case TCA_SFQ_HASH_SRC: -++ return esfq_jhash_1word(q, info.src); -++#ifdef CONFIG_IP_NF_CONNTRACK -++ case TCA_SFQ_HASH_CTORIGDST: -++ return esfq_jhash_1word(q, info.ctorigdst); -++ case TCA_SFQ_HASH_CTORIGSRC: -++ return esfq_jhash_1word(q, info.ctorigsrc); -++ case TCA_SFQ_HASH_CTREPLDST: -++ return esfq_jhash_1word(q, info.ctrepldst); -++ case TCA_SFQ_HASH_CTREPLSRC: -++ return esfq_jhash_1word(q, info.ctreplsrc); -++ case TCA_SFQ_HASH_CTNATCHG: -++ { -++ if (info.ctorigdst == info.ctreplsrc) -++ return esfq_jhash_1word(q, info.ctorigsrc); -++ else -++ return esfq_jhash_1word(q, info.ctreplsrc); -++ } -++#endif -++ default: -++ if (net_ratelimit()) -++ printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n"); -++ } -++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); -++} -++ -++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) -++{ -++ esfq_index p, n; -++ int d = q->qs[x].qlen + q->depth; -++ -++ p = d; -++ n = q->dep[d].next; -++ q->dep[x].next = n; -++ q->dep[x].prev = p; -++ q->dep[p].next = q->dep[n].prev = x; -++} -++ -++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) -++{ -++ esfq_index p, n; -++ -++ n = q->dep[x].next; -++ p = q->dep[x].prev; -++ q->dep[p].next = n; -++ q->dep[n].prev = p; -++ -++ if (n == p && q->max_depth == q->qs[x].qlen + 1) -++ q->max_depth--; -++ -++ esfq_link(q, x); -++} -++ -++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) -++{ -++ esfq_index p, n; -++ int d; -++ -++ n = q->dep[x].next; -++ p = q->dep[x].prev; -++ q->dep[p].next = n; -++ q->dep[n].prev = p; -++ d = q->qs[x].qlen; -++ if (q->max_depth < d) -++ q->max_depth = d; -++ -++ esfq_link(q, x); -++} -++ -++static unsigned int esfq_drop(struct Qdisc *sch) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ esfq_index d = q->max_depth; -++ struct sk_buff *skb; -++ unsigned int len; -++ -++ /* Queue is full! Find the longest slot and -++ drop a packet from it */ -++ -++ if (d > 1) { -++ esfq_index x = q->dep[d+q->depth].next; -++ skb = q->qs[x].prev; -++ len = skb->len; -++ __skb_unlink(skb, &q->qs[x]); -++ kfree_skb(skb); -++ esfq_dec(q, x); -++ sch->q.qlen--; -++ sch->stats.drops++; -++ sch->stats.backlog -= len; -++ return len; -++ } -++ -++ if (d == 1) { -++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ -++ d = q->next[q->tail]; -++ q->next[q->tail] = q->next[d]; -++ q->allot[q->next[d]] += q->quantum; -++ skb = q->qs[d].prev; -++ len = skb->len; -++ __skb_unlink(skb, &q->qs[d]); -++ kfree_skb(skb); -++ esfq_dec(q, d); -++ sch->q.qlen--; -++ q->ht[q->hash[d]] = q->depth; -++ sch->stats.drops++; -++ sch->stats.backlog -= len; -++ return len; -++ } -++ -++ return 0; -++} -++ -++static int -++esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ unsigned hash = esfq_hash(q, skb); -++ unsigned depth = q->depth; -++ esfq_index x; -++ -++ x = q->ht[hash]; -++ if (x == depth) { -++ q->ht[hash] = x = q->dep[depth].next; -++ q->hash[x] = hash; -++ } -++ sch->stats.backlog += skb->len; -++ __skb_queue_tail(&q->qs[x], skb); -++ esfq_inc(q, x); -++ if (q->qs[x].qlen == 1) { /* The flow is new */ -++ if (q->tail == depth) { /* It is the first flow */ -++ q->tail = x; -++ q->next[x] = x; -++ q->allot[x] = q->quantum; -++ } else { -++ q->next[x] = q->next[q->tail]; -++ q->next[q->tail] = x; -++ q->tail = x; -++ } -++ } -++ if (++sch->q.qlen < q->limit-1) { -++ sch->stats.bytes += skb->len; -++ sch->stats.packets++; -++ return 0; -++ } -++ -++ esfq_drop(sch); -++ return NET_XMIT_CN; -++} -++ -++static int -++esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ unsigned hash = esfq_hash(q, skb); -++ unsigned depth = q->depth; -++ esfq_index x; -++ -++ x = q->ht[hash]; -++ if (x == depth) { -++ q->ht[hash] = x = q->dep[depth].next; -++ q->hash[x] = hash; -++ } -++ sch->stats.backlog += skb->len; -++ __skb_queue_head(&q->qs[x], skb); -++ esfq_inc(q, x); -++ if (q->qs[x].qlen == 1) { /* The flow is new */ -++ if (q->tail == depth) { /* It is the first flow */ -++ q->tail = x; -++ q->next[x] = x; -++ q->allot[x] = q->quantum; -++ } else { -++ q->next[x] = q->next[q->tail]; -++ q->next[q->tail] = x; -++ q->tail = x; -++ } -++ } -++ if (++sch->q.qlen < q->limit - 1) { 
-++ return 0; -++ } -++ -++ sch->stats.drops++; -++ esfq_drop(sch); -++ return NET_XMIT_CN; -++} -++ -++ -++ -++ -++static struct sk_buff * -++esfq_dequeue(struct Qdisc* sch) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ struct sk_buff *skb; -++ unsigned depth = q->depth; -++ esfq_index a, old_a; -++ -++ /* No active slots */ -++ if (q->tail == depth) -++ return NULL; -++ -++ a = old_a = q->next[q->tail]; -++ -++ /* Grab packet */ -++ skb = __skb_dequeue(&q->qs[a]); -++ esfq_dec(q, a); -++ sch->q.qlen--; -++ sch->stats.backlog -= skb->len; -++ -++ /* Is the slot empty? */ -++ if (q->qs[a].qlen == 0) { -++ q->ht[q->hash[a]] = depth; -++ a = q->next[a]; -++ if (a == old_a) { -++ q->tail = depth; -++ return skb; -++ } -++ q->next[q->tail] = a; -++ q->allot[a] += q->quantum; -++ } else if ((q->allot[a] -= skb->len) <= 0) { -++ q->tail = a; -++ a = q->next[a]; -++ q->allot[a] += q->quantum; -++ } -++ -++ return skb; -++} -++ -++static void -++esfq_reset(struct Qdisc* sch) -++{ -++ struct sk_buff *skb; -++ -++ while ((skb = esfq_dequeue(sch)) != NULL) -++ kfree_skb(skb); -++} -++ -++static void esfq_perturbation(unsigned long arg) -++{ -++ struct Qdisc *sch = (struct Qdisc*)arg; -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ -++ q->perturbation = net_random()&0x1F; -++ -++ if (q->perturb_period) { -++ q->perturb_timer.expires = jiffies + q->perturb_period; -++ add_timer(&q->perturb_timer); -++ } -++} -++ -++static int esfq_change(struct Qdisc *sch, struct rtattr *opt) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ struct tc_esfq_qopt *ctl = RTA_DATA(opt); -++ int old_perturb = q->perturb_period; -++ -++ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) -++ return -EINVAL; -++ -++ sch_tree_lock(sch); -++ q->quantum = ctl->quantum ? 
: psched_mtu(sch->dev); -++ q->perturb_period = ctl->perturb_period*HZ; -++// q->hash_divisor = ctl->divisor; -++// q->tail = q->limit = q->depth = ctl->flows; -++ -++ if (ctl->limit) -++ q->limit = min_t(u32, ctl->limit, q->depth); -++ -++ if (ctl->hash_kind) { -++ q->hash_kind = ctl->hash_kind; -++ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) -++ q->perturb_period = 0; -++ } -++ -++ // is sch_tree_lock enough to do this ? -++ while (sch->q.qlen >= q->limit-1) -++ esfq_drop(sch); -++ -++ if (old_perturb) -++ del_timer(&q->perturb_timer); -++ if (q->perturb_period) { -++ q->perturb_timer.expires = jiffies + q->perturb_period; -++ add_timer(&q->perturb_timer); -++ } else { -++ q->perturbation = 0; -++ } -++ sch_tree_unlock(sch); -++ return 0; -++} -++ -++static int esfq_init(struct Qdisc *sch, struct rtattr *opt) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ struct tc_esfq_qopt *ctl; -++ esfq_index p = ~0U/2; -++ int i; -++ -++ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl))) -++ return -EINVAL; -++ -++ init_timer(&q->perturb_timer); -++ q->perturb_timer.data = (unsigned long)sch; -++ q->perturb_timer.function = esfq_perturbation; -++ q->perturbation = 0; -++ q->hash_kind = TCA_SFQ_HASH_CLASSIC; -++ q->max_depth = 0; -++ q->dyn_min = ~0U; /* maximum value for this type */ -++ q->dyn_max = 0; /* dyn_min/dyn_max will be set properly upon first packet */ -++ if (opt == NULL) { -++ q->quantum = psched_mtu(sch->dev); -++ q->perturb_period = 0; -++ q->hash_divisor = 1024; -++ q->tail = q->limit = q->depth = 128; -++ -++ } else { -++ ctl = RTA_DATA(opt); -++ q->quantum = ctl->quantum ? : psched_mtu(sch->dev); -++ q->perturb_period = ctl->perturb_period*HZ; -++ q->hash_divisor = ctl->divisor ? : 1024; -++ q->tail = q->limit = q->depth = ctl->flows ? 
: 128; -++ -++ if ( q->depth > p - 1 ) -++ return -EINVAL; -++ -++ if (ctl->limit) -++ q->limit = min_t(u32, ctl->limit, q->depth); -++ -++ if (ctl->hash_kind) { -++ q->hash_kind = ctl->hash_kind; -++ } -++ -++ if (q->perturb_period) { -++ q->perturb_timer.expires = jiffies + q->perturb_period; -++ add_timer(&q->perturb_timer); -++ } -++ } -++ -++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); -++ if (!q->ht) -++ goto err_case; -++ -++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); -++ if (!q->dep) -++ goto err_case; -++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); -++ if (!q->next) -++ goto err_case; -++ -++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); -++ if (!q->allot) -++ goto err_case; -++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); -++ if (!q->hash) -++ goto err_case; -++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); -++ if (!q->qs) -++ goto err_case; -++ -++ for (i=0; i< q->hash_divisor; i++) -++ q->ht[i] = q->depth; -++ for (i=0; i<q->depth; i++) { -++ skb_queue_head_init(&q->qs[i]); -++ q->dep[i+q->depth].next = i+q->depth; -++ q->dep[i+q->depth].prev = i+q->depth; -++ } -++ -++ for (i=0; i<q->depth; i++) -++ esfq_link(q, i); -++ return 0; -++err_case: -++ del_timer(&q->perturb_timer); -++ if (q->ht) -++ kfree(q->ht); -++ if (q->dep) -++ kfree(q->dep); -++ if (q->next) -++ kfree(q->next); -++ if (q->allot) -++ kfree(q->allot); -++ if (q->hash) -++ kfree(q->hash); -++ if (q->qs) -++ kfree(q->qs); -++ return -ENOBUFS; -++} -++ -++static void esfq_destroy(struct Qdisc *sch) -++{ -++ struct esfq_sched_data *q = qdisc_priv(sch); -++ del_timer(&q->perturb_timer); -++ if(q->ht) -++ kfree(q->ht); -++ if(q->dep) -++ kfree(q->dep); -++ if(q->next) -++ kfree(q->next); -++ if(q->allot) -++ kfree(q->allot); -++ if(q->hash) -++ kfree(q->hash); -++ if(q->qs) -++ kfree(q->qs); -++} -++ -++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) -++{ -++ struct 
esfq_sched_data *q = qdisc_priv(sch); -++ unsigned char *b = skb->tail; -++ struct tc_esfq_qopt opt; -++ -++ opt.quantum = q->quantum; -++ opt.perturb_period = q->perturb_period/HZ; -++ -++ opt.limit = q->limit; -++ opt.divisor = q->hash_divisor; -++ opt.flows = q->depth; -++ opt.hash_kind = q->hash_kind; -++ -++ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); -++ -++ return skb->len; -++ -++rtattr_failure: -++ skb_trim(skb, b - skb->data); -++ return -1; -++} -++ -++static struct Qdisc_ops esfq_qdisc_ops = -++{ -++ .next = NULL, -++ .cl_ops = NULL, -++ .id = "esfq", -++ .priv_size = sizeof(struct esfq_sched_data), -++ .enqueue = esfq_enqueue, -++ .dequeue = esfq_dequeue, -++ .requeue = esfq_requeue, -++ .drop = esfq_drop, -++ .init = esfq_init, -++ .reset = esfq_reset, -++ .destroy = esfq_destroy, -++ .change = NULL, /* esfq_change - needs more work */ -++ .dump = esfq_dump, -++}; -++ -++static int __init esfq_module_init(void) -++{ -++ return register_qdisc(&esfq_qdisc_ops); -++} -++static void __exit esfq_module_exit(void) -++{ -++ unregister_qdisc(&esfq_qdisc_ops); -++} -++module_init(esfq_module_init) -++module_exit(esfq_module_exit) -++MODULE_LICENSE("GPL"); +diff -urN linux-2.4.34/Documentation/Configure.help linux-2.4.34/Documentation/Configure.help +--- linux-2.4.34/Documentation/Configure.help 2007-05-10 19:37:42.000000000 -0400 ++++ linux-2.4.34/Documentation/Configure.help 2007-05-10 19:49:49.000000000 -0400 +@@ -11127,6 +11127,24 @@ + whenever you want). If you want to compile it as a module, say M + here and read . + ++ESFQ queue ++CONFIG_NET_SCH_ESFQ ++ Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) ++ packet scheduling algorithm for some of your network devices or as a ++ leaf discipline for the CBQ scheduling algorithm (see the top of ++ for details and references about the SFQ ++ algorithm). 
++ ++ This is an enchanced SFQ version which allows you to control the ++ hardcoded values in the SFQ scheduler: queue depth, hash table size, ++ queues limit. Also adds control to the hash function used to identify ++ packet flows. Hash by src or dst ip and original sfq hash. ++ ++ This code is also available as a module called sch_esfq.o ( = code ++ which can be inserted in and removed from the running kernel ++ whenever you want). If you want to compile it as a module, say M ++ here and read . ++ + CSZ packet scheduler + CONFIG_NET_SCH_CSZ + Say Y here if you want to use the Clark-Shenker-Zhang (CSZ) packet +diff -urN linux-2.4.34/include/linux/pkt_sched.h linux-2.4.34/include/linux/pkt_sched.h +--- linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:38:19.000000000 -0400 ++++ linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:53:59.000000000 -0400 +@@ -173,8 +173,36 @@ + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. ++ * ++ * If you need to play with these values use esfq instead. 
+ */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_SFQ_HASH_CLASSIC, ++ TCA_SFQ_HASH_DST, ++ TCA_SFQ_HASH_SRC, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, ++ TCA_SFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ + /* RED section */ + + enum +diff -urN linux-2.4.34/net/sched/Config.in linux-2.4.34/net/sched/Config.in +--- linux-2.4.34/net/sched/Config.in 2007-05-10 19:38:31.000000000 -0400 ++++ linux-2.4.34/net/sched/Config.in 2007-05-10 19:54:45.000000000 -0400 +@@ -12,6 +12,7 @@ + tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO + tristate ' RED queue' CONFIG_NET_SCH_RED + tristate ' SFQ queue' CONFIG_NET_SCH_SFQ ++tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ + tristate ' TEQL queue' CONFIG_NET_SCH_TEQL + tristate ' TBF queue' CONFIG_NET_SCH_TBF + tristate ' GRED queue' CONFIG_NET_SCH_GRED +diff -urN linux-2.4.34/net/sched/Makefile linux-2.4.34/net/sched/Makefile +--- linux-2.4.34/net/sched/Makefile 2007-05-10 19:38:31.000000000 -0400 ++++ linux-2.4.34/net/sched/Makefile 2007-05-10 19:55:13.000000000 -0400 +@@ -19,6 +19,7 @@ + obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o + obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o + obj-$(CONFIG_NET_SCH_RED) += sch_red.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +diff -urN linux-2.4.34/net/sched/sch_esfq.c linux-2.4.34/net/sched/sch_esfq.c +--- linux-2.4.34/net/sched/sch_esfq.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.4.34/net/sched/sch_esfq.c 2007-05-10 
19:57:15.000000000 -0400 +@@ -0,0 +1,649 @@ ++/* ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, ++ * ++ * Changes: Alexander Atanasov, ++ * Added dynamic depth,limit,divisor,hash_kind options. ++ * Added dst and src hashes. ++ * ++ * Alexander Clouter, ++ * Ported ESFQ to Linux 2.6. ++ * ++ * Corey Hickey, ++ * Maintenance of the Linux 2.6 port. ++ * Added fwmark hash (thanks to Robert Kurjata). ++ * Added usage of jhash. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define IPPROTO_DCCP 33 ++#define qdisc_priv(q) ((void *)(q->data)) ++ ++#ifdef CONFIG_IP_NF_CONNTRACK ++/* #include */ ++#include ++#endif ++ ++/* Stochastic Fairness Queuing algorithm. ++ For more comments look at sch_sfq.c. ++ The difference is that you can change limit, depth, ++ hash table size and choose alternate hash types. 
++ ++ classic: same as in sch_sfq.c ++ dst: destination IP address ++ src: source IP address ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP ++ ctnatchg: use the address which changed via nat ++ ++*/ ++ ++ ++/* This type should contain at least SFQ_DEPTH*2 values */ ++typedef unsigned int esfq_index; ++ ++struct esfq_head ++{ ++ esfq_index next; ++ esfq_index prev; ++}; ++ ++struct esfq_sched_data ++{ ++/* Parameters */ ++ int perturb_period; ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ ++ int limit; ++ unsigned depth; ++ unsigned hash_divisor; ++ unsigned hash_kind; ++/* Variables */ ++ struct timer_list perturb_timer; ++ int perturbation; ++ esfq_index tail; /* Index of current slot in round */ ++ esfq_index max_depth; /* Maximal depth */ ++ ++ esfq_index *ht; /* Hash table */ ++ esfq_index *next; /* Active slots link */ ++ short *allot; /* Current allotment per slot */ ++ unsigned short *hash; /* Hash value indexed by slots */ ++ struct sk_buff_head *qs; /* Slot queue */ ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ ++ unsigned dyn_min; /* For dynamic divisor adjustment; minimum value seen */ ++ unsigned dyn_max; /* maximum value seen */ ++ unsigned dyn_range; /* saved range */ ++}; ++ ++/* This contains the info we will hash. 
*/ ++struct esfq_packet_info ++{ ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++}; ++ ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) ++{ ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) ++{ ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} ++ ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_IP_NF_CONNTRACK ++ enum ip_conntrack_info ctinfo; ++ struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo); ++#endif ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ { ++ struct iphdr *iph = skb->nh.iph; ++ info.dst = iph->daddr; ++ info.src = iph->saddr; ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && ++ (iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP || ++ iph->protocol == IPPROTO_SCTP || ++ iph->protocol == IPPROTO_DCCP || ++ iph->protocol == IPPROTO_ESP)) ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; ++ break; ++ } ++ default: ++ info.dst = (u32)(unsigned long)skb->dst; ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++#ifdef CONFIG_IP_NF_CONNTRACK ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info 
*/ ++ IP_NF_ASSERT(ct); ++ if (ct) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; ++ } ++ } ++#endif ++ ++ switch(q->hash_kind) ++ { ++ case TCA_SFQ_HASH_CLASSIC: ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++ case TCA_SFQ_HASH_DST: ++ return esfq_jhash_1word(q, info.dst); ++ case TCA_SFQ_HASH_SRC: ++ return esfq_jhash_1word(q, info.src); ++#ifdef CONFIG_IP_NF_CONNTRACK ++ case TCA_SFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_SFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_SFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_SFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ case TCA_SFQ_HASH_CTNATCHG: ++ { ++ if (info.ctorigdst == info.ctreplsrc) ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ else ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ } ++#endif ++ default: ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n"); ++ } ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++} ++ ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d = q->qs[x].qlen + q->depth; ++ ++ p = d; ++ n = q->dep[d].next; ++ q->dep[x].next = n; ++ q->dep[x].prev = p; ++ q->dep[p].next = q->dep[n].prev = x; ++} ++ ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ ++ if (n == p && q->max_depth == q->qs[x].qlen + 1) ++ q->max_depth--; ++ ++ esfq_link(q, x); ++} ++ ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ d = q->qs[x].qlen; ++ if (q->max_depth < d) ++ q->max_depth = d; ++ ++ esfq_link(q, x); ++} ++ ++static unsigned int esfq_drop(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index d = q->max_depth; ++ struct sk_buff *skb; ++ unsigned int len; ++ ++ /* Queue is full! Find the longest slot and ++ drop a packet from it */ ++ ++ if (d > 1) { ++ esfq_index x = q->dep[d+q->depth].next; ++ skb = q->qs[x].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[x]); ++ kfree_skb(skb); ++ esfq_dec(q, x); ++ sch->q.qlen--; ++ sch->stats.drops++; ++ sch->stats.backlog -= len; ++ return len; ++ } ++ ++ if (d == 1) { ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ ++ d = q->next[q->tail]; ++ q->next[q->tail] = q->next[d]; ++ q->allot[q->next[d]] += q->quantum; ++ skb = q->qs[d].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[d]); ++ kfree_skb(skb); ++ esfq_dec(q, d); ++ sch->q.qlen--; ++ q->ht[q->hash[d]] = q->depth; ++ sch->stats.drops++; ++ sch->stats.backlog -= len; ++ return len; ++ } ++ ++ return 0; ++} ++ ++static int ++esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ sch->stats.backlog += skb->len; ++ __skb_queue_tail(&q->qs[x], skb); ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++ if (++sch->q.qlen < q->limit-1) { ++ sch->stats.bytes += skb->len; ++ sch->stats.packets++; ++ return 0; ++ } ++ ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++static int ++esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ sch->stats.backlog += skb->len; ++ __skb_queue_head(&q->qs[x], skb); ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++ if (++sch->q.qlen < q->limit - 1) { ++ return 0; ++ } ++ ++ sch->stats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; 
++} ++ ++ ++ ++ ++static struct sk_buff * ++esfq_dequeue(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct sk_buff *skb; ++ unsigned depth = q->depth; ++ esfq_index a, old_a; ++ ++ /* No active slots */ ++ if (q->tail == depth) ++ return NULL; ++ ++ a = old_a = q->next[q->tail]; ++ ++ /* Grab packet */ ++ skb = __skb_dequeue(&q->qs[a]); ++ esfq_dec(q, a); ++ sch->q.qlen--; ++ sch->stats.backlog -= skb->len; ++ ++ /* Is the slot empty? */ ++ if (q->qs[a].qlen == 0) { ++ q->ht[q->hash[a]] = depth; ++ a = q->next[a]; ++ if (a == old_a) { ++ q->tail = depth; ++ return skb; ++ } ++ q->next[q->tail] = a; ++ q->allot[a] += q->quantum; ++ } else if ((q->allot[a] -= skb->len) <= 0) { ++ q->tail = a; ++ a = q->next[a]; ++ q->allot[a] += q->quantum; ++ } ++ ++ return skb; ++} ++ ++static void ++esfq_reset(struct Qdisc* sch) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = esfq_dequeue(sch)) != NULL) ++ kfree_skb(skb); ++} ++ ++static void esfq_perturbation(unsigned long arg) ++{ ++ struct Qdisc *sch = (struct Qdisc*)arg; ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ ++ q->perturbation = net_random()&0x1F; ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++} ++ ++static int esfq_change(struct Qdisc *sch, struct rtattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct tc_esfq_qopt *ctl = RTA_DATA(opt); ++ int old_perturb = q->perturb_period; ++ ++ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) ++ return -EINVAL; ++ ++ sch_tree_lock(sch); ++ q->quantum = ctl->quantum ? 
: psched_mtu(sch->dev); ++ q->perturb_period = ctl->perturb_period*HZ; ++// q->hash_divisor = ctl->divisor; ++// q->tail = q->limit = q->depth = ctl->flows; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = ctl->hash_kind; ++ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) ++ q->perturb_period = 0; ++ } ++ ++ // is sch_tree_lock enough to do this ? ++ while (sch->q.qlen >= q->limit-1) ++ esfq_drop(sch); ++ ++ if (old_perturb) ++ del_timer(&q->perturb_timer); ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } else { ++ q->perturbation = 0; ++ } ++ sch_tree_unlock(sch); ++ return 0; ++} ++ ++static int esfq_init(struct Qdisc *sch, struct rtattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct tc_esfq_qopt *ctl; ++ esfq_index p = ~0U/2; ++ int i; ++ ++ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl))) ++ return -EINVAL; ++ ++ init_timer(&q->perturb_timer); ++ q->perturb_timer.data = (unsigned long)sch; ++ q->perturb_timer.function = esfq_perturbation; ++ q->perturbation = 0; ++ q->hash_kind = TCA_SFQ_HASH_CLASSIC; ++ q->max_depth = 0; ++ q->dyn_min = ~0U; /* maximum value for this type */ ++ q->dyn_max = 0; /* dyn_min/dyn_max will be set properly upon first packet */ ++ if (opt == NULL) { ++ q->quantum = psched_mtu(sch->dev); ++ q->perturb_period = 0; ++ q->hash_divisor = 1024; ++ q->tail = q->limit = q->depth = 128; ++ ++ } else { ++ ctl = RTA_DATA(opt); ++ q->quantum = ctl->quantum ? : psched_mtu(sch->dev); ++ q->perturb_period = ctl->perturb_period*HZ; ++ q->hash_divisor = ctl->divisor ? : 1024; ++ q->tail = q->limit = q->depth = ctl->flows ? 
: 128; ++ ++ if ( q->depth > p - 1 ) ++ return -EINVAL; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = ctl->hash_kind; ++ } ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++ } ++ ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->ht) ++ goto err_case; ++ ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); ++ if (!q->dep) ++ goto err_case; ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->next) ++ goto err_case; ++ ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); ++ if (!q->allot) ++ goto err_case; ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); ++ if (!q->hash) ++ goto err_case; ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); ++ if (!q->qs) ++ goto err_case; ++ ++ for (i=0; i< q->hash_divisor; i++) ++ q->ht[i] = q->depth; ++ for (i=0; i<q->depth; i++) { ++ skb_queue_head_init(&q->qs[i]); ++ q->dep[i+q->depth].next = i+q->depth; ++ q->dep[i+q->depth].prev = i+q->depth; ++ } ++ ++ for (i=0; i<q->depth; i++) ++ esfq_link(q, i); ++ return 0; ++err_case: ++ del_timer(&q->perturb_timer); ++ if (q->ht) ++ kfree(q->ht); ++ if (q->dep) ++ kfree(q->dep); ++ if (q->next) ++ kfree(q->next); ++ if (q->allot) ++ kfree(q->allot); ++ if (q->hash) ++ kfree(q->hash); ++ if (q->qs) ++ kfree(q->qs); ++ return -ENOBUFS; ++} ++ ++static void esfq_destroy(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ del_timer(&q->perturb_timer); ++ if(q->ht) ++ kfree(q->ht); ++ if(q->dep) ++ kfree(q->dep); ++ if(q->next) ++ kfree(q->next); ++ if(q->allot) ++ kfree(q->allot); ++ if(q->hash) ++ kfree(q->hash); ++ if(q->qs) ++ kfree(q->qs); ++} ++ ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned char *b = skb->tail; ++ struct tc_esfq_qopt 
opt; ++ ++ opt.quantum = q->quantum; ++ opt.perturb_period = q->perturb_period/HZ; ++ ++ opt.limit = q->limit; ++ opt.divisor = q->hash_divisor; ++ opt.flows = q->depth; ++ opt.hash_kind = q->hash_kind; ++ ++ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); ++ ++ return skb->len; ++ ++rtattr_failure: ++ skb_trim(skb, b - skb->data); ++ return -1; ++} ++ ++static struct Qdisc_ops esfq_qdisc_ops = ++{ ++ .next = NULL, ++ .cl_ops = NULL, ++ .id = "esfq", ++ .priv_size = sizeof(struct esfq_sched_data), ++ .enqueue = esfq_enqueue, ++ .dequeue = esfq_dequeue, ++ .requeue = esfq_requeue, ++ .drop = esfq_drop, ++ .init = esfq_init, ++ .reset = esfq_reset, ++ .destroy = esfq_destroy, ++ .change = NULL, /* esfq_change - needs more work */ ++ .dump = esfq_dump, ++}; ++ ++static int __init esfq_module_init(void) ++{ ++ return register_qdisc(&esfq_qdisc_ops); ++} ++static void __exit esfq_module_exit(void) ++{ ++ unregister_qdisc(&esfq_qdisc_ops); ++} ++module_init(esfq_module_init) ++module_exit(esfq_module_exit) ++MODULE_LICENSE("GPL");