struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
- unsigned seq);
+ uint seq,
+ bool pkts_sent
+ );
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
u32 count;
u32 tx_limit;
u32 tx_count;
+ u8 starved_cnt;
};
#define SDMA_AVAIL_REASON 0
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
- unsigned seq),
+ uint seq,
+ bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
return tx;
}
+/**
+ * iowait_queue - Put the iowait on a wait queue
+ * @pkts_sent: have some packets been sent before queuing?
+ * @w: the iowait struct
+ * @wait_head: the wait queue
+ *
+ * This function is called to insert an iowait struct into a
+ * wait queue after a resource (eg, sdma decriptor or pio
+ * buffer) is run out.
+ */
+static inline void iowait_queue(bool pkts_sent, struct iowait *w,
+ struct list_head *wait_head)
+{
+ /*
+ * To play fair, insert the iowait at the tail of the wait queue if it
+ * has already sent some packets; Otherwise, put it at the head.
+ */
+ if (pkts_sent) {
+ list_add_tail(&w->list, wait_head);
+ w->starved_cnt = 0;
+ } else {
+ list_add(&w->list, wait_head);
+ w->starved_cnt++;
+ }
+}
+
+/**
+ * iowait_starve_clear - clear the wait queue's starve count
+ * @pkts_sent: have some packets been sent?
+ * @w: the iowait struct
+ *
+ * This function is called to clear the starve count. If no
+ * packets have been sent, the starve count will not be cleared.
+ */
+static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
+{
+ if (pkts_sent)
+ w->starved_cnt = 0;
+}
+
+/**
+ * iowait_starve_find_max - Find the maximum of the starve count
+ * @w: the iowait struct
+ * @max: a variable containing the max starve count
+ * @idx: the index of the current iowait in an array
+ * @max_idx: a variable containing the array index for the
+ * iowait entry that has the max starve count
+ *
+ * This function is called to compare the starve count of a
+ * given iowait with the given max starve count. The max starve
+ * count and the index will be updated if the iowait's start
+ * count is larger.
+ */
+static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
+ uint idx, uint *max_idx)
+{
+ if (w->starved_cnt > *max) {
+ *max = w->starved_cnt;
+ *max_idx = idx;
+ }
+}
+
#endif
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
- unsigned i, n = 0;
+ uint i, n = 0, max_idx = 0;
+ u8 max_starved_cnt = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
+ iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
/* refcount held until actual wake up */
qps[n++] = qp;
}
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
- for (i = 0; i < n; i++)
- hfi1_qp_wakeup(qps[i],
+ /* Wake up the most starved one first */
+ if (n)
+ hfi1_qp_wakeup(qps[max_idx],
RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+ for (i = 0; i < n; i++)
+ if (i != max_idx)
+ hfi1_qp_wakeup(qps[i],
+ RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
}
/* translate a send credit update to a bit code of reasons */
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
- unsigned seq);
+ unsigned int seq,
+ bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
- unsigned seq)
+ uint seq,
+ bool pkts_sent)
{
struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
struct rvt_qp *qp;
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
- list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+ iowait_queue(pkts_sent, &priv->s_iowait,
+ &sde->dmawait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
static bool schedule_send_yield(struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
+ ps->pkts_sent = true;
+
if (unlikely(time_after(jiffies, ps->timeout))) {
if (!ps->in_thread ||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
ps.timeout = jiffies + ps.timeout_int;
ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+ ps.pkts_sent = false;
/* insure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
-
+ iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail);
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);
/**
*
* This is called with head_lock held.
*/
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
- unsigned i, n = 0, seq;
+ uint i, n = 0, seq, max_idx = 0;
struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
+ u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
if (num_desc > avail)
break;
avail -= num_desc;
+ /* Find the most starved wait memeber */
+ iowait_starve_find_max(wait, &max_starved_cnt,
+ n, &max_idx);
list_del_init(&wait->list);
waits[n++] = wait;
}
}
} while (read_seqretry(&dev->iowait_lock, seq));
+ /* Schedule the most starved one first */
+ if (n)
+ waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+
for (i = 0; i < n; i++)
- waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
+ if (i != max_idx)
+ waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
/* head_lock must be held */
static int sdma_check_progress(
struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx)
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq);
+ ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
+ * @pkts_sent: has any packet been sent yet?
*
* The call submits the tx into the ring. If a iowait structure is non-NULL
* the packet will be queued to the list in wait.
*/
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx)
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret = 0;
u16 tail;
ret = -ECOMM;
goto unlock;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, pkts_sent);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
}
update_tail:
total_count = submit_count + flush_count;
- if (wait)
+ if (wait) {
iowait_sdma_add(wait, total_count);
+ iowait_starve_clear(submit_count > 0, wait);
+ }
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
ret = -ECOMM;
goto update_tail;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx);
+ struct sdma_txreq *tx,
+ bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
struct list_head *tx_list,
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
- unsigned int seq);
+ uint seq,
+ bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
- unsigned seq)
+ uint seq,
+ bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&dev->iowait_lock);
if (list_empty(&pq->busy.list))
- list_add_tail(&pq->busy.list, &sde->dmawait);
+ iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
eagain:
if (unlikely(ret))
goto bail_build;
}
- ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+ ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
+ ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
- list_add_tail(&priv->s_iowait.list, &sc->piowait);
+ iowait_queue(ps->pkts_sent, &priv->s_iowait,
+ &sc->piowait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
unsigned long timeout_int;
int cpu;
bool in_thread;
+ bool pkts_sent;
};
#define HFI1_PSN_CREDIT 16
struct sdma_txreq stx;
unsigned int state;
u8 q_idx;
+ bool pkts_sent;
};
/**
goto free_desc;
tx->retry_count = 0;
- ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
+ vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
+ if (!ret) {
+ vnic_sdma->pkts_sent = true;
+ iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait);
+ }
return ret;
free_desc:
tx_err:
if (ret != -EBUSY)
dev_kfree_skb_any(skb);
+ else
+ vnic_sdma->pkts_sent = false;
return ret;
}
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
- unsigned int seq)
+ uint seq,
+ bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait, struct hfi1_vnic_sdma, wait);
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
- list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ iowait_queue(pkts_sent, wait, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}