lnet_nid_t ibp_nid; /* who's on the other end(s) */
struct lnet_ni *ibp_ni; /* LNet interface */
struct list_head ibp_conns; /* all active connections */
+ struct kib_conn *ibp_next_conn; /* next connection to send on for
+ * round robin */
struct list_head ibp_tx_queue; /* msgs waiting for a conn */
__u64 ibp_incarnation; /* incarnation of peer */
/* when (in jiffies) I was last alive */
/* current active connection attempts */
unsigned short ibp_connecting;
/* reconnect this peer later */
- unsigned short ibp_reconnecting:1;
+ unsigned char ibp_reconnecting;
/* counter of how many times we triggered a conn race */
unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
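+/* Select the conn to transmit on, rotating round robin over ibp_conns;
+ * per the _locked suffix, the caller is expected to hold the relevant lock.
+ */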
static inline struct kib_conn *
kiblnd_get_conn_locked(struct kib_peer *peer)
{
+ struct list_head *next;
+
LASSERT(!list_empty(&peer->ibp_conns));
- /* just return the first connection */
- return list_entry(peer->ibp_conns.next, struct kib_conn, ibc_list);
+ /* Advance to the next connection, skipping the list head */
+ if (!peer->ibp_next_conn ||
+ peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns)
+ next = peer->ibp_conns.next;
+ else
+ next = peer->ibp_next_conn->ibc_list.next;
+ peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list);
+
+ return peer->ibp_next_conn;
}
static inline int
LASSERT(net);
LASSERT(peer->ibp_connecting > 0);
- LASSERT(!peer->ibp_reconnecting);
cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
IB_QPT_RC);
LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
list_empty(&peer->ibp_conns));
- peer->ibp_reconnecting = 0;
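+ /* this reconnect attempt is now being handled; drop it from the count */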
+ peer->ibp_reconnecting--;
if (!kiblnd_peer_active(peer)) {
list_splice_init(&peer->ibp_tx_queue, &txs);
rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
unsigned long flags;
int rc;
+ int i;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
/*
* If I get here, I've committed to send, so I complete the tx with
/* Brand new peer */
LASSERT(!peer->ibp_connecting);
- peer->ibp_connecting = 1;
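+ /* open one connection attempt per configured connection to this peer */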
+ tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ peer->ibp_connecting = tunables->lnd_conns_per_peer;
/* always called with a ref on ni, which prevents ni being shutdown */
LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown);
write_unlock_irqrestore(g_lock, flags);
- kiblnd_connect_peer(peer);
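+ /* launch lnd_conns_per_peer connection attempts to the peer */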
+ for (i = 0; i < tunables->lnd_conns_per_peer; i++)
+ kiblnd_connect_peer(peer);
kiblnd_peer_decref(peer);
}
}
dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
+ if (peer->ibp_next_conn == conn)
+ /* clear next_conn so it won't be used */
+ peer->ibp_next_conn = NULL;
list_del(&conn->ibc_list);
/* connd (see below) takes over ibc_list's ref */
kiblnd_conn_addref(conn);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- /* Schedule blocked txs */
+ /* Schedule blocked txs
+ * Note: if we are running with conns_per_peer > 1, these blocked
+ * txs will all be queued on the first connection to become ready;
+ * round robin does not apply to this first batch.
+ */
spin_lock(&conn->ibc_lock);
list_for_each_entry_safe(tx, tmp, &txs, tx_list) {
list_del(&tx->tx_list);
LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */
- LASSERT(!peer->ibp_reconnecting);
if (cp) {
msg_size = cp->ibcp_max_msg_size;
*/
reconnect = (!list_empty(&peer->ibp_tx_queue) ||
peer->ibp_version != version) &&
- peer->ibp_connecting == 1 &&
+ peer->ibp_connecting &&
!peer->ibp_accepting;
if (!reconnect) {
reason = "no need";
}
conn->ibc_reconnect = 1;
- peer->ibp_reconnecting = 1;
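+ /* one more connection to re-establish; bump the pending count */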
+ peer->ibp_reconnecting++;
peer->ibp_version = version;
if (incarnation)
peer->ibp_incarnation = incarnation;
module_param(nscheds, int, 0444);
MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
+static unsigned int conns_per_peer = 1;
+module_param(conns_per_peer, uint, 0444);
+MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
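+/* module-wide default; an illustrative setting would be
+ * "options ko2iblnd conns_per_peer=4" in modprobe.d
+ */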
+
/* NB: this value is shared by all CPTs, it can grow at runtime */
static int ntx = 512;
module_param(ntx, int, 0444);
tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
if (!tunables->lnd_fmr_cache)
tunables->lnd_fmr_cache = fmr_cache;
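+ /* 0 means the per-NI value was not set; fall back to the module
+ * parameter, guarding against a zero conns_per_peer
+ */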
+ if (!tunables->lnd_conns_per_peer) {
+ tunables->lnd_conns_per_peer = conns_per_peer ?
+ conns_per_peer : 1;
+ }
return 0;
}
default_tunables.lnd_fmr_pool_size = fmr_pool_size;
default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
default_tunables.lnd_fmr_cache = fmr_cache;
+ default_tunables.lnd_conns_per_peer = conns_per_peer;
}