#define RDS_TRANS_COUNT 3
#define RDS_TRANS_NONE (~0)
+/* IOCTLS commands for SOL_RDS */
+#define SIOCRDSSETTOS (SIOCPROTOPRIVATE)
+#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1)
+
+typedef __u8 rds_tos_t;
+
/*
* Control message types for SOL_RDS.
*
__be32 faddr;
__u8 transport[TRANSNAMSIZ]; /* null term ascii */
__u8 flags;
+ __u8 tos;
} __attribute__((packed));
struct rds6_info_connection {
__be16 lport;
__be16 fport;
__u8 flags;
+ __u8 tos;
} __attribute__((packed));
struct rds6_info_message {
__u32 last_sent_nxt;
__u32 last_expected_una;
__u32 last_seen_una;
+ __u8 tos;
} __attribute__((packed));
struct rds6_info_tcp_socket {
__u32 max_send_sge;
__u32 rdma_mr_max;
__u32 rdma_mr_size;
+ __u8 tos;
};
struct rds6_info_rdma_connection {
__u32 max_send_sge;
__u32 rdma_mr_max;
__u32 rdma_mr_size;
+ __u8 tos;
};
/* RDS message Receive Path Latency points */
static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
- return -ENOIOCTLCMD;
+ struct rds_sock *rs = rds_sk_to_rs(sock->sk);
+ rds_tos_t tos;
+
+ switch (cmd) {
+ case SIOCRDSSETTOS:
+ if (get_user(tos, (rds_tos_t __user *)arg))
+ return -EFAULT;
+
+ if (rs->rs_transport &&
+ rs->rs_transport->t_type == RDS_TRANS_TCP)
+ tos = 0;
+
+ spin_lock_bh(&rds_sock_lock);
+ if (rs->rs_tos || rs->rs_conn) {
+ spin_unlock_bh(&rds_sock_lock);
+ return -EINVAL;
+ }
+ rs->rs_tos = tos;
+ spin_unlock_bh(&rds_sock_lock);
+ break;
+ case SIOCRDSGETTOS:
+ spin_lock_bh(&rds_sock_lock);
+ tos = rs->rs_tos;
+ spin_unlock_bh(&rds_sock_lock);
+ if (put_user(tos, (rds_tos_t __user *)arg))
+ return -EFAULT;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return 0;
}
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
+ rs->rs_tos = 0;
+ rs->rs_conn = NULL;
spin_lock_bh(&rds_sock_lock);
list_add_tail(&rs->rs_item, &rds_sock_list);
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- int dev_if)
+ u8 tos, int dev_if)
{
struct rds_connection *conn, *ret = NULL;
if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
ipv6_addr_equal(&conn->c_laddr, laddr) &&
conn->c_trans == trans &&
+ conn->c_tos == tos &&
net == rds_conn_net(conn) &&
conn->c_dev_if == dev_if) {
ret = conn;
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- gfp_t gfp,
+ gfp_t gfp, u8 tos,
int is_outgoing,
int dev_if)
{
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rcu_read_lock();
- conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
+ conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
if (conn &&
conn->c_loopback &&
conn->c_trans != &rds_loop_transport &&
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
conn->c_faddr = *faddr;
conn->c_dev_if = dev_if;
+ conn->c_tos = tos;
#if IS_ENABLED(CONFIG_IPV6)
/* If the local address is link local, set c_bound_if to be the
struct rds_connection *found;
found = rds_conn_lookup(net, head, laddr, faddr, trans,
- dev_if);
+ tos, dev_if);
if (found) {
struct rds_conn_path *cp;
int i;
struct rds_connection *rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
- struct rds_transport *trans, gfp_t gfp,
- int dev_if)
+ struct rds_transport *trans, u8 tos,
+ gfp_t gfp, int dev_if)
{
- return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create);
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- gfp_t gfp, int dev_if)
+ u8 tos, gfp_t gfp, int dev_if)
{
- return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
iinfo->src_addr = conn->c_laddr.s6_addr32[3];
iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
+ iinfo->tos = conn->c_tos;
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
/* RDS/IB is not currently netns aware, thus init_net */
conn = rds_conn_create(&init_net, daddr6, saddr6,
- &rds_ib_transport, GFP_KERNEL, ifindex);
+ &rds_ib_transport, 0, GFP_KERNEL, ifindex);
if (IS_ERR(conn)) {
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
conn = NULL;
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
&conn->c_laddr, &conn->c_faddr);
conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
+ conn->c_tos = 0;
rds_conn_drop(conn);
}
rdsdebug("Connection rejected: %s\n",
unsigned int c_version;
possible_net_t c_net;
+ /* TOS */
+ u8 c_tos;
+
struct list_head c_map_item;
unsigned long c_map_queued;
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
struct rds_msg_zcopy_queue rs_zcookie_queue;
+ u8 rs_tos;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
struct rds_connection *rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
- struct rds_transport *trans, gfp_t gfp,
+ struct rds_transport *trans,
+ u8 tos, gfp_t gfp,
int dev_if);
struct rds_connection *rds_conn_create_outgoing(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
- gfp_t gfp, int dev_if);
+ u8 tos, gfp_t gfp, int dev_if);
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
minfo.len = be32_to_cpu(inc->i_hdr.h_len);
+ minfo.tos = inc->i_conn->c_tos;
if (flip) {
minfo.laddr = daddr;
/* rds_conn_create has a spinlock that runs with IRQ off.
* Caching the conn in the socket helps a lot. */
- if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
+ if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) {
conn = rs->rs_conn;
- else {
+ } else {
conn = rds_conn_create_outgoing(sock_net(sock->sk),
&rs->rs_bound_addr, &daddr,
- rs->rs_transport,
+ rs->rs_transport, 0,
sock->sk->sk_allocation,
scope_id);
if (IS_ERR(conn)) {
tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
tsinfo.last_expected_una = tc->t_last_expected_una;
tsinfo.last_seen_una = tc->t_last_seen_una;
+ tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
}
conn = rds_conn_create(sock_net(sock->sk),
my_addr, peer_addr,
- &rds_tcp_transport, GFP_KERNEL, dev_if);
+ &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);