From: Andy Grover Date: Tue, 12 Jan 2010 20:15:02 +0000 (-0800) Subject: RDS: break out rdma and data ops into nested structs in rds_message X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=e779137aa76d38d5c33a98ed887092ae4e4f016f;p=openwrt%2Fstaging%2Fblogic.git RDS: break out rdma and data ops into nested structs in rds_message Clearly separate rdma-related variables in rm from data-related ones. This is in anticipation of adding atomic support. Signed-off-by: Andy Grover --- diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 0b0090d2ee01..53750203c9e5 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -83,11 +83,11 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, rdsdebug("ic %p send %p rm %p\n", ic, send, rm); ib_dma_unmap_sg(ic->i_cm_id->device, - rm->m_sg, rm->m_nents, - DMA_TO_DEVICE); + rm->data.m_sg, rm->data.m_nents, + DMA_TO_DEVICE); - if (rm->m_rdma_op) { - rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); + if (rm->rdma.m_rdma_op) { + rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op); /* If the user asked for a completion notification on this * message, we can implement three different semantics: @@ -111,10 +111,10 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, */ rds_ib_send_rdma_complete(rm, wc_status); - if (rm->m_rdma_op->r_write) - rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); + if (rm->rdma.m_rdma_op->r_write) + rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes); else - rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); + rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes); } /* If anyone waited for this message to get flushed out, wake @@ -244,8 +244,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_send_get_message(conn, send->s_op); if (rm) { - if (rm->m_rdma_op) - rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); + if (rm->rdma.m_rdma_op) + rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op); rds_ib_send_rdma_complete(rm, wc.status); rds_message_put(rm); } @@ -532,18 +532,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_inc.i_hdr.h_flags, be32_to_cpu(rm->m_inc.i_hdr.h_len)); */ - if (rm->m_nents) { - rm->m_count = ib_dma_map_sg(dev, - rm->m_sg, rm->m_nents, DMA_TO_DEVICE); - rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); - if (rm->m_count == 0) { + if (rm->data.m_nents) { + rm->data.m_count = ib_dma_map_sg(dev, + rm->data.m_sg, + rm->data.m_nents, + DMA_TO_DEVICE); + rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count); + if (rm->data.m_count == 0) { rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); ret = -ENOMEM; /* XXX ? */ goto out; } } else { - rm->m_count = 0; + rm->data.m_count = 0; } ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; @@ -559,10 +561,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, /* If it has a RDMA op, tell the peer we did it. This is * used by the peer to release use-once RDMA MRs. */ - if (rm->m_rdma_op) { + if (rm->rdma.m_rdma_op) { struct rds_ext_header_rdma ext_hdr; - ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); + ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); } @@ -590,7 +592,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; first = send; prev = NULL; - scat = &rm->m_sg[sg]; + scat = &rm->data.m_sg[sg]; sent = 0; i = 0; @@ -600,7 +602,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, * or when requested by the user. Right now, we let * the application choose. */ - if (rm->m_rdma_op && rm->m_rdma_op->r_fence) + if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence) send_flags = IB_SEND_FENCE; /* @@ -619,7 +621,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, } /* if there's data reference it with a chain of work reqs */ - for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { + for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) { unsigned int len; send = &ic->i_sends[pos]; @@ -697,7 +699,7 @@ add_header: sent += sizeof(struct rds_header); /* if we finished the message then send completion owns it */ - if (scat == &rm->m_sg[rm->m_count]) { + if (scat == &rm->data.m_sg[rm->data.m_count]) { prev->s_rm = ic->i_rm; prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; ic->i_rm = NULL; diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index dced532f9cfb..c187e8fdeab1 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -83,11 +83,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic, rdsdebug("ic %p send %p rm %p\n", ic, send, rm); ib_dma_unmap_sg(ic->i_cm_id->device, - rm->m_sg, rm->m_nents, + rm->data.m_sg, rm->data.m_nents, DMA_TO_DEVICE); - if (rm->m_rdma_op) { - rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); + if (rm->rdma.m_rdma_op) { + rds_iw_send_unmap_rdma(ic, rm->rdma.m_rdma_op); /* If the user asked for a completion notification on this * message, we can implement three different semantics: @@ -111,10 +111,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic, */ rds_iw_send_rdma_complete(rm, wc_status); - if (rm->m_rdma_op->r_write) - rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); + if (rm->rdma.m_rdma_op->r_write) + rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes); else - rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); + rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes); } /* If anyone waited for this message to get flushed out, wake @@ -563,18 +563,20 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_inc.i_hdr.h_flags, be32_to_cpu(rm->m_inc.i_hdr.h_len)); */ - if (rm->m_nents) { - rm->m_count = ib_dma_map_sg(dev, - rm->m_sg, rm->m_nents, DMA_TO_DEVICE); - rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); - if (rm->m_count == 0) { + if (rm->data.m_nents) { + rm->data.m_count = ib_dma_map_sg(dev, + rm->data.m_sg, + rm->data.m_nents, + DMA_TO_DEVICE); + rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count); + if (rm->data.m_count == 0) { rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); ret = -ENOMEM; /* XXX ? */ goto out; } } else { - rm->m_count = 0; + rm->data.m_count = 0; } ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; @@ -590,10 +592,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, /* If it has a RDMA op, tell the peer we did it. This is * used by the peer to release use-once RDMA MRs. */ - if (rm->m_rdma_op) { + if (rm->rdma.m_rdma_op) { struct rds_ext_header_rdma ext_hdr; - ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); + ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); } @@ -621,7 +623,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; first = send; prev = NULL; - scat = &rm->m_sg[sg]; + scat = &rm->data.m_sg[sg]; sent = 0; i = 0; @@ -631,7 +633,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, * or when requested by the user. Right now, we let * the application choose. */ - if (rm->m_rdma_op && rm->m_rdma_op->r_fence) + if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence) send_flags = IB_SEND_FENCE; /* @@ -650,7 +652,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, } /* if there's data reference it with a chain of work reqs */ - for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { + for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) { unsigned int len; send = &ic->i_sends[pos]; @@ -728,7 +730,7 @@ add_header: sent += sizeof(struct rds_header); /* if we finished the message then send completion owns it */ - if (scat == &rm->m_sg[rm->m_count]) { + if (scat == &rm->data.m_sg[rm->data.m_count]) { prev->s_rm = ic->i_rm; prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; ic->i_rm = NULL; diff --git a/net/rds/message.c b/net/rds/message.c index 809656c2b25c..4421d160b1a4 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -63,17 +63,17 @@ static void rds_message_purge(struct rds_message *rm) if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; - for (i = 0; i < rm->m_nents; i++) { - rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); + for (i = 0; i < rm->data.m_nents; i++) { + rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.m_sg[i])); /* XXX will have to put_page for page refs */ - __free_page(sg_page(&rm->m_sg[i])); + __free_page(sg_page(&rm->data.m_sg[i])); } - rm->m_nents = 0; + rm->data.m_nents = 0; - if (rm->m_rdma_op) - rds_rdma_free_op(rm->m_rdma_op); - if (rm->m_rdma_mr) - rds_mr_put(rm->m_rdma_mr); + if (rm->rdma.m_rdma_op) + rds_rdma_free_op(rm->rdma.m_rdma_op); + if (rm->rdma.m_rdma_mr) + rds_mr_put(rm->rdma.m_rdma_mr); } void rds_message_inc_purge(struct rds_incoming *inc) @@ -224,7 +224,7 @@ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) goto out; if (nents) - sg_init_table(rm->m_sg, nents); + sg_init_table(rm->data.m_sg, nents); atomic_set(&rm->m_refcount, 1); INIT_LIST_HEAD(&rm->m_sock_item); INIT_LIST_HEAD(&rm->m_conn_item); @@ -245,10 +245,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); - rm->m_nents = ceil(total_len, PAGE_SIZE); + rm->data.m_nents = ceil(total_len, PAGE_SIZE); - for (i = 0; i < rm->m_nents; ++i) { - sg_set_page(&rm->m_sg[i], + for (i = 0; i < rm->data.m_nents; ++i) { + sg_set_page(&rm->data.m_sg[i], virt_to_page(page_addrs[i]), PAGE_SIZE, 0); } @@ -278,7 +278,7 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, /* * now allocate and copy in the data payload. */ - sg = rm->m_sg; + sg = rm->data.m_sg; iov = first_iov; iov_off = 0; sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ @@ -289,7 +289,7 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, GFP_HIGHUSER); if (ret) goto out; - rm->m_nents++; + rm->data.m_nents++; sg_off = 0; } @@ -348,7 +348,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc, iov = first_iov; iov_off = 0; - sg = rm->m_sg; + sg = rm->data.m_sg; vec_off = 0; copied = 0; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index dee698b979af..24274bb9e329 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -643,14 +643,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, struct rds_rdma_op *op; if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) || - rm->m_rdma_op) + rm->rdma.m_rdma_op) return -EINVAL; op = rds_rdma_prepare(rs, CMSG_DATA(cmsg)); if (IS_ERR(op)) return PTR_ERR(op); rds_stats_inc(s_send_rdma); - rm->m_rdma_op = op; + rm->rdma.m_rdma_op = op; return 0; } @@ -679,6 +679,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, */ r_key = rds_rdma_cookie_key(rm->m_rdma_cookie); + spin_lock_irqsave(&rs->rs_rdma_lock, flags); mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); if (!mr) @@ -689,7 +690,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, if (mr) { mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); - rm->m_rdma_mr = mr; + rm->rdma.m_rdma_mr = mr; } return err; } @@ -707,5 +708,5 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, rm->m_rdma_cookie != 0) return -EINVAL; - return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); + return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr); } diff --git a/net/rds/rds.h b/net/rds/rds.h index 1d3eef67137f..07a750b3fb31 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -259,12 +259,18 @@ struct rds_message { */ spinlock_t m_rs_lock; struct rds_sock *m_rs; - struct rds_rdma_op *m_rdma_op; rds_rdma_cookie_t m_rdma_cookie; - struct rds_mr *m_rdma_mr; - unsigned int m_nents; - unsigned int m_count; - struct scatterlist m_sg[0]; + struct { + struct { + struct rds_rdma_op *m_rdma_op; + struct rds_mr *m_rdma_mr; + } rdma; + struct { + unsigned int m_nents; + unsigned int m_count; + struct scatterlist m_sg[0]; + } data; + }; }; /* diff --git a/net/rds/send.c b/net/rds/send.c index 817997daf785..19dfd025498e 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -166,7 +166,7 @@ int rds_send_xmit(struct rds_connection *conn) rm = conn->c_xmit_rm; if (rm && conn->c_xmit_hdr_off == sizeof(struct rds_header) && - conn->c_xmit_sg == rm->m_nents) { + conn->c_xmit_sg == rm->data.m_nents) { conn->c_xmit_rm = NULL; conn->c_xmit_sg = 0; conn->c_xmit_hdr_off = 0; @@ -236,7 +236,7 @@ int rds_send_xmit(struct rds_connection *conn) * connection. * Therefore, we never retransmit messages with RDMA ops. */ - if (rm->m_rdma_op && + if (rm->rdma.m_rdma_op && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { spin_lock_irqsave(&conn->c_lock, flags); if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) @@ -268,8 +268,8 @@ int rds_send_xmit(struct rds_connection *conn) * keep this simple and require that the transport either * send the whole rdma or none of it. */ - if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) { - ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op); + if (rm->rdma.m_rdma_op && !conn->c_xmit_rdma_sent) { + ret = conn->c_trans->xmit_rdma(conn, rm->rdma.m_rdma_op); if (ret) break; conn->c_xmit_rdma_sent = 1; @@ -279,7 +279,7 @@ int rds_send_xmit(struct rds_connection *conn) } if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || - conn->c_xmit_sg < rm->m_nents) { + conn->c_xmit_sg < rm->data.m_nents) { ret = conn->c_trans->xmit(conn, rm, conn->c_xmit_hdr_off, conn->c_xmit_sg, @@ -295,7 +295,7 @@ int rds_send_xmit(struct rds_connection *conn) ret -= tmp; } - sg = &rm->m_sg[conn->c_xmit_sg]; + sg = &rm->data.m_sg[conn->c_xmit_sg]; while (ret) { tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off); @@ -306,7 +306,7 @@ int rds_send_xmit(struct rds_connection *conn) sg++; conn->c_xmit_sg++; BUG_ON(ret != 0 && - conn->c_xmit_sg == rm->m_nents); + conn->c_xmit_sg == rm->data.m_nents); } } } @@ -419,7 +419,7 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) spin_lock_irqsave(&rm->m_rs_lock, flags); - ro = rm->m_rdma_op; + ro = rm->rdma.m_rdma_op; if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && ro && ro->r_notify && ro->r_notifier) { notifier = ro->r_notifier; @@ -453,7 +453,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status { struct rds_rdma_op *ro; - ro = rm->m_rdma_op; + ro = rm->rdma.m_rdma_op; if (ro && ro->r_notify && ro->r_notifier) { ro->r_notifier->n_status = status; list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); @@ -477,7 +477,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn, spin_lock_irqsave(&conn->c_lock, flags); list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { - if (rm->m_rdma_op == op) { + if (rm->rdma.m_rdma_op == op) { atomic_inc(&rm->m_refcount); found = rm; goto out; @@ -485,7 +485,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn, } list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { - if (rm->m_rdma_op == op) { + if (rm->rdma.m_rdma_op == op) { atomic_inc(&rm->m_refcount); found = rm; break; @@ -545,7 +545,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) spin_lock(&rs->rs_lock); if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { - struct rds_rdma_op *ro = rm->m_rdma_op; + struct rds_rdma_op *ro = rm->rdma.m_rdma_op; struct rds_notifier *notifier; list_del_init(&rm->m_sock_item); @@ -557,7 +557,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) &rs->rs_notify_queue); if (!notifier->n_status) notifier->n_status = status; - rm->m_rdma_op->r_notifier = NULL; + rm->rdma.m_rdma_op->r_notifier = NULL; } was_on_sock = 1; rm->m_rs = NULL; @@ -874,11 +874,11 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (ret) goto out; - if ((rm->m_rdma_cookie || rm->m_rdma_op) && + if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op) && !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", - rm->m_rdma_op, conn->c_trans->xmit_rdma); + rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); ret = -EOPNOTSUPP; goto out; } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index e5f6ccef79ef..d63aa35ac673 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -166,21 +166,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, goto out; } - while (sg < rm->m_nents) { + while (sg < rm->data.m_nents) { ret = tc->t_sock->ops->sendpage(tc->t_sock, - sg_page(&rm->m_sg[sg]), - rm->m_sg[sg].offset + off, - rm->m_sg[sg].length - off, + sg_page(&rm->data.m_sg[sg]), + rm->data.m_sg[sg].offset + off, + rm->data.m_sg[sg].length - off, MSG_DONTWAIT|MSG_NOSIGNAL); - rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), - rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, + rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.m_sg[sg]), + rm->data.m_sg[sg].offset + off, rm->data.m_sg[sg].length - off, ret); if (ret <= 0) break; off += ret; done += ret; - if (off == rm->m_sg[sg].length) { + if (off == rm->data.m_sg[sg].length) { off = 0; sg++; }