struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
- /* Data for direct copy to user */
- struct {
- struct sk_buff_head prequeue;
- struct task_struct *task;
- struct msghdr *msg;
- int memory;
- int len;
- } ucopy;
-
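/* Illustrative note (not part of the patch): per the usage visible in the
 * hunks below, prequeue held the skbs awaiting the sleeping reader,
 * task/msg identified that reader and its destination iovec, len tracked
 * how many bytes it still wanted, and memory accounted the queued
 * truesize against sk_rcvbuf.
 */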
u32 snd_wl1; /* Sequence for window update */
u32 snd_wnd; /* The window we expect to receive */
u32 max_window; /* Maximal window ever seen from peer */
static inline bool tcp_checksum_complete(struct sk_buff *skb)
{
	return !skb_csum_unnecessary(skb) &&
		__tcp_checksum_complete(skb);
}
-/* Prequeue for VJ style copy to user, combined with checksumming. */
-
-static inline void tcp_prequeue_init(struct tcp_sock *tp)
-{
- tp->ucopy.task = NULL;
- tp->ucopy.len = 0;
- tp->ucopy.memory = 0;
- skb_queue_head_init(&tp->ucopy.prequeue);
-}
-
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
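/* Illustrative note (not from the patch): these three pieces formed the
 * whole prequeue API. tcp_prequeue_init() (above) primed tp->ucopy at
 * socket setup, tcp_prequeue() filled the queue from softirq context
 * while a reader slept in tcp_recvmsg(), and the reader drained it in
 * process context via tcp_prequeue_process() (both removed further down).
 */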
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
int tcp_filter(struct sock *sk, struct sk_buff *skb);
tp->out_of_order_queue = RB_ROOT;
tcp_init_xmit_timers(sk);
- tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tcp_send_ack(sk);
}
-static void tcp_prequeue_process(struct sock *sk)
-{
- struct sk_buff *skb;
- struct tcp_sock *tp = tcp_sk(sk);
-
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
- while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb);
-
- /* Clear memory counter. */
- tp->ucopy.memory = 0;
-}
-
static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int copied = 0;
	u32 peek_seq;
	u32 *seq;
	unsigned long used;
	int err;
int target; /* Read at least this many bytes */
long timeo;
- struct task_struct *user_recv = NULL;
struct sk_buff *skb, *last;
u32 urg_hole = 0;
tcp_cleanup_rbuf(sk, copied);
- if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
- /* Install new reader */
- if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
- user_recv = current;
- tp->ucopy.task = user_recv;
- tp->ucopy.msg = msg;
- }
-
- tp->ucopy.len = len;
-
- WARN_ON(tp->copied_seq != tp->rcv_nxt &&
- !(flags & (MSG_PEEK | MSG_TRUNC)));
-
- /* Ugly... If prequeue is not empty, we have to
- * process it before releasing socket, otherwise
- * order will be broken at second iteration.
- * More elegant solution is required!!!
- *
- * Look: we have the following (pseudo)queues:
- *
- * 1. packets in flight
- * 2. backlog
- * 3. prequeue
- * 4. receive_queue
- *
- * Each queue can be processed only if the next ones
- * are empty. At this point we have empty receive_queue.
- * But prequeue _can_ be not empty after 2nd iteration,
- * when we jumped to start of loop because backlog
- * processing added something to receive_queue.
- * We cannot release_sock(), because backlog contains
- * packets arrived _after_ prequeued ones.
- *
- * Shortly, algorithm is clear --- to process all
- * the queues in order. We could make it more directly,
- * requeueing packets from backlog to prequeue, if
- * is not empty. It is more elegant, but eats cycles,
- * unfortunately.
- */
- if (!skb_queue_empty(&tp->ucopy.prequeue))
- goto do_prequeue;
-
- /* __ Set realtime policy in scheduler __ */
- }
-
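/* Illustrative sketch (not part of the patch), restating the invariant
 * from the removed comment above: the queues hold progressively newer
 * data (receive_queue, then prequeue, then backlog, then packets in
 * flight), so a queue may only be drained once the older ones are empty.
 * drain_in_order_example() is a hypothetical helper that makes the
 * resulting call order in tcp_recvmsg() explicit.
 */
static void drain_in_order_example(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* receive_queue is already empty here (consumed by the copy
	 * loop), so the prequeue must be flushed next: its skbs arrived
	 * before anything now sitting in the backlog.
	 */
	if (!skb_queue_empty(&tp->ucopy.prequeue))
		tcp_prequeue_process(sk);

	/* Only now may the backlog be processed; release_sock() flushes
	 * it through sk_backlog_rcv().
	 */
	release_sock(sk);
	lock_sock(sk);
}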
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
sk_wait_data(sk, &timeo, last);
}
- if (user_recv) {
- int chunk;
-
- /* __ Restore normal policy in scheduler __ */
-
- chunk = len - tp->ucopy.len;
- if (chunk != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
- len -= chunk;
- copied += chunk;
- }
-
- if (tp->rcv_nxt == tp->copied_seq &&
- !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
- tcp_prequeue_process(sk);
-
- chunk = len - tp->ucopy.len;
- if (chunk != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
- len -= chunk;
- copied += chunk;
- }
- }
- }
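/* Illustrative note (not from the patch): while tcp_recvmsg() slept or
 * flushed the backlog, the receive paths could copy payload straight
 * into the reader's iovec, decrementing tp->ucopy.len (see the ucopy
 * checks in tcp_data_queue() and tcp_rcv_established() below). The
 * "chunk = len - tp->ucopy.len" bookkeeping above detects those bytes
 * and credits them to copied and the DIRECTCOPYFROM* counters.
 */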
if ((flags & MSG_PEEK) &&
(peek_seq - copied - urg_hole != tp->copied_seq)) {
net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
break;
} while (len > 0);
- if (user_recv) {
- if (!skb_queue_empty(&tp->ucopy.prequeue)) {
- int chunk;
-
- tp->ucopy.len = copied > 0 ? len : 0;
-
- tcp_prequeue_process(sk);
-
- if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
- len -= chunk;
- copied += chunk;
- }
- }
-
- tp->ucopy.task = NULL;
- tp->ucopy.len = 0;
- }
-
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
*/
goto out_of_window;
/* Ok. In sequence. In window. */
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
- sock_owned_by_user(sk) && !tp->urg_data) {
- int chunk = min_t(unsigned int, skb->len,
- tp->ucopy.len);
-
- __set_current_state(TASK_RUNNING);
-
- if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- eaten = (chunk == skb->len);
- tcp_rcv_space_adjust(sk);
- }
- }
-
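/* Illustrative note (not part of the patch): this was the slow-path
 * direct copy. It could only run with sock_owned_by_user() true, i.e.
 * while the reader itself was processing deferred packets, so copying
 * in-sequence payload straight to its iovec (bypassing
 * sk_receive_queue) was safe.
 */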
if (eaten <= 0) {
queue_and_out:
if (eaten < 0) {
}
}
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int chunk = skb->len - hlen;
- int err;
-
- if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
- else
- err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
- if (!err) {
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- tcp_rcv_space_adjust(sk);
- }
-
- return err;
-}
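/* Illustrative note (not from the patch): this helper was the "combined
 * with checksumming" half of the scheme -- for skbs whose checksum was
 * still unverified, skb_copy_and_csum_datagram_msg() verified it during
 * the copy to user memory, touching the payload once instead of twice.
 */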
-
/* Accept RST for rcv_nxt - 1 after a FIN.
* When tcp connections are abruptly terminated from Mac OSX (via ^C), a
* FIN is sent followed by a RST packet. The RST is sent with the same
int eaten = 0;
bool fragstolen = false;
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt &&
- len - tcp_header_len <= tp->ucopy.len &&
- sock_owned_by_user(sk)) {
- __set_current_state(TASK_RUNNING);
-
- if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
- /* Predicted packet is in window by definition.
- * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- * Hence, check seq<=rcv_wup reduces to:
- */
- if (tcp_header_len ==
- (sizeof(struct tcphdr) +
- TCPOLEN_TSTAMP_ALIGNED) &&
- tp->rcv_nxt == tp->rcv_wup)
- tcp_store_ts_recent(tp);
-
- tcp_rcv_rtt_measure_ts(sk, skb);
-
- __skb_pull(skb, tcp_header_len);
- tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPHPHITSTOUSER);
- eaten = 1;
- }
- }
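/* Illustrative note (not part of the patch): the reduction in the
 * removed comment above holds because this fast path only runs for skbs
 * with seq == rcv_nxt, and rcv_wup <= rcv_nxt is invariant; therefore
 * seq <= rcv_wup  <=>  rcv_nxt <= rcv_wup  <=>  rcv_nxt == rcv_wup.
 */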
if (!eaten) {
if (tcp_checksum_complete(skb))
goto csum_error;
}
}
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8) --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (sysctl_tcp_low_latency || !tp->ucopy.task)
- return false;
-
- if (skb->len <= tcp_hdrlen(skb) &&
- skb_queue_len(&tp->ucopy.prequeue) == 0)
- return false;
-
- /* Before escaping RCU protected region, we need to take care of skb
- * dst. Prequeue is only enabled for established sockets.
- * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
- * Instead of doing full sk_rx_dst validity here, let's perform
- * an optimistic check.
- */
- if (likely(sk->sk_rx_dst))
- skb_dst_drop(skb);
- else
- skb_dst_force_safe(skb);
-
- __skb_queue_tail(&tp->ucopy.prequeue, skb);
- tp->ucopy.memory += skb->truesize;
- if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
- tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
- struct sk_buff *skb1;
-
- BUG_ON(sock_owned_by_user(sk));
- __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
- skb_queue_len(&tp->ucopy.prequeue));
-
- while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb1);
-
- tp->ucopy.memory = 0;
- } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
- wake_up_interruptible_sync_poll(sk_sleep(sk),
- POLLIN | POLLRDNORM | POLLRDBAND);
- if (!inet_csk_ack_scheduled(sk))
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- (3 * tcp_rto_min(sk)) / 4,
- TCP_RTO_MAX);
- }
- return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
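/* Illustrative note (not from the patch): tcp_prequeue() above gave up
 * and flushed everything through sk_backlog_rcv() once 32 skbs were
 * queued or the accounted truesize would overflow sk_rcvbuf; otherwise,
 * on the first queued skb it woke the reader and armed a delayed-ACK
 * timer at 3/4 of the minimum RTO. Assuming the default TCP_RTO_MIN of
 * HZ/5 (200 ms) and no per-route rto_min metric:
 *
 *	(3 * tcp_rto_min(sk)) / 4  ==  (3 * 200 ms) / 4  ==  150 ms
 */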
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v4_do_rcv(sk, skb);
+ ret = tcp_v4_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
}
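/* Illustrative note (not part of the patch): with the prequeue gone, an
 * skb for a socket not currently owned by a user task is handled
 * directly in softirq context by tcp_v4_do_rcv(); only the owned case
 * still defers work, via tcp_add_backlog(). The ipv6 receive path gets
 * the identical change at the end of this patch.
 */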
#endif
- /* Clean prequeue, it must be empty really */
- __skb_queue_purge(&tp->ucopy.prequeue);
-
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
- tcp_prequeue_init(newtp);
INIT_LIST_HEAD(&newtp->tsq_node);
tcp_init_wl(newtp, treq->rcv_isn);
/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
sk_mem_reclaim_partial(sk);
}
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
- if (!skb_queue_empty(&tp->ucopy.prequeue)) {
- struct sk_buff *skb;
-
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
-
- while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb);
-
- tp->ucopy.memory = 0;
- }
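/* Illustrative note (not from the patch): reaching this point meant the
 * reader never ran to drain its prequeue before the delayed-ACK timer
 * fired; the handler fell back to sk_backlog_rcv() and counted the
 * event as LINUX_MIB_TCPSCHEDULERFAILED.
 */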
-
if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) {
/* Delayed ACK missed: inflate ATO. */
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v6_do_rcv(sk, skb);
+ ret = tcp_v6_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}