From ac16ca6412d9feb5b2f8fc76a4ed938b5d107f94 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 22 Nov 2006 20:26:11 -0800 Subject: [NET]: Fix kfifo_alloc() error check. The return value of kfifo_alloc() should be checked by IS_ERR(). Signed-off-by: Akinobu Mita Signed-off-by: David S. Miller --- net/dccp/probe.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 146496fce2e2..fded1493c1dc 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -160,6 +160,8 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); + if (IS_ERR(dccpw.fifo)) + return PTR_ERR(dccpw.fifo); if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) goto err0; -- cgit v1.2.3 From 72a3effaf633bcae9034b7e176bdbd78d64a71db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Nov 2006 02:30:37 -0800 Subject: [NET]: Size listen hash tables using backlog hint We currently allocate a fixed size (TCP_SYNQ_HSIZE=512) slots hash table for each LISTEN socket, regardless of various parameters (listen backlog for example). On x86_64, this means order-1 allocations (might fail), even for 'small' sockets, expecting few connections. On the contrary, a huge server wanting a backlog of 50000 is slowed down a bit because of this fixed limit. This patch makes the sizing of listen hash table a dynamic parameter, depending on: - net.core.somaxconn tunable (default is 128) - net.ipv4.tcp_max_syn_backlog tunable (default : 256, 1024 or 128) - backlog value given by user application (2nd parameter of listen()) For large allocations (bigger than PAGE_SIZE), we use vmalloc() instead of kmalloc(). We still limit memory allocation with the two existing tunables (somaxconn & tcp_max_syn_backlog). So for standard setups, this patch actually reduces RAM usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 2 +- net/dccp/proto.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e08e7688a263..0a5d68dbb418 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1022,7 +1022,7 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static struct request_sock_ops dccp_request_sock_ops = { +static struct request_sock_ops dccp_request_sock_ops _read_mostly = { .family = PF_INET, .obj_size = sizeof(struct dccp_request_sock), .rtx_syn_ack = dccp_v4_send_response, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 72cbdcfc2c65..047d170a363a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -262,12 +262,12 @@ int dccp_destroy_sock(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_destroy_sock); -static inline int dccp_listen_start(struct sock *sk) +static inline int dccp_listen_start(struct sock *sk, int backlog) { struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); + return inet_csk_listen_start(sk, backlog); } int dccp_disconnect(struct sock *sk, int flags) @@ -788,7 +788,7 @@ int inet_dccp_listen(struct socket *sock, int backlog) * FIXME: here it probably should be sk->sk_prot->listen_start * see tcp_listen_start */ - err = dccp_listen_start(sk); + err = dccp_listen_start(sk, backlog); if (err) goto out; } -- cgit v1.2.3 From 494b4e7d819246bad67c40897b9eeaf0ce18d5ff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 9 Nov 2006 16:23:22 -0800 Subject: [DCCP]: Fix typo _read_mostly --> __read_mostly. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0a5d68dbb418..de64e6c7f93d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1022,7 +1022,7 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static struct request_sock_ops dccp_request_sock_ops _read_mostly = { +static struct request_sock_ops dccp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct dccp_request_sock), .rtx_syn_ack = dccp_v4_send_response, -- cgit v1.2.3 From 931731123a103cfb3f70ac4b7abfc71d94ba1f03 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 9 Nov 2006 19:58:25 -0800 Subject: [TCP]: Don't set SKB owner in tcp_transmit_skb(). The data itself is already charged to the SKB, doing the skb_set_owner_w() just generates a lot of noise and extra atomics we don't really need. Lmbench improvements on lat_tcp are minimal: before: TCP latency using localhost: 23.2701 microseconds TCP latency using localhost: 23.1994 microseconds TCP latency using localhost: 23.2257 microseconds after: TCP latency using localhost: 22.8380 microseconds TCP latency using localhost: 22.9465 microseconds TCP latency using localhost: 22.8462 microseconds Signed-off-by: David S. 
Miller --- net/dccp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/output.c b/net/dccp/output.c index 7102e3aed4ca..2cc4f4b2a9dd 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -125,7 +125,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); if (err <= 0) return err; @@ -426,7 +426,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) code); if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); + err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); if (err == NET_XMIT_CN) err = 0; } -- cgit v1.2.3 From 60361be1be7854cbffb6dc268d1bc094da33431c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 02:13:56 -0200 Subject: [DCCP]: set safe upper bound for option length This is a re-send from http://www.mail-archive.com/dccp@vger.kernel.org/msg00553.html It is the same patch as before, but I have built in Arnaldo's suggestions pointed out in that posting. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 272e8584564e..9f00dd807e04 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -40,12 +40,18 @@ extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); extern void dccp_time_wait(struct sock *sk, int state, int timeo); -/* FIXME: Right size this */ -#define DCCP_MAX_OPT_LEN 128 - -#define DCCP_MAX_PACKET_HDR 32 - -#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) +/* + * Set safe upper bounds for header and option length. 
Since Data Offset is 8 + * bits (RFC 4340, sec. 5.1), the total header length can never be more than + * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1): + * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR + * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields + * Hence a safe upper bound for the maximum option length is 1020-28 = 992 + */ +#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) +#define DCCP_MAX_PACKET_HDR 28 +#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) +#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ -- cgit v1.2.3 From 89e7e57778ecd8744fee97491300f05a9fb1388a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 11:13:33 -0200 Subject: [DCCPv6]: Add a FIXME for missing IPV6_PKTOPTIONS This refers to the possible memory leak pointed out in http://www.mail-archive.com/dccp@vger.kernel.org/msg00574.html, fixed by David Miller in http://www.mail-archive.com/netdev@vger.kernel.org/msg24881.html and adds a FIXME to point out where code is missing. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv6.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fc4242c0767c..6f1c2ad88608 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -990,13 +990,17 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) + /* + * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below + * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example. + */ opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) { - /* This is where we would goto ipv6_pktoptions. 
*/ + /* XXX This is where we would goto ipv6_pktoptions. */ __kfree_skb(opt_skb); } return 0; @@ -1024,7 +1028,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) { - /* This is where we would goto ipv6_pktoptions. */ + /* XXX This is where we would goto ipv6_pktoptions. */ __kfree_skb(opt_skb); } return 0; -- cgit v1.2.3 From 9b42078ed6edfe04e9dc9a59b946ad912aeef717 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 11:22:32 -0200 Subject: [DCCP]: Combine allocating & zeroing header space on skb This is a code simplification: it combines three often recurring operations into one inline function, * allocate `len' bytes header space in skb * fill these `len' bytes with zeroes * cast the start of this header space as dccp_hdr Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 9 ++------- net/dccp/ipv6.c | 8 ++------ net/dccp/output.c | 14 +++----------- 3 files changed, 7 insertions(+), 24 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index de64e6c7f93d..ce8eed32dbeb 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -212,12 +212,9 @@ static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb, /* Reserve space for headers. */ skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - skb->dst = dst_clone(rxskb->dst); - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_ack_len); /* Build DCCP header and checksum it. 
*/ dh->dccph_type = DCCP_PKT_ACK; @@ -720,9 +717,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); /* Build DCCP header and checksum it. */ dh->dccph_type = DCCP_PKT_RESET; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6f1c2ad88608..116bddb64b80 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -537,9 +537,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); /* Swap the send and the receive. */ dh->dccph_type = DCCP_PKT_RESET; @@ -601,9 +599,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_ack_len); /* Build DCCP header and checksum it. */ dh->dccph_type = DCCP_PKT_ACK; diff --git a/net/dccp/output.c b/net/dccp/output.c index 2cc4f4b2a9dd..1ae2248557c6 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -88,11 +88,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) return -EPROTO; } - skb->h.raw = skb_push(skb, dccp_header_size); - dh = dccp_hdr(skb); /* Build DCCP header and checksum it. 
*/ - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; dh->dccph_sport = inet->sport; dh->dccph_dport = inet->dport; @@ -340,10 +338,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, return NULL; } - skb->h.raw = skb_push(skb, dccp_header_size); - - dh = dccp_hdr(skb); - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_rsk(req)->rmt_port; @@ -392,10 +387,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, return NULL; } - skb->h.raw = skb_push(skb, dccp_header_size); - - dh = dccp_hdr(skb); - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_sk(sk)->dport; -- cgit v1.2.3 From d23c7107bfbaac955289685c522c7ff99dad3780 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 11:46:34 -0200 Subject: [DCCP]: Simplify jump labels in dccp_v{4,6}_rcv This is a code simplification and was singled out from the DCCPv6 Oops patch on http://www.mail-archive.com/dccp@vger.kernel.org/msg00600.html It mainly makes the code consistent between ipv{4,6}.c for the functions dccp_v4_rcv dccp_v6_rcv and removes the do_time_wait label to simplify code somewhat. Committer note: fixed up a compile problem, trivial. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 15 ++++----------- net/dccp/ipv6.c | 22 ++++++++++------------ 2 files changed, 14 insertions(+), 23 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ce8eed32dbeb..7107885ada7e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -910,8 +910,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", - (unsigned long long) + dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); } @@ -940,11 +939,10 @@ static int dccp_v4_rcv(struct sk_buff *skb) * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " - "do_time_wait\n"); - goto do_time_wait; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -968,17 +966,12 @@ no_dccp_socket: } discard_it: - /* Discard frame. 
*/ kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; - -do_time_wait: - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; } static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 116bddb64b80..dee085301576 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1071,8 +1071,11 @@ static int dccp_v6_rcv(struct sk_buff **pskb) * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk == NULL) + if (sk == NULL) { + dccp_pr_debug("failed to look up flow ID in table and " + "get corresponding socket\n"); goto no_dccp_socket; + } /* * Step 2: @@ -1080,8 +1083,11 @@ static int dccp_v6_rcv(struct sk_buff **pskb) * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) - goto do_time_wait; + if (sk->sk_state == DCCP_TIME_WAIT) { + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; + } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; @@ -1101,22 +1107,14 @@ no_dccp_socket: DCCP_RESET_CODE_NO_CONNECTION; dccp_v6_ctl_send_reset(skb); } -discard_it: - - /* - * Discard frame - */ +discard_it: kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; - -do_time_wait: - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; } static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { -- cgit v1.2.3 From f6484f7c7ad22e4bb018875c386d6a7aaa441426 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 10 Nov 2006 12:01:52 -0200 Subject: [DCCP] timewait: Remove leftover extern declarations Gerrit Renker noticed dccp_tw_deschedule and submitted a patch with a FIXME, but as he suggests in the same patch the best thing is to just ditch this declaration. While doing that I also noticed that dccp_tw_count is not defined anywhere either, so ditch it too. 
Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 9f00dd807e04..8964b188aba3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -35,8 +35,6 @@ extern int dccp_debug; extern struct inet_hashinfo dccp_hashinfo; extern atomic_t dccp_orphan_count; -extern int dccp_tw_count; -extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); extern void dccp_time_wait(struct sock *sk, int state, int timeo); -- cgit v1.2.3 From 8a73cd09d96aa01743316657fc4e6864fe79b703 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 12:32:01 -0200 Subject: [DCCP]: calling dccp_v{4,6}_reqsk_send_ack is a BUG This patch removes two functions, the send_ack functions of request_sock, which are not called/used by the DCCP code. It is correct that these functions are not called, below is a justification why calling these functions (on a passive socket in the LISTEN/RESPOND state) would mean a DCCP protocol violation. 
A) Background: using request_sock in TCP: --- net/dccp/dccp.h | 2 ++ net/dccp/ipv4.c | 48 +------------------------------------------ net/dccp/ipv6.c | 57 +--------------------------------------------------- net/dccp/minisocks.c | 10 +++++++++ 4 files changed, 14 insertions(+), 103 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 8964b188aba3..3d4b4a908d11 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -131,6 +131,8 @@ extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern void dccp_send_ack(struct sock *sk); extern void dccp_send_delayed_ack(struct sock *sk); +extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); + extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7107885ada7e..8dd9f5aa27a2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -193,52 +193,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } -static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb, - struct request_sock *req) -{ - int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) - return; - - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) - return; - - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - skb->dst = dst_clone(rxskb->dst); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. 
*/ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - bh_lock_sock(dccp_v4_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_v4_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - } -} - static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { @@ -1014,7 +968,7 @@ static struct request_sock_ops dccp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct dccp_request_sock), .rtx_syn_ack = dccp_v4_send_response, - .send_ack = dccp_v4_reqsk_send_ack, + .send_ack = dccp_reqsk_send_ack, .destructor = dccp_v4_reqsk_destructor, .send_reset = dccp_v4_ctl_send_reset, }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index dee085301576..718509dcb24d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -37,8 +37,6 @@ static struct socket *dccp_v6_ctl_socket; static void dccp_v6_ctl_send_reset(struct sk_buff *skb); -static void dccp_v6_reqsk_send_ack(struct sk_buff *skb, - struct request_sock *req); static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); @@ -493,7 +491,7 @@ static struct request_sock_ops dccp6_request_sock_ops = { .family = AF_INET6, .obj_size = sizeof(struct dccp6_request_sock), .rtx_syn_ack = dccp_v6_send_response, - .send_ack = dccp_v6_reqsk_send_ack, + .send_ack = dccp_reqsk_send_ack, .destructor = dccp_v6_reqsk_destructor, .send_reset = dccp_v6_ctl_send_reset, }; @@ -582,59 +580,6 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) kfree_skb(skb); 
} -static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, - struct request_sock *req) -{ - struct flowi fl; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); - if (skb == NULL) - return; - - skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - - dh = dccp_zeroed_hdr(skb, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); - - /* FIXME: calculate checksum, IPv4 also should... 
*/ - - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dh->dccph_dport; - fl.fl_ip_sport = dh->dccph_sport; - security_req_classify_flow(req, &fl); - - if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(dccp_v6_ctl_socket->sk, skb, &fl, NULL, 0); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - return; - } - } - - kfree_skb(skb); -} - static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 9045438d6b36..5f3e1a4c036b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -11,6 +11,7 @@ */ #include +#include #include #include @@ -283,3 +284,12 @@ int dccp_child_process(struct sock *parent, struct sock *child, } EXPORT_SYMBOL_GPL(dccp_child_process); + +void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk) +{ + pr_info(KERN_WARNING "DCCP: ACK packets are never sent in " + "LISTEN/RESPOND state\n"); + dump_stack(); +} + +EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); -- cgit v1.2.3 From 3d2fe62b8d8522722c4fe46b8af13520b73848c4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 12:52:36 -0200 Subject: [DCCPv4]: remove forward declarations in ipv4.c This relates to Arnaldo's announcement in http://www.mail-archive.com/dccp@vger.kernel.org/msg00604.html Originally this had been part of the Oops fix and is a revised variant of http://www.mail-archive.com/dccp@vger.kernel.org/msg00598.html No code change, merely reshuffling, with the particular objective of having all request_sock_ops close(r) together for more clarity. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 266 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 132 insertions(+), 134 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8dd9f5aa27a2..ed6202652bcc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -193,37 +193,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) -{ - int err = -1; - struct sk_buff *skb; - - /* First, grab a route. */ - - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; - - skb = dccp_make_response(sk, dst, req); - if (skb != NULL) { - const struct inet_request_sock *ireq = inet_rsk(req); - struct dccp_hdr *dh = dccp_hdr(skb); - - dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, - ireq->rmt_addr); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, - ireq->opt); - if (err == NET_XMIT_CN) - err = 0; - } - -out: - dst_release(dst); - return err; -} - /* * This routine is called by the ICMP module when it gets some sort of error * condition. 
If err < 0 then the socket should be closed and the error @@ -400,95 +369,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } -static struct request_sock_ops dccp_request_sock_ops; - -int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct inet_request_sock *ireq; - struct dccp_sock dp; - struct request_sock *req; - struct dccp_request_sock *dreq; - const __be32 saddr = skb->nh.iph->saddr; - const __be32 daddr = skb->nh.iph->daddr; - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; - - /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } - - if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } - /* - * TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if (inet_csk_reqsk_queue_is_full(sk)) - goto drop; - - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. 
- */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) - goto drop; - - req = reqsk_alloc(&dccp_request_sock_ops); - if (req == NULL) - goto drop; - - if (dccp_parse_options(sk, skb)) - goto drop_and_free; - - dccp_openreq_init(req, &dp, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - req->rcv_wnd = dccp_feat_default_sequence_window; - ireq->opt = NULL; - - /* - * Step 3: Process LISTEN state - * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. - */ - dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = service; - - if (dccp_v4_send_response(sk, req, NULL)) - goto drop_and_free; - - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - return 0; - -drop_and_free: - reqsk_free(req); -drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; - return -1; -} - -EXPORT_SYMBOL_GPL(dccp_v4_conn_request); - /* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. @@ -640,6 +520,37 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, return &rt->u.dst; } +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) +{ + int err = -1; + struct sk_buff *skb; + + /* First, grab a route. 
*/ + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, + ireq->rmt_addr); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + if (err == NET_XMIT_CN) + err = 0; + } + +out: + dst_release(dst); + return err; +} + static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) { int err; @@ -708,6 +619,107 @@ out: dst_release(dst); } +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops __read_mostly = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct dccp_sock dp; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __be32 saddr = skb->nh.iph->saddr; + const __be32 daddr = skb->nh.iph->daddr; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; + goto drop; + } + + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. 
+ */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = reqsk_alloc(&dccp_request_sock_ops); + if (req == NULL) + goto drop; + + if (dccp_parse_options(sk, skb)) + goto drop_and_free; + + dccp_openreq_init(req, &dp, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + req->rcv_wnd = dccp_feat_default_sequence_window; + ireq->opt = NULL; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = service; + + if (dccp_v4_send_response(sk, req, NULL)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; + return -1; +} + +EXPORT_SYMBOL_GPL(dccp_v4_conn_request); + int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct dccp_hdr *dh = dccp_hdr(skb); @@ -959,20 +971,6 @@ static int dccp_v4_init_sock(struct sock *sk) return err; } -static void dccp_v4_reqsk_destructor(struct request_sock *req) -{ - kfree(inet_rsk(req)->opt); -} - -static struct request_sock_ops dccp_request_sock_ops __read_mostly = { - .family = PF_INET, - .obj_size = sizeof(struct dccp_request_sock), - .rtx_syn_ack = dccp_v4_send_response, - .send_ack = dccp_reqsk_send_ack, - .destructor = dccp_v4_reqsk_destructor, - .send_reset = 
dccp_v4_ctl_send_reset, -}; - static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; -- cgit v1.2.3 From 73c9e02c22e35e29db6d0fdec994d261fcfd82c0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 13:01:31 -0200 Subject: [DCCPv6]: remove forward declarations in ipv6.c This does the same for ipv6.c as the preceding one does for ipv4.c: Only the inet_connection_sock_af_ops forward declarations remain, since at least dccp_ipv6_mapped has a circular dependency to dccp_v6_request_recv_sock. No code change, merely re-ordering. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv6.c | 385 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 190 insertions(+), 195 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 718509dcb24d..ed4a50263802 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -36,11 +36,6 @@ /* Socket used for sending RSTs and ACKs */ static struct socket *dccp_v6_ctl_socket; -static void dccp_v6_ctl_send_reset(struct sk_buff *skb); -static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); - -static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); - static struct inet_connection_sock_af_ops dccp_ipv6_mapped; static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; @@ -87,183 +82,6 @@ static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) dh->dccph_sport); } -static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; - struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; - struct flowi fl; - struct dst_entry *dst; - int addr_type; - int err; - - dp->dccps_role = DCCP_ROLE_CLIENT; - - 
if (addr_len < SIN6_LEN_RFC2133) - return -EINVAL; - - if (usin->sin6_family != AF_INET6) - return -EAFNOSUPPORT; - - memset(&fl, 0, sizeof(fl)); - - if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { - struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); - if (flowlabel == NULL) - return -EINVAL; - ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); - fl6_sock_release(flowlabel); - } - } - /* - * connect() to INADDR_ANY means loopback (BSD'ism). - */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 1; - - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type & IPV6_ADDR_MULTICAST) - return -ENETUNREACH; - - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (addr_len >= sizeof(struct sockaddr_in6) && - usin->sin6_scope_id) { - /* If interface is set while binding, indices - * must coincide. - */ - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != usin->sin6_scope_id) - return -EINVAL; - - sk->sk_bound_dev_if = usin->sin6_scope_id; - } - - /* Connect to link-local address requires an interface */ - if (!sk->sk_bound_dev_if) - return -EINVAL; - } - - ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; - - /* - * DCCP over IPv4 - */ - if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = icsk->icsk_ext_hdr_len; - struct sockaddr_in sin; - - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); - - if (__ipv6_only_sock(sk)) - return -ENETUNREACH; - - sin.sin_family = AF_INET; - sin.sin_port = usin->sin6_port; - sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - - icsk->icsk_af_ops = &dccp_ipv6_mapped; - sk->sk_backlog_rcv = dccp_v4_do_rcv; - - err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); - if (err) { - icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &dccp_ipv6_af_ops; - sk->sk_backlog_rcv = dccp_v6_do_rcv; - goto failure; - } else { - 
ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), - inet->saddr); - ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), - inet->rcv_saddr); - } - - return err; - } - - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; - - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->sport; - security_sk_classify_flow(sk, &fl); - - if (np->opt != NULL && np->opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto failure; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = xfrm_lookup(&dst, &fl, sk, 0); - if (err < 0) - goto failure; - - if (saddr == NULL) { - saddr = &fl.fl6_src; - ipv6_addr_copy(&np->rcv_saddr, saddr); - } - - /* set the source address */ - ipv6_addr_copy(&np->saddr, saddr); - inet->rcv_saddr = LOOPBACK4_IPV6; - - __ip6_dst_store(sk, dst, NULL, NULL); - - icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); - - inet->dport = usin->sin6_port; - - dccp_set_state(sk, DCCP_REQUESTING); - err = inet6_hash_connect(&dccp_death_row, sk); - if (err) - goto late_failure; - /* FIXME */ -#if 0 - dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, - inet->sport, - inet->dport); -#endif - err = dccp_connect(sk); - if (err) - goto late_failure; - - return 0; - -late_failure: - dccp_set_state(sk, DCCP_CLOSED); - __sk_dst_reset(sk); -failure: - inet->dport = 0; - sk->sk_route_caps = 0; - return err; -} - static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { @@ -487,19 +305,6 @@ static void 
dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet6_rsk(req)->pktopts); } -static struct request_sock_ops dccp6_request_sock_ops = { - .family = AF_INET6, - .obj_size = sizeof(struct dccp6_request_sock), - .rtx_syn_ack = dccp_v6_send_response, - .send_ack = dccp_reqsk_send_ack, - .destructor = dccp_v6_reqsk_destructor, - .send_reset = dccp_v6_ctl_send_reset, -}; - -static struct timewait_sock_ops dccp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct dccp6_timewait_sock), -}; - static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -580,6 +385,15 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) kfree_skb(skb); } +static struct request_sock_ops dccp6_request_sock_ops = { + .family = AF_INET6, + .obj_size = sizeof(struct dccp6_request_sock), + .rtx_syn_ack = dccp_v6_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v6_reqsk_destructor, + .send_reset = dccp_v6_ctl_send_reset, +}; + static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); @@ -1062,6 +876,183 @@ discard_and_relse: goto discard_it; } +static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct flowi fl; + struct dst_entry *dst; + int addr_type; + int err; + + dp->dccps_role = DCCP_ROLE_CLIENT; + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + if (usin->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + + memset(&fl, 0, sizeof(fl)); + + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl.fl6_flowlabel); + if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { 
+ struct ip6_flowlabel *flowlabel; + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + fl6_sock_release(flowlabel); + } + } + /* + * connect() to INADDR_ANY means loopback (BSD'ism). + */ + if (ipv6_addr_any(&usin->sin6_addr)) + usin->sin6_addr.s6_addr[15] = 1; + + addr_type = ipv6_addr_type(&usin->sin6_addr); + + if (addr_type & IPV6_ADDR_MULTICAST) + return -ENETUNREACH; + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (addr_len >= sizeof(struct sockaddr_in6) && + usin->sin6_scope_id) { + /* If interface is set while binding, indices + * must coincide. + */ + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != usin->sin6_scope_id) + return -EINVAL; + + sk->sk_bound_dev_if = usin->sin6_scope_id; + } + + /* Connect to link-local address requires an interface */ + if (!sk->sk_bound_dev_if) + return -EINVAL; + } + + ipv6_addr_copy(&np->daddr, &usin->sin6_addr); + np->flow_label = fl.fl6_flowlabel; + + /* + * DCCP over IPv4 + */ + if (addr_type == IPV6_ADDR_MAPPED) { + u32 exthdrlen = icsk->icsk_ext_hdr_len; + struct sockaddr_in sin; + + SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); + + if (__ipv6_only_sock(sk)) + return -ENETUNREACH; + + sin.sin_family = AF_INET; + sin.sin_port = usin->sin6_port; + sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; + + icsk->icsk_af_ops = &dccp_ipv6_mapped; + sk->sk_backlog_rcv = dccp_v4_do_rcv; + + err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); + if (err) { + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_backlog_rcv = dccp_v6_do_rcv; + goto failure; + } else { + ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), + inet->saddr); + ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), + inet->rcv_saddr); + } + + return err; + } + + if (!ipv6_addr_any(&np->rcv_saddr)) + saddr = &np->rcv_saddr; + + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + 
ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = usin->sin6_port; + fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); + + if (np->opt != NULL && np->opt->srcrt != NULL) { + const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto failure; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + err = xfrm_lookup(&dst, &fl, sk, 0); + if (err < 0) + goto failure; + + if (saddr == NULL) { + saddr = &fl.fl6_src; + ipv6_addr_copy(&np->rcv_saddr, saddr); + } + + /* set the source address */ + ipv6_addr_copy(&np->saddr, saddr); + inet->rcv_saddr = LOOPBACK4_IPV6; + + __ip6_dst_store(sk, dst, NULL, NULL); + + icsk->icsk_ext_hdr_len = 0; + if (np->opt != NULL) + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); + + inet->dport = usin->sin6_port; + + dccp_set_state(sk, DCCP_REQUESTING); + err = inet6_hash_connect(&dccp_death_row, sk); + if (err) + goto late_failure; + /* FIXME */ +#if 0 + dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32, + np->daddr.s6_addr32, + inet->sport, + inet->dport); +#endif + err = dccp_connect(sk); + if (err) + goto late_failure; + + return 0; + +late_failure: + dccp_set_state(sk, DCCP_CLOSED); + __sk_dst_reset(sk); +failure: + inet->dport = 0; + sk->sk_route_caps = 0; + return err; +} + static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .queue_xmit = inet6_csk_xmit, .send_check = dccp_v6_send_check, @@ -1122,6 +1113,10 @@ static int dccp_v6_destroy_sock(struct sock *sk) return inet6_destroy_sock(sk); } +static struct timewait_sock_ops dccp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct dccp6_timewait_sock), +}; + static struct proto dccp_v6_prot = { .name = "DCCPv6", .owner = THIS_MODULE, -- cgit v1.2.3 From 
fec5b80e4924f638418c21b09165dce8b79fee86 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Fri, 10 Nov 2006 13:04:52 -0200 Subject: [DCCP]: Fix DCCP Probe Typo Fixes a typo in Kconfig, patch is by Ian McDonald and is re-sent from http://www.mail-archive.com/dccp@vger.kernel.org/msg00579.html Signed-off-by: Ian McDonald Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index ef8919cca74b..2fc5e55d2a8d 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -49,7 +49,7 @@ config NET_DCCPPROBE DCCP congestion avoidance modules. If you don't understand what was just said, you don't need it: say N. - Documentation on how to use the packet generator can be found + Documentation on how to use DCCP connection probing can be found at http://linux-net.osdl.org/index.php/DccpProbe To compile this code as a module, choose M here: the -- cgit v1.2.3 From f45b3ec481581f24719d8ab0bc812c02fcedc2bc Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Fri, 10 Nov 2006 13:09:10 -0200 Subject: [DCCP]: Fix logfile overflow This patch fixes data being spewed into the logs continually. As the code stood if there was a large queue and long delays timeo would go down to zero and never get reset. This fixes it by resetting timeo. Put constant into header as well. 
Signed-off-by: Ian McDonald Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 2 ++ net/dccp/output.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 3d4b4a908d11..7b859a723826 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -62,6 +62,8 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ +#define DCCP_XMIT_TIMEO 30000 /* Time/msecs for blocking transmit per packet */ + /* is seq1 < seq2 ? */ static inline int before48(const u64 seq1, const u64 seq2) { diff --git a/net/dccp/output.c b/net/dccp/output.c index 1ae2248557c6..51654975e8ea 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -249,8 +249,8 @@ void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - long timeo = 30000; /* If a packet is taking longer than 2 secs - we have other issues */ + long timeo = DCCP_XMIT_TIMEO; /* If a packet is taking longer than + this we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, @@ -261,8 +261,10 @@ void dccp_write_xmit(struct sock *sk, int block) sk_reset_timer(sk, &dp->dccps_xmit_timer, msecs_to_jiffies(err)+jiffies); break; - } else + } else { err = dccp_wait_for_ccid(sk, skb, &timeo); + timeo = DCCP_XMIT_TIMEO; + } if (err) { printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" " %d\n", __FUNCTION__, err); -- cgit v1.2.3 From cf557926f6955b4c3fa55e81fdb3675e752e8eed Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 16:08:37 -0200 Subject: [DCCP]: tidy up dccp_v{4,6}_conn_request This is a code simplification to remove reduplicated code by concentrating and abstracting shared code. 
Detailed Changes: --- net/dccp/dccp.h | 13 +------------ net/dccp/ipv4.c | 9 +++------ net/dccp/ipv6.c | 7 +++---- net/dccp/minisocks.c | 9 +++++++++ 4 files changed, 16 insertions(+), 22 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 7b859a723826..2990bfb12587 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -155,18 +155,7 @@ extern const char *dccp_state_name(const int state); extern void dccp_set_state(struct sock *sk, const int state); extern void dccp_done(struct sock *sk); -static inline void dccp_openreq_init(struct request_sock *req, - struct dccp_sock *dp, - struct sk_buff *skb) -{ - /* - * FIXME: fill in the other req fields from the DCCP options - * received - */ - inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; - inet_rsk(req)->acked = 0; - req->rcv_wnd = 0; -} +extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ed6202652bcc..d75ce8c7e488 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -636,11 +636,8 @@ static struct request_sock_ops dccp_request_sock_ops __read_mostly = { int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; - struct dccp_sock dp; struct request_sock *req; struct dccp_request_sock *dreq; - const __be32 saddr = skb->nh.iph->saddr; - const __be32 daddr = skb->nh.iph->daddr; const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -680,14 +677,14 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, skb)) goto drop_and_free; - dccp_openreq_init(req, &dp, skb); + dccp_reqsk_init(req, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; + ireq->loc_addr = 
skb->nh.iph->daddr; + ireq->rmt_addr = skb->nh.iph->saddr; req->rcv_wnd = dccp_feat_default_sequence_window; ireq->opt = NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ed4a50263802..19a4f763099d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -427,7 +427,6 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock dp; struct request_sock *req; struct dccp_request_sock *dreq; struct inet6_request_sock *ireq6; @@ -459,9 +458,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - /* FIXME: process options */ + if (dccp_parse_options(sk, skb)) + goto drop_and_free; - dccp_openreq_init(req, &dp, skb); + dccp_reqsk_init(req, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; @@ -469,7 +469,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr); - req->rcv_wnd = dccp_feat_default_sequence_window; ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5f3e1a4c036b..0f228ab5169d 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -293,3 +293,12 @@ void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk) } EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); + +void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) +{ + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->acked = 0; + req->rcv_wnd = dccp_feat_default_sequence_window; +} + +EXPORT_SYMBOL_GPL(dccp_reqsk_init); -- cgit v1.2.3 From d83ca5accb256de1b44835cd222bfdc3207bd7dc Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 16:29:14 -0200 Subject: [DCCP]: Update code comments for Step 2/3 Sorts out the comments for processing steps 2,3 in 
section 8.5 of RFC 4340. All comments have been updated against this document, and the reference to step 2 has been made consistent throughout the files. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/input.c | 38 +++++++++++++++++--------------------- net/dccp/ipv4.c | 38 ++++++++++++++++++-------------------- net/dccp/ipv6.c | 33 ++++++++++++++++++++++++++++----- net/dccp/minisocks.c | 10 +++++----- 4 files changed, 68 insertions(+), 51 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/input.c b/net/dccp/input.c index 1d24881ac0ab..97ccdc30fd89 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -431,29 +431,25 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* * Step 3: Process LISTEN state - * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv) * * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - * - * NOTE: the check for the packet types is done in - * dccp_rcv_state_process + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. 
This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init + * Cookies Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d75ce8c7e488..91bffaa761a6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -729,24 +729,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state - * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. 
This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process @@ -887,8 +886,6 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* * Step 2: * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -919,6 +916,7 @@ no_dccp_socket: goto discard_it; /* * Step 2: + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 19a4f763099d..201801e1532d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -487,10 +487,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. */ dreq = dccp_rsk(req); dreq->dreq_isr = dcb->dccpd_seq; @@ -760,6 +760,30 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } + /* + * Step 3: Process LISTEN state + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. 
Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in + * dccp_rcv_state_process + */ if (sk->sk_state == DCCP_LISTEN) { struct sock *nsk = dccp_v6_hnd_req(sk, skb); @@ -826,8 +850,6 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* * Step 2: * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -857,6 +879,7 @@ no_dccp_socket: goto discard_it; /* * Step 2: + * If no socket ... 
* Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 0f228ab5169d..d3de696fe4bf 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -98,8 +98,8 @@ struct sock *dccp_create_openreq_child(struct sock *sk, /* * Step 3: Process LISTEN state * - * // Generate a new socket and switch to that socket - * Set S := new socket for this port pair + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair */ struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); @@ -148,9 +148,9 @@ out_free: /* * Step 3: Process LISTEN state * - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init - * Cookie + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies */ /* See dccp_v4_conn_request */ -- cgit v1.2.3 From 6f4e5fff1e4d46714ea554fd83e44eab534e8b11 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 17:43:06 -0200 Subject: [DCCP]: Support for partial checksums (RFC 4340, sec. 9.2) This patch does the following: a) introduces variable-length checksums as specified in [RFC 4340, sec. 9.2] b) provides necessary socket options and documentation as to how to use them c) basic support and infrastructure for the Minimum Checksum Coverage feature [RFC 4340, sec. 9.2.1]: acceptability tests, user notification and user interface In addition, it (1) fixes two bugs in the DCCPv4 checksum computation: * pseudo-header used checksum_len instead of skb->len * incorrect checksum coverage calculation based on dccph_x (2) removes dccp_v4_verify_checksum() since it reduplicates code of the checksum computation; code calling this function is updated accordingly. 
(3) now uses skb_checksum(), which is safer than csum_partial() if the sk_buff is a non-linear buffer (has pages attached to it). (4) fixes an outstanding TODO item: * If P.CsCov is too large for the packet size, drop packet and return. The code has been tested with applications; the latest version of tcpdump now comes with support for partial DCCP checksums. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 29 ++++++++++++--- net/dccp/ipv4.c | 105 +++++++++++++++++++++++++++--------------------------- net/dccp/ipv6.c | 74 ++++++++++++++++++++++++-------------- net/dccp/output.c | 13 +++---- net/dccp/proto.c | 26 +++++++++++--- 5 files changed, 150 insertions(+), 97 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 2990bfb12587..d5c414bf7819 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -129,6 +129,30 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); #define DCCP_ADD_STATS_USER(field, val) \ SNMP_ADD_STATS_USER(dccp_statistics, field, val) +/* + * Checksumming routines + */ +static inline int dccp_csum_coverage(const struct sk_buff *skb) +{ + const struct dccp_hdr* dh = dccp_hdr(skb); + + if (dh->dccph_cscov == 0) + return skb->len; + return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32); +} + +static inline void dccp_csum_outgoing(struct sk_buff *skb) +{ + int cov = dccp_csum_coverage(skb); + + if (cov >= skb->len) + dccp_hdr(skb)->dccph_cscov = 0; + + skb->csum = skb_checksum(skb, 0, (cov > skb->len)?
skb->len : cov, 0); +} + +extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); + extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern void dccp_send_ack(struct sock *sk); @@ -214,14 +238,9 @@ extern void dccp_shutdown(struct sock *sk, int how); extern int inet_dccp_listen(struct socket *sock, int backlog); extern unsigned int dccp_poll(struct file *file, struct socket *sock, poll_table *wait); -extern void dccp_v4_send_check(struct sock *sk, int len, - struct sk_buff *skb); extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -extern int dccp_v4_checksum(const struct sk_buff *skb, - const __be32 saddr, const __be32 daddr); - extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 91bffaa761a6..496112080f3d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -349,13 +349,19 @@ out: sock_put(sk); } -/* This routine computes an IPv4 DCCP checksum. 
*/ -void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +static inline u16 dccp_v4_csum_finish(struct sk_buff *skb, + __be32 src, __be32 dst) +{ + return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); +} + +void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); @@ -454,47 +460,6 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr, - const __be32 daddr) -{ - const struct dccp_hdr* dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp); -} - -EXPORT_SYMBOL_GPL(dccp_v4_checksum); - -static int dccp_v4_verify_checksum(struct sk_buff *skb, - const __be32 saddr, const __be32 daddr) -{ - struct dccp_hdr *dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp) == 0 ? 
0 : -1; -} - static struct dst_entry* dccp_v4_route_skb(struct sock *sk, struct sk_buff *skb) { @@ -536,8 +501,8 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, - ireq->rmt_addr); + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, + ireq->rmt_addr); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, @@ -602,8 +567,9 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, - rxskb->nh.iph->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr, + rxskb->nh.iph->daddr); bh_lock_sock(dccp_v4_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, @@ -779,6 +745,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; + unsigned int cscov; if (skb->pkt_type != PACKET_HOST) return 1; @@ -830,6 +797,22 @@ int dccp_invalid_packet(struct sk_buff *skb) return 1; } + /* + * If P.CsCov is too large for the packet size, drop packet and return. + * This must come _before_ checksumming (not as RFC 4340 suggests). + */ + cscov = dccp_csum_coverage(skb); + if (cscov > skb->len) { + LIMIT_NETDEBUG(KERN_WARNING + "DCCP: P.CsCov %u exceeds packet length %d\n", + dh->dccph_cscov, skb->len); + return 1; + } + + /* If header checksum is incorrect, drop packet and return. + * (This step is completed in the AF-dependent functions.) 
*/ + skb->csum = skb_checksum(skb, 0, cscov, 0); + return 0; } @@ -840,16 +823,17 @@ static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; - /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, - skb->nh.iph->daddr) < 0) { - LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", + /* Step 1: If header checksum is incorrect, drop packet and return */ + if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) { + LIMIT_NETDEBUG(KERN_WARNING + "%s: dropped packet with invalid checksum\n", __FUNCTION__); goto discard_it; } @@ -905,6 +889,21 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } + /* + * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: "Such packets SHOULD be reported using Data Dropped + * options (Section 11.7) with Drop Code 0, Protocol + * Constraints." 
*/ + goto discard_and_relse; + } + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 201801e1532d..193b946fd039 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -58,12 +58,22 @@ static void dccp_v6_hash(struct sock *sk) } } -static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len, - struct in6_addr *saddr, - struct in6_addr *daddr, - unsigned long base) +/* add pseudo-header to DCCP checksum stored in skb->csum */ +static inline u16 dccp_v6_csum_finish(struct sk_buff *skb, + struct in6_addr *saddr, + struct in6_addr *daddr) { - return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base); + return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); +} + +static inline void dccp_v6_send_check(struct sock *sk, int unused_value, + struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct dccp_hdr *dh = dccp_hdr(skb); + + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) @@ -280,12 +290,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, if (skb != NULL) { struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v6_check(dh, skb->len, - &ireq6->loc_addr, - &ireq6->rmt_addr, - csum_partial((char *)dh, - skb->len, - skb->csum)); + dh->dccph_checksum = dccp_v6_csum_finish(skb, + &ireq6->loc_addr, + &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); if (err == NET_XMIT_CN) @@ -305,18 +312,6 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet6_rsk(req)->pktopts); } -static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_hdr *dh = dccp_hdr(skb); - - dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr, - len, IPPROTO_DCCP, 
- csum_partial((char *)dh, - dh->dccph_doff << 2, - skb->csum)); -} - static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) { struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; @@ -360,12 +355,14 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr, + &rxskb->nh.ipv6h->daddr); + memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); - dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - sizeof(*dh), IPPROTO_DCCP, - skb->csum); + fl.proto = IPPROTO_DCCP; fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; @@ -825,12 +822,22 @@ static int dccp_v6_rcv(struct sk_buff **pskb) const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; + /* Step 1: If header checksum is incorrect, drop packet and return. */ + if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr)) { + LIMIT_NETDEBUG(KERN_WARNING + "%s: dropped packet with invalid checksum\n", + __FUNCTION__); + goto discard_it; + } + dh = dccp_hdr(skb); DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); @@ -869,6 +876,19 @@ static int dccp_v6_rcv(struct sk_buff **pskb) goto no_dccp_socket; } + /* + * RFC 4340, sec. 
9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */ + goto discard_and_relse; + } + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; diff --git a/net/dccp/output.c b/net/dccp/output.c index 51654975e8ea..992caedd7725 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -96,6 +96,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dh->dccph_dport = inet->dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; + dh->dccph_cscov = dp->dccps_pcslen; /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; @@ -115,7 +116,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - icsk->icsk_af_ops->send_check(sk, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, 0, skb); if (set_ack) dccp_event_ack_sent(sk); @@ -329,7 +330,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->csum = 0; dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; @@ -352,6 +352,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; + dccp_csum_outgoing(skb); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; } @@ -376,7 +378,6 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->csum = 0; 
dccp_inc_seqno(&dp->dccps_gss); @@ -401,7 +402,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); dccp_hdr_reset(skb)->dccph_reset_code = code; - inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb); + inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; @@ -475,7 +476,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, sk->sk_prot->max_header); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - skb->csum = 0; dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); @@ -507,7 +507,6 @@ void dccp_send_ack(struct sock *sk) /* Reserve space for headers */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; dccp_transmit_skb(sk, skb); } @@ -561,7 +560,6 @@ void dccp_send_sync(struct sock *sk, const u64 seq, /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_seq = seq; @@ -587,7 +585,6 @@ void dccp_send_close(struct sock *sk, const int active) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 047d170a363a..db54e557eff1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -472,7 +472,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_PACKET_SIZE: dp->dccps_packet_size = val; break; - case DCCP_SOCKOPT_CHANGE_L: if (optlen != sizeof(struct dccp_so_feat)) err = -EINVAL; @@ -481,7 +480,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, (struct dccp_so_feat __user *) optval); break; - case DCCP_SOCKOPT_CHANGE_R: if (optlen != sizeof(struct dccp_so_feat)) err = -EINVAL; @@ -490,12 +488,26 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, (struct dccp_so_feat __user *) optval); break; - + case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ + if (val < 0 || val > 15) + err = -EINVAL; + else + dp->dccps_pcslen = val; + break; + case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ + if (val < 0 || val > 15) + err = -EINVAL; + else { + dp->dccps_pcrlen = val; + /* FIXME: add feature negotiation, + * ChangeL(MinimumChecksumCoverage, val) */ + } + break; default: err = -ENOPROTOOPT; break; } - + release_sock(sk); return err; } @@ -575,6 +587,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); + case DCCP_SOCKOPT_SEND_CSCOV: + val = dp->dccps_pcslen; + break; + case DCCP_SOCKOPT_RECV_CSCOV: + val = dp->dccps_pcrlen; + break; case 128 ... 
191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); -- cgit v1.2.3 From 08a29e41bb6d6516b0f65e19381f537168d1768e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:07:51 -0200 Subject: [DCCP]: Update comments on precisely which packets can be retransmitted This updates program documentation: spell out precise conditions about which packets are eligible for retransmission (which is actually quite hard to extract from RFC 4340). It is based on the following table derived from RFC 4340: +-----------+---------------------------------+---------------------+ | Type | Retransmit? | Remark | +-----------+---------------------------------+---------------------+ | Request | in client-REQUEST state | sec. 8.1.1 | | Response | NEVER | SHOULD NOT, 8.1.3 | | Data | NEVER | unreliable protocol | | Ack | possible in client-PARTOPEN | sec. 8.1.5 | | DataAck | NEVER | unreliable protocol | | CloseReq | only in server-CLOSEREQ state | MUST, sec. 8.3 | | Close | in node-CLOSING state | MUST, sec. 8.3 | +-----------+-------------------------------------------------------+ | Reset | only in response to other packets | | Sync | only in response to sequence-invalid packets (7.5.4) | | SyncAck | only in response to Sync packets | +-----------+-------------------------------------------------------+ Hence the only packets eligible for retransmission are: * Requests in client-REQUEST state (sec. 8.1.1) * Acks in client-PARTOPEN state (sec. 8.1.5) * CloseReq in server-CLOSEREQ state (sec. 8.3) * Close in node-CLOSING state (sec. 8.3) I had meant to put in a check for these types too, but have left that for later. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/timer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 8447742f5615..bda0af639ae4 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -142,7 +142,7 @@ static void dccp_retransmit_timer(struct sock *sk) /* retransmit timer is used for feature negotiation throughout * connection. In this case, no packet is re-transmitted, but rather an - * ack is generated and pending changes are splaced into its options. + * ack is generated and pending changes are placed into its options. */ if (sk->sk_send_head == NULL) { dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); @@ -154,9 +154,11 @@ static void dccp_retransmit_timer(struct sock *sk) /* * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake - * (PARTOPEN timer), etc). - */ + * packet types. The only packets eligible for retransmission are: + * -- Requests in client-REQUEST state (sec. 8.1.1) + * -- Acks in client-PARTOPEN state (sec. 8.1.5) + * -- CloseReq in server-CLOSEREQ state (sec. 8.3) + * -- Close in node-CLOSING state (sec. 
8.3) */ BUG_TRAP(sk->sk_send_head != NULL); /* -- cgit v1.2.3 From e11d9d30802278af22e78d8c10f348b683670cd9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:12:07 -0200 Subject: [DCCP]: Increment sequence numbers on retransmitted Response packets Problem: --- net/dccp/minisocks.c | 16 +++++++++------- net/dccp/output.c | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d3de696fe4bf..5b2773efd7c7 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -196,15 +196,17 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { - if (after48(DCCP_SKB_CB(skb)->dccpd_seq, - dccp_rsk(req)->dreq_isr)) { - struct dccp_request_sock *dreq = dccp_rsk(req); + struct dccp_request_sock *dreq = dccp_rsk(req); + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { dccp_pr_debug("Retransmitted REQUEST\n"); - /* Send another RESPONSE packet */ - dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); - dccp_set_seqno(&dreq->dreq_isr, - DCCP_SKB_CB(skb)->dccpd_seq); + dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + /* + * Send another RESPONSE packet + * To protect against Request floods, increment retrans + * counter (backoff, monitored by dccp_response_timer). 
+ */ + req->retrans++; req->rsk_ops->rtx_syn_ack(sk, req, NULL); } /* Network Duplicate, discard packet */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 992caedd7725..08ee5547a2f2 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -332,6 +332,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); dreq = dccp_rsk(req); + if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ + dccp_inc_seqno(&dreq->dreq_iss); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; @@ -354,6 +356,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dccp_csum_outgoing(skb); + /* We use `acked' to remember that a Response was already sent. */ + inet_rsk(req)->acked = 1; DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; } -- cgit v1.2.3 From 2e2e9e92bd723244ea20fa488b1780111f2b05e1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:23:52 -0200 Subject: [DCCP]: Add sysctls to control retransmission behaviour This adds 3 sysctls which govern the retransmission behaviour of DCCP control packets (3way handshake, feature negotiation). It removes 4 FIXMEs from the code. The close resemblance of sysctl variables to their TCP analogues is emphasised not only by their name, but also by giving them the same initial values. This is useful since there is not much practical experience with DCCP yet. Furthermore, with regard to the previous patch, it is now possible to limit the number of keepalive-Responses by setting net.dccp.default.request_retries (also a bit like in TCP). Lastly, added documentation of all existing DCCP sysctls. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 11 +++++++++++ net/dccp/feat.h | 7 ------- net/dccp/proto.c | 1 + net/dccp/sysctl.c | 25 +++++++++++++++++++++++++ net/dccp/timer.c | 16 ++++++++++------ 5 files changed, 47 insertions(+), 13 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d5c414bf7819..e7b96074a1b1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -64,6 +64,17 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_XMIT_TIMEO 30000 /* Time/msecs for blocking transmit per packet */ +/* sysctl variables for DCCP */ +extern int sysctl_dccp_request_retries; +extern int sysctl_dccp_retries1; +extern int sysctl_dccp_retries2; +extern int dccp_feat_default_sequence_window; +extern int dccp_feat_default_rx_ccid; +extern int dccp_feat_default_tx_ccid; +extern int dccp_feat_default_ack_ratio; +extern int dccp_feat_default_send_ack_vector; +extern int dccp_feat_default_send_ndp_count; + /* is seq1 < seq2 ? 
*/ static inline int before48(const u64 seq1, const u64 seq2) { diff --git a/net/dccp/feat.h b/net/dccp/feat.h index cee553d416ca..6048373c7186 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -26,11 +26,4 @@ extern void dccp_feat_clean(struct dccp_minisock *dmsk); extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); extern int dccp_feat_init(struct dccp_minisock *dmsk); -extern int dccp_feat_default_sequence_window; -extern int dccp_feat_default_rx_ccid; -extern int dccp_feat_default_tx_ccid; -extern int dccp_feat_default_ack_ratio; -extern int dccp_feat_default_send_ack_vector; -extern int dccp_feat_default_send_ndp_count; - #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index db54e557eff1..9c9c08cffdaf 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -212,6 +212,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dccp_init_xmit_timers(sk); icsk->icsk_rto = DCCP_TIMEOUT_INIT; + icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 38bc157876f3..7b09f2179985 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -11,6 +11,7 @@ #include #include +#include "dccp.h" #include "feat.h" #ifndef CONFIG_SYSCTL @@ -66,6 +67,30 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .ctl_name = NET_DCCP_DEFAULT_REQ_RETRIES, + .procname = "request_retries", + .data = &sysctl_dccp_request_retries, + .maxlen = sizeof(sysctl_dccp_request_retries), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .ctl_name = NET_DCCP_DEFAULT_RETRIES1, + .procname = "retries1", + .data = &sysctl_dccp_retries1, + .maxlen = sizeof(sysctl_dccp_retries1), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .ctl_name = NET_DCCP_DEFAULT_RETRIES2, + .procname = "retries2", + .data = 
&sysctl_dccp_retries2, + .maxlen = sizeof(sysctl_dccp_retries2), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .ctl_name = 0, } }; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index bda0af639ae4..7b3f16e29a97 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -15,6 +15,11 @@ #include "dccp.h" +/* sysctl variables governing numbers of retransmission attempts */ +int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES; +int sysctl_dccp_retries1 __read_mostly = TCP_RETR1; +int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; + static void dccp_write_timer(unsigned long data); static void dccp_keepalive_timer(unsigned long data); static void dccp_delack_timer(unsigned long data); @@ -44,11 +49,10 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) dst_negative_advice(&sk->sk_dst_cache); - retry_until = icsk->icsk_syn_retries ? : - /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + retry_until = icsk->icsk_syn_retries ? + : sysctl_dccp_request_retries; } else { - if (icsk->icsk_retransmits >= - /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + if (icsk->icsk_retransmits >= sysctl_dccp_retries1) { /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black hole detection. :-( @@ -72,7 +76,7 @@ static int dccp_write_timeout(struct sock *sk) dst_negative_advice(&sk->sk_dst_cache); } - retry_until = /* FIXME! */ 15 /* FIXME! 
sysctl_tcp_retries2 */; + retry_until = sysctl_dccp_retries2; /* * FIXME: see tcp_write_timout and tcp_out_of_resources */ @@ -196,7 +200,7 @@ backoff: icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); - if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) + if (icsk->icsk_retransmits > sysctl_dccp_retries1) __sk_dst_reset(sk); out:; } -- cgit v1.2.3 From afb0a34dd3e20b3f534de19993271b8664cf10bb Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:25:41 -0200 Subject: [DCCP]: Introduce a consistent naming scheme for sysctls In order to make their function clearer and obtain a consistent naming scheme to identify sysctls, all existing DCCP sysctls have been prefixed with `sysctl_dccp', following the same convention as used by TCP. Feature-specific sysctls retain the `feat' in the middle, although the `default' has been dropped, since it is obvious from use. Also removed a duplicate `dccp_feat_default_sequence_window' in ipv4.c. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 12 ++++++------ net/dccp/ipv4.c | 1 - net/dccp/minisocks.c | 2 +- net/dccp/options.c | 26 +++++++++++++------------- net/dccp/sysctl.c | 24 ++++++++++++------------ 5 files changed, 32 insertions(+), 33 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e7b96074a1b1..363fa520056e 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -68,12 +68,12 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; extern int sysctl_dccp_retries2; -extern int dccp_feat_default_sequence_window; -extern int dccp_feat_default_rx_ccid; -extern int dccp_feat_default_tx_ccid; -extern int dccp_feat_default_ack_ratio; -extern int dccp_feat_default_send_ack_vector; -extern int dccp_feat_default_send_ndp_count; +extern int sysctl_dccp_feat_sequence_window; +extern int sysctl_dccp_feat_rx_ccid; +extern int sysctl_dccp_feat_tx_ccid; +extern int sysctl_dccp_feat_ack_ratio; +extern int sysctl_dccp_feat_send_ack_vector; +extern int sysctl_dccp_feat_send_ndp_count; /* is seq1 < seq2 ? 
*/ static inline int before48(const u64 seq1, const u64 seq2) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 496112080f3d..84c05405984e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -651,7 +651,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = skb->nh.iph->daddr; ireq->rmt_addr = skb->nh.iph->saddr; - req->rcv_wnd = dccp_feat_default_sequence_window; ireq->opt = NULL; /* diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5b2773efd7c7..0c49733f5be1 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -300,7 +300,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) { inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->acked = 0; - req->rcv_wnd = dccp_feat_default_sequence_window; + req->rcv_wnd = sysctl_dccp_feat_sequence_window; } EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/options.c b/net/dccp/options.c index fb0db1f7cd7b..121e794fe454 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -22,23 +22,23 @@ #include "dccp.h" #include "feat.h" -int dccp_feat_default_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; -int dccp_feat_default_rx_ccid = DCCPF_INITIAL_CCID; -int dccp_feat_default_tx_ccid = DCCPF_INITIAL_CCID; -int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO; -int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; -int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; +int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; +int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; +int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; +int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; +int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; +int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -EXPORT_SYMBOL_GPL(dccp_feat_default_sequence_window); +EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window); void dccp_minisock_init(struct 
dccp_minisock *dmsk) { - dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window; - dmsk->dccpms_rx_ccid = dccp_feat_default_rx_ccid; - dmsk->dccpms_tx_ccid = dccp_feat_default_tx_ccid; - dmsk->dccpms_ack_ratio = dccp_feat_default_ack_ratio; - dmsk->dccpms_send_ack_vector = dccp_feat_default_send_ack_vector; - dmsk->dccpms_send_ndp_count = dccp_feat_default_send_ndp_count; + dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; + dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; + dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; + dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; + dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; + dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 7b09f2179985..8b62061e5701 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -22,48 +22,48 @@ static struct ctl_table dccp_default_table[] = { { .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, .procname = "seq_window", - .data = &dccp_feat_default_sequence_window, - .maxlen = sizeof(dccp_feat_default_sequence_window), + .data = &sysctl_dccp_feat_sequence_window, + .maxlen = sizeof(sysctl_dccp_feat_sequence_window), .mode = 0644, .proc_handler = proc_dointvec, }, { .ctl_name = NET_DCCP_DEFAULT_RX_CCID, .procname = "rx_ccid", - .data = &dccp_feat_default_rx_ccid, - .maxlen = sizeof(dccp_feat_default_rx_ccid), + .data = &sysctl_dccp_feat_rx_ccid, + .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), .mode = 0644, .proc_handler = proc_dointvec, }, { .ctl_name = NET_DCCP_DEFAULT_TX_CCID, .procname = "tx_ccid", - .data = &dccp_feat_default_tx_ccid, - .maxlen = sizeof(dccp_feat_default_tx_ccid), + .data = &sysctl_dccp_feat_tx_ccid, + .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), .mode = 0644, .proc_handler = proc_dointvec, }, { .ctl_name = NET_DCCP_DEFAULT_ACK_RATIO, .procname = "ack_ratio", - .data = 
&dccp_feat_default_ack_ratio, - .maxlen = sizeof(dccp_feat_default_ack_ratio), + .data = &sysctl_dccp_feat_ack_ratio, + .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), .mode = 0644, .proc_handler = proc_dointvec, }, { .ctl_name = NET_DCCP_DEFAULT_SEND_ACKVEC, .procname = "send_ackvec", - .data = &dccp_feat_default_send_ack_vector, - .maxlen = sizeof(dccp_feat_default_send_ack_vector), + .data = &sysctl_dccp_feat_send_ack_vector, + .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), .mode = 0644, .proc_handler = proc_dointvec, }, { .ctl_name = NET_DCCP_DEFAULT_SEND_NDP, .procname = "send_ndp", - .data = &dccp_feat_default_send_ndp_count, - .maxlen = sizeof(dccp_feat_default_send_ndp_count), + .data = &sysctl_dccp_feat_send_ndp_count, + .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count), .mode = 0644, .proc_handler = proc_dointvec, }, -- cgit v1.2.3 From 4ed800d02cfb639b4f8375a0f0f04f0efea64e7f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:26:51 -0200 Subject: [DCCP]: Remove forward declarations in timer.c This removes 3 forward declarations by reordering 2 functions. No code change at all. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/timer.c | 110 +++++++++++++++++++++++++++---------------------------- 1 file changed, 53 insertions(+), 57 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 7b3f16e29a97..e8f519e7f481 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -20,16 +20,6 @@ int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_dccp_retries1 __read_mostly = TCP_RETR1; int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; -static void dccp_write_timer(unsigned long data); -static void dccp_keepalive_timer(unsigned long data); -static void dccp_delack_timer(unsigned long data); - -void dccp_init_xmit_timers(struct sock *sk) -{ - inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, - &dccp_keepalive_timer); -} - static void dccp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; @@ -90,53 +80,6 @@ static int dccp_write_timeout(struct sock *sk) return 0; } -/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ -static void dccp_delack_timer(unsigned long data) -{ - struct sock *sk = (struct sock *)data; - struct inet_connection_sock *icsk = inet_csk(sk); - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, - jiffies + TCP_DELACK_MIN); - goto out; - } - - if (sk->sk_state == DCCP_CLOSED || - !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) - goto out; - if (time_after(icsk->icsk_ack.timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_delack_timer, - icsk->icsk_ack.timeout); - goto out; - } - - icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; - - if (inet_csk_ack_scheduled(sk)) { - if (!icsk->icsk_ack.pingpong) { - /* Delayed ACK missed: inflate ATO. 
*/ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, - icsk->icsk_rto); - } else { - /* Delayed ACK missed: leave pingpong mode and - * deflate ATO. - */ - icsk->icsk_ack.pingpong = 0; - icsk->icsk_ack.ato = TCP_ATO_MIN; - } - dccp_send_ack(sk); - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); - } -out: - bh_unlock_sock(sk); - sock_put(sk); -} - /* * The DCCP retransmit timer. */ @@ -270,3 +213,56 @@ out: bh_unlock_sock(sk); sock_put(sk); } + +/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ +static void dccp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + icsk->icsk_ack.blocked = 1; + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + sk_reset_timer(sk, &icsk->icsk_delack_timer, + jiffies + TCP_DELACK_MIN); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + goto out; + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, + icsk->icsk_ack.timeout); + goto out; + } + + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; + + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { + /* Delayed ACK missed: inflate ATO. */ + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, + icsk->icsk_rto); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. 
+ */ + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; + } + dccp_send_ack(sk); + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +void dccp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, + &dccp_keepalive_timer); +} -- cgit v1.2.3 From 865e9022d88ceedd89fa1079a6e1f9266ccd3711 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:31:50 -0200 Subject: [DCCP]: Remove redundant statements in init_sequence (ISS) This patch removes the following redundancies: 1) The test skb->protocol == htons(ETH_P_IPV6) in dccp_v6_init_sequence is always true since * dccp_v6_conn_request() is the only calling function * dccp_v6_conn_request() redirects all skb's with ETH_P_IP to dccp_v4_conn_request() 2) The first argument, `struct sock *sk', of dccp_v{4,6}_init_sequence() is never used. (This is similar for tcp_v{4,6}_init_sequence, an analogous patch has been submitted to netdev and merged.) By the way - are the `sport' / `dport' arguments in the right order? I have made them consistent among calls but they seem to be in the reverse order. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 5 ++--- net/dccp/ipv6.c | 20 ++++++-------------- 2 files changed, 8 insertions(+), 17 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 84c05405984e..b56a2fa2694a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -366,8 +366,7 @@ void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) EXPORT_SYMBOL_GPL(dccp_v4_send_check); -static inline u64 dccp_v4_init_sequence(const struct sock *sk, - const struct sk_buff *skb) +static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) { return secure_dccp_sequence_number(skb->nh.iph->daddr, skb->nh.iph->saddr, @@ -663,7 +662,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ dreq = dccp_rsk(req); dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_iss = dccp_v4_init_sequence(skb); dreq->dreq_service = service; if (dccp_v4_send_response(sk, req, NULL)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 193b946fd039..a08af75ddc9e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -76,20 +76,12 @@ static inline void dccp_v6_send_check(struct sock *sk, int unused_value, dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) +static inline __u32 dccp_v6_init_sequence(const struct sk_buff *skb) { - const struct dccp_hdr *dh = dccp_hdr(skb); - - if (skb->protocol == htons(ETH_P_IPV6)) - return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, - skb->nh.ipv6h->saddr.s6_addr32, - dh->dccph_dport, - dh->dccph_sport); - - return secure_dccp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, - dh->dccph_dport, - dh->dccph_sport); + return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, + skb->nh.ipv6h->saddr.s6_addr32, + dccp_hdr(skb)->dccph_dport, + dccp_hdr(skb)->dccph_sport ); } static 
void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -491,7 +483,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) */ dreq = dccp_rsk(req); dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v6_init_sequence(sk, skb); + dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_service = service; if (dccp_v6_send_response(sk, req, NULL)) -- cgit v1.2.3 From d7f7365f5776723da6df73540d855069c2daaa5c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:34:38 -0200 Subject: [DCCPv6]: Choose a genuine initial sequence number This * resolves a FIXME - DCCPv6 connections started all with an initial sequence number of 1; * provides a redirection `secure_dccpv6_sequence_number' in case the init_sequence_v6 code should be updated later; * concentrates the update of S.GAR into dccp_connect_init(); * removes a duplicate dccp_update_gss() in ipv4.c; * uses inet->dport instead of usin->sin_port, due to the following assignment in dccp_v4_connect(): inet->dport = usin->sin_port; Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 9 ++------- net/dccp/ipv6.c | 28 ++++++++++++++++------------ net/dccp/output.c | 6 +++++- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b56a2fa2694a..bc400b2ba25e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -113,13 +113,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. 
*/ sk_setup_caps(sk, &rt->u.dst); - dp->dccps_gar = - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, - usin->sin_port); - dccp_update_gss(sk, dp->dccps_iss); - + dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, + inet->sport, inet->dport); inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index a08af75ddc9e..8d6ddb6389a7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -76,12 +76,19 @@ static inline void dccp_v6_send_check(struct sock *sk, int unused_value, dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 dccp_v6_init_sequence(const struct sk_buff *skb) +static inline __u32 secure_dccpv6_sequence_number(__u32 *saddr, __u32 *daddr, + __u16 sport, __u16 dport ) { - return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, - skb->nh.ipv6h->saddr.s6_addr32, - dccp_hdr(skb)->dccph_dport, - dccp_hdr(skb)->dccph_sport ); + return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); +} + +static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +{ + return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, + skb->nh.ipv6h->saddr.s6_addr32, + dccp_hdr(skb)->dccph_dport, + dccp_hdr(skb)->dccph_sport ); + } static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -1065,13 +1072,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; - /* FIXME */ -#if 0 - dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, - inet->sport, - inet->dport); -#endif + + dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, + np->daddr.s6_addr32, + inet->sport, inet->dport); err = dccp_connect(sk); if (err) goto late_failure; diff --git a/net/dccp/output.c b/net/dccp/output.c index 08ee5547a2f2..0994b13f0f15 100644 --- a/net/dccp/output.c 
+++ b/net/dccp/output.c @@ -448,7 +448,6 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - dccp_update_gss(sk, dp->dccps_iss); /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: @@ -457,8 +456,13 @@ static inline void dccp_connect_init(struct sock *sk) * These adjustments MUST be applied only at the beginning of the * connection. */ + dccp_update_gss(sk, dp->dccps_iss); dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); + /* S.GAR - greatest valid acknowledgement number received on a non-Sync; + * initialized to S.ISS (sec. 8.5) */ + dp->dccps_gar = dp->dccps_iss; + icsk->icsk_retransmits = 0; init_timer(&dp->dccps_xmit_timer); dp->dccps_xmit_timer.data = (unsigned long)sk; -- cgit v1.2.3 From b9df3cb8cf9a96e63dfdcd3056a9cbc71f2459e7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Nov 2006 11:21:36 -0200 Subject: [TCP/DCCP]: Introduce net_xmit_eval Throughout the TCP/DCCP (and tunnelling) code, it often happens that the return code of a transmit function needs to be tested against NET_XMIT_CN which is a value that does not indicate a strict error condition. This patch uses a macro for these recurring situations which is consistent with the already existing macro net_xmit_errno, saving on duplicated code. 
Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 5 ++--- net/dccp/ipv6.c | 3 +-- net/dccp/output.c | 14 ++------------ 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bc400b2ba25e..61c09014dade 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -501,8 +501,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); - if (err == NET_XMIT_CN) - err = 0; + err = net_xmit_eval(err); } out: @@ -571,7 +570,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) rxskb->nh.iph->saddr, NULL); bh_unlock_sock(dccp_v4_ctl_socket->sk); - if (err == NET_XMIT_CN || err == 0) { + if (net_xmit_eval(err) == 0) { DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8d6ddb6389a7..2165b1740c7c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -294,8 +294,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); - if (err == NET_XMIT_CN) - err = 0; + err = net_xmit_eval(err); } done: diff --git a/net/dccp/output.c b/net/dccp/output.c index 0994b13f0f15..ef22f3cc791a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -125,16 +125,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); - if (err <= 0) - return err; - - /* NET_XMIT_CN is special. It does not guarantee, - * that this packet is lost. It tells that device - * is about to start to drop packets or already - * drops some packets of the same priority and - * invokes us to send less aggressively. - */ - return err == NET_XMIT_CN ? 
0 : err; + return net_xmit_eval(err); } return -ENOBUFS; } @@ -426,8 +417,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); - if (err == NET_XMIT_CN) - err = 0; + return net_xmit_eval(err); } } -- cgit v1.2.3 From 6a128e053e75a5f1be9fb53d0d53159f88197c61 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Nov 2006 11:50:36 -0200 Subject: [DCCPv6]: Resolve conditional build problem Resolves the problem that if IPv6 was configured `y' and DCCP `m' then dccp_ipv6 was not built as a module. With this change, dccp_ipv6 is built as a module whenever DCCP *OR* IPv6 are configured as modules; it will be built-in only if both DCCP = `y' and IPV6 = `y'. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 17ed99c46617..f4f8793aafff 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,13 +1,13 @@ -obj-$(CONFIG_IPV6) += dccp_ipv6.o - -dccp_ipv6-y := ipv6.o - obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o dccp_ipv4-y := ipv4.o +# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module +obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o +dccp_ipv6-y := ipv6.o + dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o -- cgit v1.2.3 From c02fdc0e81e9c735d8d895af1e201b235df326d8 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Nov 2006 12:48:10 -0200 Subject: [DCCP]: Make feature negotiation more readable This patch replaces cryptic feature negotiation messages of type Oct 31 15:42:20 kernel: dccp_feat_change: feat change type=32 feat=1 Oct 31 15:42:21 kernel: dccp_feat_change: feat change type=34 feat=1 
Oct 31 15:42:21 kernel: dccp_feat_change: feat change type=32 feat=5 into ones of type: Nov 2 13:54:45 kernel: dccp_feat_change: ChangeL(CCID (1), 3) Nov 2 13:54:45 kernel: dccp_feat_change: ChangeR(CCID (1), 3) Nov 2 13:54:45 kernel: dccp_feat_change: ChangeL(Ack Ratio (5), 2) Also, * completed the feature number list wrt RFC 4340 sec. 6.4 * annotating which ones have been implemented so far * implemented rudimentary sanity checking in feat.c (FIXMEs) * some minor fixes Committer note: uninlined dccp_feat_name and dccp_feat_typename, for consistency with dccp_{state,packet}_name, that, BTW, should be compiled only if CONFIG_IP_DCCP_DEBUG is selected, leaving this to another cset tho. Also shortened dccp_feat_negotiation_debug to dccp_feat_debug. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/feat.c | 116 ++++++++++++++++++++++++++++++++++++++++------------- net/dccp/feat.h | 41 ++++++++++++++++++- net/dccp/options.c | 4 +- 3 files changed, 131 insertions(+), 30 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/feat.c b/net/dccp/feat.c index a1b0682ee77c..12cde2f2f13b 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,7 +12,6 @@ #include -#include "dccp.h" #include "ccid.h" #include "feat.h" @@ -23,9 +22,17 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, { struct dccp_opt_pend *opt; - dccp_pr_debug("feat change type=%d feat=%d\n", type, feature); + dccp_feat_debug(type, feature, *val); - /* XXX sanity check feat change request */ + if (!dccp_feat_is_valid_type(type)) { + pr_info("option type %d invalid in negotiation\n", type); + return 1; + } + if (!dccp_feat_is_valid_length(type, feature, len)) { + pr_info("invalid length %d\n", len); + return 1; + } + /* XXX add further sanity checks */ /* check if that feature is already being negotiated */ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { @@ -95,14 +102,14 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8
new_ccid_nr) /* XXX taking only u8 vals */ static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) { - dccp_pr_debug("changing [%d] feat %d to %d\n", type, feat, val); + dccp_feat_debug(type, feat, val); switch (feat) { case DCCPF_CCID: return dccp_feat_update_ccid(sk, type, val); default: - dccp_pr_debug("IMPLEMENT changing [%d] feat %d to %d\n", - type, feat, val); + dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n", + dccp_feat_typename(type), feat); break; } return 0; @@ -265,10 +272,10 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) u8 *copy; int rc; - /* NN features must be change L */ - if (type == DCCPO_CHANGE_R) { - dccp_pr_debug("received CHANGE_R %d for NN feat %d\n", - type, feature); + /* NN features must be Change L (sec. 6.3.2) */ + if (type != DCCPO_CHANGE_L) { + dccp_pr_debug("received %s for NN feature %d\n", + dccp_feat_typename(type), feature); return -EFAULT; } @@ -299,7 +306,8 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) return rc; } - dccp_pr_debug("Confirming NN feature %d (val=%d)\n", feature, *copy); + dccp_feat_debug(type, feature, *copy); + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); return 0; @@ -318,14 +326,19 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, return; } - opt->dccpop_type = type == DCCPO_CHANGE_L ? 
DCCPO_CONFIRM_R : - DCCPO_CONFIRM_L; + switch (type) { + case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break; + case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break; + default: pr_info("invalid type %d\n", type); return; + + } opt->dccpop_feat = feature; opt->dccpop_val = NULL; opt->dccpop_len = 0; /* change feature */ - dccp_pr_debug("Empty confirm feature %d type %d\n", feature, type); + dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature); + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); } @@ -359,7 +372,7 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) { int rc; - dccp_pr_debug("got feat change type=%d feat=%d\n", type, feature); + dccp_feat_debug(type, feature, *val); /* figure out if it's SP or NN feature */ switch (feature) { @@ -375,6 +388,8 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) /* XXX implement other features */ default: + dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n", + dccp_feat_typename(type), feature); rc = -EFAULT; break; } @@ -403,20 +418,27 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, u8 t; struct dccp_opt_pend *opt; struct dccp_minisock *dmsk = dccp_msk(sk); - int rc = 1; + int found = 0; int all_confirmed = 1; - dccp_pr_debug("got feat confirm type=%d feat=%d\n", type, feature); - - /* XXX sanity check type & feat */ + dccp_feat_debug(type, feature, *val); /* locate our change request */ - t = type == DCCPO_CONFIRM_L ? 
DCCPO_CHANGE_R : DCCPO_CHANGE_L; + switch (type) { + case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; + case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; + default: pr_info("invalid type %d\n", type); + return 1; + + } + /* XXX sanity check feature value */ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { if (!opt->dccpop_conf && opt->dccpop_type == t && opt->dccpop_feat == feature) { - /* we found it */ + found = 1; + dccp_pr_debug("feature %d found\n", opt->dccpop_feat); + /* XXX do sanity check */ opt->dccpop_conf = 1; @@ -425,9 +447,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *val); - dccp_pr_debug("feat %d type %d confirmed %d\n", - feature, type, *val); - rc = 0; + /* XXX check the return value of dccp_feat_update */ break; } @@ -446,9 +466,9 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } - if (rc) - dccp_pr_debug("feat %d type %d never requested\n", - feature, type); + if (!found) + dccp_pr_debug("%s(%d, ...) 
never requested\n", + dccp_feat_typename(type), feature); return 0; } @@ -583,3 +603,45 @@ out: } EXPORT_SYMBOL_GPL(dccp_feat_init); + +#ifdef CONFIG_IP_DCCP_DEBUG +const char *dccp_feat_typename(const u8 type) +{ + switch(type) { + case DCCPO_CHANGE_L: return("ChangeL"); + case DCCPO_CONFIRM_L: return("ConfirmL"); + case DCCPO_CHANGE_R: return("ChangeR"); + case DCCPO_CONFIRM_R: return("ConfirmR"); + /* the following case must not appear in feature negotation */ + default: dccp_pr_debug("unknown type %d [BUG!]\n", type); + } + return NULL; +} + +EXPORT_SYMBOL_GPL(dccp_feat_typename); + +const char *dccp_feat_name(const u8 feat) +{ + static const char *feature_names[] = { + [DCCPF_RESERVED] = "Reserved", + [DCCPF_CCID] = "CCID", + [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", + [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", + [DCCPF_ECN_INCAPABLE] = "ECN Incapable", + [DCCPF_ACK_RATIO] = "Ack Ratio", + [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", + [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", + [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", + [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", + }; + if (feat >= DCCPF_MIN_CCID_SPECIFIC) + return "CCID-specific"; + + if (dccp_feat_is_reserved(feat)) + return feature_names[DCCPF_RESERVED]; + + return feature_names[feat]; +} + +EXPORT_SYMBOL_GPL(dccp_feat_name); +#endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 6048373c7186..2c373ad7edcf 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -12,9 +12,46 @@ */ #include +#include "dccp.h" -struct sock; -struct dccp_minisock; +static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len) +{ + /* sec. 6.1: Confirm has at least length 3, + * sec. 6.2: Change has at least length 4 */ + if (len < 3) + return 1; + if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R)) + return 1; + /* XXX: add per-feature length validation (sec. 
6.6.8) */ + return 0; +} + +static inline int dccp_feat_is_reserved(const u8 feat) +{ + return (feat > DCCPF_DATA_CHECKSUM && + feat < DCCPF_MIN_CCID_SPECIFIC) || + feat == DCCPF_RESERVED; +} + +/* feature negotiation knows only these four option types (RFC 4340, sec. 6) */ +static inline int dccp_feat_is_valid_type(const u8 optnum) +{ + return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R; + +} + +#ifdef CONFIG_IP_DCCP_DEBUG +extern const char *dccp_feat_typename(const u8 type); +extern const char *dccp_feat_name(const u8 feat); + +static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) +{ + dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type), + dccp_feat_name(feat), feat, val); +} +#else +#define dccp_feat_debug(type, feat, val) +#endif /* CONFIG_IP_DCCP_DEBUG */ extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, u8 *val, u8 len, gfp_t gfp); diff --git a/net/dccp/options.c b/net/dccp/options.c index 121e794fe454..2d0ef27f4ab9 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -465,8 +465,10 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, if (len) memcpy(to, val, len); - dccp_pr_debug("option %d feat %d len %d\n", type, feat, len); + dccp_pr_debug("%s(%s (%d), ...), length %d\n", + dccp_feat_typename(type), + dccp_feat_name(feat), feat, len); return 0; } -- cgit v1.2.3 From 09dbc3895e3242346bd434dae743c456fd28fc6a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Nov 2006 12:57:34 -0200 Subject: [DCCP]: Miscellaneous code tidy-ups This patch does not change code; it performs some trivial clean/tidy-ups: * removal of a `debug_prefix' string in favour of the already existing dccp_role(sk) * add documentation of structures and constants * separated out the cases for invalid packets (step 1 of the packet validation) * removing duplicate statements * combining declaration & initialisation Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- 
net/dccp/ackvec.c | 27 ++++++--------------------- net/dccp/ipv4.c | 39 +++++++++++++++++++-------------------- net/dccp/ipv6.c | 6 ++---- net/dccp/options.c | 18 +++++++----------- net/dccp/output.c | 1 + net/dccp/proto.c | 8 ++------ 6 files changed, 37 insertions(+), 62 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index f8208874ac7d..0c54b89a4e9b 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -67,10 +67,6 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av, int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT tx: " : "server tx: "; -#endif struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; int len = av->dccpav_vec_len + 2; struct timeval now; @@ -129,9 +125,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) dccp_ackvec_insert_avr(av, avr); - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " "ack_ackno=%llu\n", - debug_prefix, avr->dccpavr_sent_len, + dccp_role(sk), avr->dccpavr_sent_len, (unsigned long long)avr->dccpavr_ack_seqno, (unsigned long long)avr->dccpavr_ack_ackno); return 0; @@ -380,14 +376,9 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, */ list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) { if (ackno == avr->dccpavr_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
- "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, + dccp_role(sk), 1, (unsigned long long)avr->dccpavr_ack_seqno, (unsigned long long)avr->dccpavr_ack_ackno); dccp_ackvec_throw_record(av, avr); @@ -437,16 +428,10 @@ found: if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " + dccp_pr_debug("%s ACK vector 0, len=%d, " "ack_seqno=%llu, ack_ackno=%llu, " "ACKED!\n", - debug_prefix, len, + dccp_role(sk), len, (unsigned long long) avr->dccpavr_ack_seqno, (unsigned long long) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 61c09014dade..34d6d197c3b2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -518,7 +518,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; - u64 seqno; + u64 seqno = 0; /* Never send a reset in response to a reset. */ if (rxdh->dccph_type == DCCP_PKT_RESET) @@ -552,13 +552,11 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) DCCP_SKB_CB(rxskb)->dccpd_reset_code; /* See "8.3.1. 
Abnormal Termination" in RFC 4340 */ - seqno = 0; if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr, @@ -734,6 +732,11 @@ discard: EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); +/** + * dccp_invalid_packet - check for malformed packets + * Implements RFC 4340, 8.5: Step 1: Check header basics + * Packets that fail these checks are ignored and do not receive Resets. + */ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; @@ -742,6 +745,7 @@ int dccp_invalid_packet(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) return 1; + /* If the packet is shorter than 12 bytes, drop packet and return */ if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); return 1; @@ -749,42 +753,37 @@ int dccp_invalid_packet(struct sk_buff *skb) dh = dccp_hdr(skb); - /* If the packet type is not understood, drop packet and return */ + /* If P.type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); return 1; } /* - * If P.Data Offset is too small for packet type, or too large for - * packet, drop packet and return + * If P.Data Offset is too small for packet type, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 1\n", - dh->dccph_doff); + "too small\n", dh->dccph_doff); return 1; } - + /* + * If P.Data Offset is too too large for packet, drop packet and return + */ if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - 
"too small 2\n", - dh->dccph_doff); + "too large\n", dh->dccph_doff); return 1; } - dh = dccp_hdr(skb); - /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_x == 0 && - dh->dccph_type != DCCP_PKT_DATA && - dh->dccph_type != DCCP_PKT_ACK && - dh->dccph_type != DCCP_PKT_DATAACK) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " - "nor DataAck and P.X == 0\n", + if (dh->dccph_type >= DCCP_PKT_DATA && + dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { + LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data||Ack||" + "DataAck, while P.X == 0\n", dccp_packet_name(dh->dccph_type)); return 1; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2165b1740c7c..fc326173c215 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -318,7 +318,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct flowi fl; - u64 seqno; + u64 seqno = 0; if (rxdh->dccph_type == DCCP_PKT_RESET) return; @@ -345,13 +345,11 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) DCCP_SKB_CB(rxskb)->dccpd_reset_code; /* See "8.3.1. 
Abnormal Termination" in RFC 4340 */ - seqno = 0; if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr, diff --git a/net/dccp/options.c b/net/dccp/options.c index 2d0ef27f4ab9..7e50678e2471 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -60,10 +60,6 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) int dccp_parse_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx opt: " : "server rx opt: "; -#endif const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); @@ -119,7 +115,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) goto out_invalid_option; opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%sNDP count=%d\n", debug_prefix, + dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk), opt_recv->dccpor_ndp); break; case DCCPO_CHANGE_L: @@ -165,8 +161,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; dccp_timestamp(sk, &dp->dccps_timestamp_time); - dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", - debug_prefix, opt_recv->dccpor_timestamp, + dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", + dccp_role(sk), opt_recv->dccpor_timestamp, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; @@ -176,8 +172,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value); - 
dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ", - debug_prefix, + dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, " + "ackno=%llu, ", dccp_role(sk), opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) @@ -211,8 +207,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (elapsed_time > opt_recv->dccpor_elapsed_time) opt_recv->dccpor_elapsed_time = elapsed_time; - dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, - elapsed_time); + dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", + dccp_role(sk), elapsed_time); break; /* * From RFC 4340, sec. 10.3: diff --git a/net/dccp/output.c b/net/dccp/output.c index ef22f3cc791a..c34eada7f025 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -333,6 +333,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, return NULL; } + /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9c9c08cffdaf..0225bdacd3b1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -452,9 +452,8 @@ out_free_val: static int do_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - struct dccp_sock *dp; - int err; - int val; + struct dccp_sock *dp = dccp_sk(sk); + int val, err = 0; if (optlen < sizeof(int)) return -EINVAL; @@ -466,9 +465,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, return dccp_setsockopt_service(sk, val, optval, optlen); lock_sock(sk); - dp = dccp_sk(sk); - err = 0; - switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: dp->dccps_packet_size = val; -- cgit v1.2.3 From d23ca15a21804631d8f787a0cc5646df81b9c2ea Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 14 Nov 2006 13:19:45 -0200 Subject: [DCCP] ACKVEC: Optimization - Do not traverse records if none will be found Do not traverse the list of ack vector records [proportional to window size] when we know we will not find what we 
are looking for. This is especially useful because ack vectors are checked twice: 1) Upon parsing of options. 2) Upon notification of a new ack. All of the work will occur during check #1. Therefore, when check #2 is performed, no new work will be done. This is now "detected" and there is no performance hit when doing #2. Signed-off-by: Andrea Bittau Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ackvec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 0c54b89a4e9b..bd20a2c55e25 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -383,7 +383,8 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, (unsigned long long)avr->dccpavr_ack_ackno); dccp_ackvec_throw_record(av, avr); break; - } + } else if (avr->dccpavr_ack_seqno > ackno) + break; /* old news */ } } -- cgit v1.2.3 From cfb6eeb4c860592edd123fdea908d23c6ad1c7dc Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 14 Nov 2006 19:07:45 -0800 Subject: [TCP]: MD5 Signature Option (RFC2385) support. Based on implementation by Rick Payne. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 6 +++--- net/dccp/ipv6.c | 6 +++--- net/dccp/minisocks.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 34d6d197c3b2..35985334daee 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -509,7 +509,7 @@ out: return err; } -static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; @@ -724,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); discard: kfree_skb(skb); return 0; @@ -913,7 +913,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); } discard_it: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fc326173c215..e0a0607862ef 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -310,7 +310,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet6_rsk(req)->pktopts); } -static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + @@ -805,7 +805,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v6_ctl_send_reset(skb); + dccp_v6_ctl_send_reset(sk, skb); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); @@ -902,7 +902,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v6_ctl_send_reset(skb); + dccp_v6_ctl_send_reset(sk, skb); } discard_it: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 0c49733f5be1..3975048d8094 100644 --- 
a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -246,7 +246,7 @@ listen_overflow: DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) - req->rsk_ops->send_reset(skb); + req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req, prev); goto out; -- cgit v1.2.3 From 2bda2853150e20ca2a44627d00b5f85af0b24a42 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Nov 2006 21:28:51 -0800 Subject: [NET]: Annotate csum_tcpudp_magic() callers in net/* Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 35985334daee..6bca71929de5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -344,7 +344,7 @@ out: sock_put(sk); } -static inline u16 dccp_v4_csum_finish(struct sk_buff *skb, +static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, __be32 src, __be32 dst) { return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); -- cgit v1.2.3 From 868c86bcb5bdea7ed8d45979b17bb919af9254db Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Nov 2006 21:35:48 -0800 Subject: [NET]: annotate csum_ipv6_magic() callers in net/* Signed-off-by: Al Viro Signed-off-by: David S. 
Miller --- net/dccp/ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e0a0607862ef..f28e406a4a1f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -59,7 +59,7 @@ static void dccp_v6_hash(struct sock *sk) } /* add pseudo-header to DCCP checksum stored in skb->csum */ -static inline u16 dccp_v6_csum_finish(struct sk_buff *skb, +static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr) { -- cgit v1.2.3 From 7d533f941856d7d192bfec28581e130dda9c6688 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Nov 2006 21:51:36 -0800 Subject: [NET]: More dccp endianness annotations. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f28e406a4a1f..6eda430ae929 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -76,8 +76,8 @@ static inline void dccp_v6_send_check(struct sock *sk, int unused_value, dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 secure_dccpv6_sequence_number(__u32 *saddr, __u32 *daddr, - __u16 sport, __u16 dport ) +static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport ) { return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); } -- cgit v1.2.3 From 3c6952624a8f600f9a0fbc1f5db5560a7ef9b13e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 15 Nov 2006 21:27:47 -0200 Subject: [DCCP]: Introduce DCCP_{BUG{_ON},CRIT} macros, use enum:8 for the ccid3 states This patch tackles the following problem: * the ccid3_hc_{t,r}x_sock define ccid3hc{t,r}x_state as `u8', but in reality there can only be a few, pre-defined enum names * this necessitates additional checking for unexpected values which would otherwise be caught by the compiler Signed-off-by: Gerrit Renker 
Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 45 ++++++++++++--------------------------------- net/dccp/ccids/ccid3.h | 19 +++++++++++++++++-- net/dccp/dccp.h | 8 ++++++++ 3 files changed, 37 insertions(+), 35 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cec23ad286de..2fa0c6d1fbee 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -75,14 +75,6 @@ static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; static struct dccp_li_hist *ccid3_li_hist; -/* TFRC sender states */ -enum ccid3_hc_tx_states { - TFRC_SSTATE_NO_SENT = 1, - TFRC_SSTATE_NO_FBACK, - TFRC_SSTATE_FBACK, - TFRC_SSTATE_TERM, -}; - #ifdef CCID3_DEBUG static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) { @@ -251,9 +243,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) hctx->ccid3hctx_x)); break; default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); + DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, + hctx->ccid3hctx_state); goto out; } @@ -329,9 +320,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, rc = delay > 0 ? 
delay : 0; break; default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); + DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, + hctx->ccid3hctx_state); rc = -EINVAL; break; } @@ -423,9 +413,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) } break; default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); + DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, + hctx->ccid3hctx_state); break; } } @@ -568,9 +557,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_idle = 1; break; default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); + DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, + hctx->ccid3hctx_state); break; } } @@ -688,13 +676,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) * RX Half Connection methods */ -/* TFRC receiver states */ -enum ccid3_hc_rx_states { - TFRC_RSTATE_NO_DATA = 1, - TFRC_RSTATE_DATA, - TFRC_RSTATE_TERM = 127, -}; - #ifdef CCID3_DEBUG static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) { @@ -744,9 +725,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } break; default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); - dump_stack(); + DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, + hcrx->ccid3hcrx_state); return; } @@ -1088,9 +1068,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } return; default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); - dump_stack(); + DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, + hcrx->ccid3hcrx_state); return; } diff 
--git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 0a2cb7536d26..c122e757eb7f 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -73,6 +73,14 @@ struct ccid3_options_received { u32 ccid3or_receive_rate; }; +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + /** struct ccid3_hc_tx_sock - CCID3 sender half connection sock * * @ccid3hctx_state - Sender state @@ -103,7 +111,7 @@ struct ccid3_hc_tx_sock { #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - u8 ccid3hctx_state; + enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; struct timeval ccid3hctx_t_last_win_count; @@ -115,6 +123,13 @@ struct ccid3_hc_tx_sock { struct ccid3_options_received ccid3hctx_options_received; }; +/* TFRC receiver states */ +enum ccid3_hc_rx_states { + TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_DATA, + TFRC_RSTATE_TERM = 127, +}; + struct ccid3_hc_rx_sock { struct tfrc_rx_info ccid3hcrx_tfrc; #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv @@ -122,8 +137,8 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p u64 ccid3hcrx_seqno_nonloss:48, ccid3hcrx_ccval_nonloss:4, - ccid3hcrx_state:8, ccid3hcrx_ccval_last_counter:4; + enum ccid3_hc_rx_states ccid3hcrx_state:8; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 363fa520056e..ecb4e7bd1a84 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -18,6 +18,14 @@ #include #include "ackvec.h" +#define DCCP_CRIT(fmt, a...) LIMIT_NETDEBUG(KERN_CRIT fmt " at %s:%d/%s()\n", \ + ##a, __FILE__, __LINE__, __FUNCTION__) +#define DCCP_BUG(fmt, a...) 
do { DCCP_CRIT(fmt, ##a); dump_stack(); } while (0) +#define DCCP_BUG_ON(cond) do { if (unlikely((cond) == 0)) \ + DCCP_BUG("BUG: condition \"%s\" fails",\ + __stringify((cond))); \ + } while (0) + #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; -- cgit v1.2.3 From e523a1550e877f8a8ff87a50269b7ee7bfb43464 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 16 Nov 2006 12:23:58 -0200 Subject: [DCCP]: One NET_INC_STATS() could be NET_INC_STATS_BH in dccp_v4_err() Spotted by Eric Dumazet in tcp_v4_rcv(). Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6bca71929de5..a20eb71d45db 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -244,7 +244,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) seq = dccp_hdr_seq(skb); if (sk->sk_state != DCCP_LISTEN && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } -- cgit v1.2.3 From 58a5a7b9555ea231b557ebef5cabeaf8e951df0b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 16 Nov 2006 14:06:06 -0200 Subject: [NET]: Conditionally use bh_lock_sock_nested in sk_receive_skb Spotted by Ian McDonald, tentatively fixed by Gerrit Renker: http://www.mail-archive.com/dccp%40vger.kernel.org/msg00599.html Rewritten not to unroll sk_receive_skb, in the common case, i.e. no lock debugging, it's optimized away. 
Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a20eb71d45db..7114befe7d50 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -899,7 +899,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto discard_and_relse; nf_reset(skb); - return sk_receive_skb(sk, skb); + return sk_receive_skb(sk, skb, 1); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6eda430ae929..03bb8298250a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -888,7 +888,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb) if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return sk_receive_skb(sk, skb) ? -1 : 0; + return sk_receive_skb(sk, skb, 1) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) -- cgit v1.2.3 From 32aac18dfa0963fde40cc074ba97ebbae8b755f2 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Thu, 16 Nov 2006 14:28:40 -0200 Subject: [DCCP] CCID2: Code optimizations These are code optimizations which are relevant when dealing with large windows. They are not coded the way I would like to, but they do the job for the short-term. This patch should be more neat. Committer note: Changed the seqno comparisons to use {after,before}48 to handle wrapping. 
Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid2.c | 22 ++++++++++++++++++++-- net/dccp/ccids/ccid2.h | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 162032baeac0..6533cb2f27bb 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -619,7 +619,17 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + if (after48(ackno, hctx->ccid2hctx_high_ack)) + hctx->ccid2hctx_high_ack = ackno; + + seqp = hctx->ccid2hctx_seqt; + while (before48(seqp->ccid2s_seq, ackno)) { + seqp = seqp->ccid2s_next; + if (seqp == hctx->ccid2hctx_seqh) { + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + break; + } + } /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for * this single ack. I round up. @@ -697,7 +707,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* The state about what is acked should be correct now * Check for NUMDUPACK */ - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + seqp = hctx->ccid2hctx_seqt; + while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) { + seqp = seqp->ccid2s_next; + if (seqp == hctx->ccid2hctx_seqh) { + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + break; + } + } done = 0; while (1) { if (seqp->ccid2s_acked) { @@ -771,6 +788,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; hctx->ccid2hctx_last_cong = jiffies; + hctx->ccid2hctx_high_ack = 0; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 5b2ef4acb300..a97a89927308 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -72,6 +72,7 @@ struct ccid2_hc_tx_sock { int 
ccid2hctx_rpdupack; int ccid2hctx_sendwait; unsigned long ccid2hctx_last_cong; + u64 ccid2hctx_high_ack; }; struct ccid2_hc_rx_sock { -- cgit v1.2.3 From eed73417d501c2c7bdef1bc8a1f7a1548a635b09 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 17 Nov 2006 12:21:43 -0200 Subject: [DCCP]: Use kmemdup Code diff stats: [acme@newtoy net-2.6.20]$ codiff /tmp/dccp.ko.before /tmp/dccp.ko.after /pub/scm/linux/kernel/git/acme/net-2.6.20/net/dccp/feat.c: __dccp_feat_init | -16 dccp_feat_change_recv | -55 dccp_feat_clone | -56 3 functions changed, 127 bytes removed [acme@newtoy net-2.6.20]$ Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/feat.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 12cde2f2f13b..e808c418c992 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -286,12 +286,11 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) if (opt == NULL) return -ENOMEM; - copy = kmalloc(len, GFP_ATOMIC); + copy = kmemdup(val, len, GFP_ATOMIC); if (copy == NULL) { kfree(opt); return -ENOMEM; } - memcpy(copy, val, len); opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */ opt->dccpop_feat = feature; @@ -521,20 +520,18 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) { struct dccp_opt_pend *newopt; /* copy the value of the option */ - u8 *val = kmalloc(opt->dccpop_len, GFP_ATOMIC); + u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC); if (val == NULL) goto out_clean; - memcpy(val, opt->dccpop_val, opt->dccpop_len); - newopt = kmalloc(sizeof(*newopt), GFP_ATOMIC); + newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC); if (newopt == NULL) { kfree(val); goto out_clean; } /* insert the option */ - memcpy(newopt, opt, sizeof(*newopt)); newopt->dccpop_val = val; list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending); @@ -565,10 +562,9 
@@ static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat, u8 *val, u8 len) { int rc = -ENOMEM; - u8 *copy = kmalloc(len, GFP_KERNEL); + u8 *copy = kmemdup(val, len, GFP_KERNEL); if (copy != NULL) { - memcpy(copy, val, len); rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL); if (rc) kfree(copy); -- cgit v1.2.3 From 84116716cc9404356f775443b460f76766f08f65 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 20 Nov 2006 18:26:03 -0200 Subject: [DCCP]: enable debug messages also for static builds This patch * makes debugging (when configured) work both for static / module build * provides generic debugging macros for use in other DCCP / CCID modules * adds missing information about debug parameters to Kconfig * performs some code tidy-up Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/Kconfig | 3 +++ net/dccp/ackvec.c | 11 ++++------- net/dccp/ccids/Kconfig | 14 ++++++++++++-- net/dccp/ccids/ccid2.c | 16 ++++++---------- net/dccp/dccp.h | 19 ++++++++++++------- 5 files changed, 37 insertions(+), 26 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 2fc5e55d2a8d..b8a68dd41000 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -38,6 +38,9 @@ config IP_DCCP_DEBUG ---help--- Only use this if you're hacking DCCP. + When compiling DCCP as a module, this debugging output can be toggled + by setting the parameter dccp_debug of the `dccp' module to 0 or 1. + Just say N. 
config NET_DCCPPROBE diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index bd20a2c55e25..d34badcd012e 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -318,21 +318,18 @@ out_duplicate: #ifdef CONFIG_IP_DCCP_DEBUG void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) { - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); + dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long)ackno); while (len--) { const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - printk("%d,%d|", state, rl); + dccp_pr_debug_cat("%d,%d|", state, rl); ++vector; } - printk("\n"); + dccp_pr_debug_cat("\n"); } void dccp_ackvec_print(const struct dccp_ackvec *av) diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 8533dabfb9f8..ba826d99ed03 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -28,13 +28,20 @@ config IP_DCCP_CCID2 This text was extracted from RFC 4340 (sec. 10.1), http://www.ietf.org/rfc/rfc4340.txt + To compile this CCID as a module, choose M here: the module will be + called dccp_ccid2. + If in doubt, say M. config IP_DCCP_CCID2_DEBUG - bool "CCID2 debug" + bool "CCID2 debugging messages" depends on IP_DCCP_CCID2 ---help--- - Enable CCID2 debug messages. + Enable CCID2-specific debugging messages. + + When compiling CCID2 as a module, this debugging output can + additionally be toggled by setting the ccid2_debug module + parameter to 0 or 1. If in doubt, say N. @@ -62,6 +69,9 @@ config IP_DCCP_CCID3 This text was extracted from RFC 4340 (sec. 10.2), http://www.ietf.org/rfc/rfc4340.txt + To compile this CCID as a module, choose M here: the module will be + called dccp_ccid3. + If in doubt, say M. 
config IP_DCCP_TFRC_LIB diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 6533cb2f27bb..0fb0d66544a2 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -33,18 +33,11 @@ #include "../dccp.h" #include "ccid2.h" -static int ccid2_debug; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -#define ccid2_pr_debug(format, a...) \ - do { if (ccid2_debug) \ - printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ - } while (0) -#else -#define ccid2_pr_debug(format, a...) -#endif +static int ccid2_debug; +#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { int len = 0; @@ -86,7 +79,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); } #else -#define ccid2_hc_tx_check_sanity(hctx) do {} while (0) +#define ccid2_pr_debug(format, a...) +#define ccid2_hc_tx_check_sanity(hctx) #endif static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, @@ -841,8 +835,10 @@ static struct ccid_operations ccid2 = { .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG module_param(ccid2_debug, int, 0444); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); +#endif static __init int ccid2_module_init(void) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index ecb4e7bd1a84..33d86f53138b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -26,15 +26,20 @@ __stringify((cond))); \ } while (0) +#ifdef MODULE +#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \ + printk(fmt, ##args); \ + } while(0) +#else +#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args) +#endif +#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \ + "%s: " fmt, __FUNCTION__, ##a) + #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; - -#define dccp_pr_debug(format, a...) 
\ - do { if (dccp_debug) \ - printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ - } while (0) -#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \ - printk(format, ##a); } while (0) +#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) +#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) -- cgit v1.2.3 From 56724aa434e9b4d73548021ede7a1474f533f3fe Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 20 Nov 2006 18:28:09 -0200 Subject: [DCCP]: Add CCID3 debug support to Kconfig This adds a CCID3 debug option to the configuration menu which is missing in Kconfig, but already used by the code. CCID 2 already provides such an entry. To enable debugging, set CONFIG_IP_DCCP_CCID3_DEBUG=y NOTE: The use of ccid3_{t,r}x_state_name is safe, since now only enum values can appear. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/Kconfig | 11 +++++++++++ net/dccp/ccids/ccid3.c | 16 ++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index ba826d99ed03..dac89166eb18 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -78,4 +78,15 @@ config IP_DCCP_TFRC_LIB depends on IP_DCCP_CCID3 def_tristate IP_DCCP_CCID3 +config IP_DCCP_CCID3_DEBUG + bool "CCID3 debugging messages" + depends on IP_DCCP_CCID3 + ---help--- + Enable CCID3-specific debugging messages. + + When compiling CCID3 as a module, this debugging output can + additionally be toggled by setting the ccid3_debug module + parameter to 0 or 1. + + If in doubt, say N. endmenu diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 2fa0c6d1fbee..7db801ec1ab9 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -60,13 +60,11 @@ static u32 usecs_div(const u32 a, const u32 b) return (b >= 2 * div) ? 
tmp / (b / div) : tmp; } -static int ccid3_debug; -#ifdef CCID3_DEBUG -#define ccid3_pr_debug(format, a...) \ - do { if (ccid3_debug) \ - printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ - } while (0) + +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG +static int ccid3_debug; +#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) #else #define ccid3_pr_debug(format, a...) #endif @@ -75,7 +73,7 @@ static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; static struct dccp_li_hist *ccid3_li_hist; -#ifdef CCID3_DEBUG +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) { static char *ccid3_state_names[] = { @@ -676,7 +674,7 @@ static void ccid3_hc_tx_exit(struct sock *sk) * RX Half Connection methods */ -#ifdef CCID3_DEBUG +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) { static char *ccid3_rx_state_names[] = { @@ -1240,8 +1238,10 @@ static struct ccid_operations ccid3 = { .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, int, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); +#endif static __init int ccid3_module_init(void) { -- cgit v1.2.3 From b1308dc015eb09cf094ca169296738a13ae049ad Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 20 Nov 2006 18:30:17 -0200 Subject: [DCCP]: Set TX Queue Length Bounds via Sysctl Previously the transmit queue was unbounded. 
This patch: * puts a limit on transmit queue length and sends back EAGAIN if the buffer is full * sets the TX queue length to a sensible default * implements tx buffer sysctls for DCCP Signed-off-by: Ian McDonald Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 1 + net/dccp/proto.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33d86f53138b..3a94625a1af3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -87,6 +87,7 @@ extern int sysctl_dccp_feat_tx_ccid; extern int sysctl_dccp_feat_ack_ratio; extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; +extern int sysctl_dccp_tx_qlen; /* is seq1 < seq2 ? */ static inline int before48(const u64 seq1, const u64 seq2) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0225bdacd3b1..a7f345c8d0db 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -52,6 +52,9 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { EXPORT_SYMBOL_GPL(dccp_hashinfo); +/* the maximum queue length for tx in packets. 0 is no limit */ +int sysctl_dccp_tx_qlen __read_mostly = 5; + void dccp_set_state(struct sock *sk, const int state) { const int oldstate = sk->sk_state; @@ -645,6 +648,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EMSGSIZE; lock_sock(sk); + + if (sysctl_dccp_tx_qlen && + (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { + rc = -EAGAIN; + goto out_release; + } + timeo = sock_sndtimeo(sk, noblock); /* -- cgit v1.2.3 From 59348b19efebfd6a8d0791ff81d207b16594c94b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 20 Nov 2006 18:39:23 -0200 Subject: [DCCP]: Simplified conditions due to use of enum:8 states This reaps the benefit of the earlier patch, which changed the type of CCID 3 states to use enums, in that many conditions are now simplified and the number of possible (unexpected) values is greatly reduced. 
In a few instances, this also allowed to simplify pre-conditions; where care has been taken to retain logical equivalence. [DCCP]: Introduce a consistent BUG/WARN message scheme This refines the existing set of DCCP messages so that * BUG(), BUG_ON(), WARN_ON() have meaningful DCCP-specific counterparts * DCCP_CRIT (for severe warnings) is not rate-limited * DCCP_WARN() is introduced as rate-limited wrapper Using these allows a faster and cleaner transition to their original counterparts once the code has matured into a full DCCP implementation. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ackvec.c | 5 +- net/dccp/ccids/ccid2.c | 2 +- net/dccp/ccids/ccid3.c | 132 ++++++++++++++++--------------------- net/dccp/ccids/lib/loss_interval.c | 6 +- net/dccp/ccids/lib/tfrc_equation.c | 7 +- net/dccp/dccp.h | 17 +++-- net/dccp/feat.c | 11 ++-- net/dccp/input.c | 25 ++++--- net/dccp/ipv4.c | 24 +++---- net/dccp/ipv6.c | 4 +- net/dccp/minisocks.c | 7 +- net/dccp/options.c | 10 ++- net/dccp/output.c | 16 ++--- net/dccp/proto.c | 5 +- 14 files changed, 115 insertions(+), 156 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index d34badcd012e..1b4b60d8bdec 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -461,9 +461,6 @@ int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, return 0; } -static char dccp_ackvec_slab_msg[] __initdata = - KERN_CRIT "DCCP: Unable to create ack vectors slab caches\n"; - int __init dccp_ackvec_init(void) { dccp_ackvec_slab = kmem_cache_create("dccp_ackvec", @@ -485,7 +482,7 @@ out_destroy_slab: kmem_cache_destroy(dccp_ackvec_slab); dccp_ackvec_slab = NULL; out_err: - printk(dccp_ackvec_slab_msg); + DCCP_CRIT("Unable to create Ack Vector slab cache"); return -ENOBUFS; } diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 0fb0d66544a2..207f7f9b36ca 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -420,7 +420,7 @@ static int 
ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, return -1; out_invalid_option: - BUG_ON(1); /* should never happen... options were previously parsed ! */ + DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); return -1; } diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7db801ec1ab9..4eada515b773 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -176,8 +176,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) ccid3_tx_state_name(hctx->ccid3hctx_state)); switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_TERM: - goto out; case TFRC_SSTATE_NO_FBACK: /* Halve send rate */ hctx->ccid3hctx_x /= 2; @@ -240,9 +238,10 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) 2 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x)); break; - default: - DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, - hctx->ccid3hctx_state); + case TFRC_SSTATE_NO_SENT: + DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk); + /* fall through */ + case TFRC_SSTATE_TERM: goto out; } @@ -264,7 +263,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, long delay; int rc = -ENOTCONN; - BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + BUG_ON(hctx == NULL); /* Check if pure ACK or Terminating*/ /* @@ -282,9 +281,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, rc = -ENOBUFS; if (unlikely(new_packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough " - "mem to add to history, send refused\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, not enough mem to add to history," + "send refused\n", dccp_role(sk), sk); goto out; } @@ -317,9 +315,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* divide by -1000 is to convert to ms and get sign right */ rc = delay > 0 ? 
delay : 0; break; - default: - DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, - hctx->ccid3hctx_state); + case TFRC_SSTATE_TERM: + DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); rc = -EINVAL; break; } @@ -343,7 +340,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; - BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + BUG_ON(hctx == NULL); dccp_timestamp(sk, &now); @@ -354,13 +351,11 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't " - "exists in history!\n", __FUNCTION__); + DCCP_WARN("packet doesn't exist in history!\n"); return; } if (unlikely(packet->dccphtx_sent)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in " - "history!\n", __FUNCTION__); + DCCP_WARN("no unsent packet in history!\n"); return; } packet->dccphtx_tstamp = now; @@ -395,9 +390,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_SENT: /* if first wasn't pure ack */ if (len != 0) - printk(KERN_CRIT "%s: %s, First packet sent is noted " - "as a data packet\n", - __FUNCTION__, dccp_role(sk)); + DCCP_CRIT("%s, First packet sent is noted " + "as a data packet", dccp_role(sk)); return; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: @@ -410,9 +404,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) hctx->ccid3hctx_t_ipi); } break; - default: - DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, - hctx->ccid3hctx_state); + case TFRC_SSTATE_TERM: + DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); break; } } @@ -430,7 +423,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) u32 x_recv; u32 r_sample; - BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + 
BUG_ON(hctx == NULL); /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || @@ -455,11 +448,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, DCCP_SKB_CB(skb)->dccpd_ack_seq); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno " - "%llu(%s) does't exist in history!\n", - __FUNCTION__, dccp_role(sk), sk, + DCCP_WARN("%s, sk=%p, seqno %llu(%s) does't exist " + "in history!\n", dccp_role(sk), sk, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } @@ -467,9 +459,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) dccp_timestamp(sk, &now); r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); if (unlikely(r_sample <= t_elapsed)) - LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " - "t_elapsed=%uus\n", - __FUNCTION__, r_sample, t_elapsed); + DCCP_WARN("r_sample=%uus,t_elapsed=%uus\n", + r_sample, t_elapsed); else r_sample -= t_elapsed; @@ -554,9 +545,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* set idle flag */ hctx->ccid3hctx_idle = 1; break; - default: - DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, - hctx->ccid3hctx_state); + case TFRC_SSTATE_TERM: + DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); break; } } @@ -596,9 +586,9 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: if (unlikely(len != 4)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " - "len for TFRC_OPT_LOSS_EVENT_RATE\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, invalid len %d " + "for TFRC_OPT_LOSS_EVENT_RATE\n", + dccp_role(sk), sk, len); rc = -EINVAL; } else { opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value); @@ -617,9 +607,9 @@ 
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, break; case TFRC_OPT_RECEIVE_RATE: if (unlikely(len != 4)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " - "len for TFRC_OPT_RECEIVE_RATE\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, invalid len %d " + "for TFRC_OPT_RECEIVE_RATE\n", + dccp_role(sk), sk, len); rc = -EINVAL; } else { opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value); @@ -722,17 +712,15 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) delta); } break; - default: - DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, - hcrx->ccid3hcrx_state); + case TFRC_RSTATE_TERM: + DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); return; } packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet " - "in history!\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, no data packet in history!\n", + dccp_role(sk), sk); return; } @@ -820,29 +808,29 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } if (unlikely(step == 0)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history " - "contains no data packets!\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, packet history has no data packets!\n", + dccp_role(sk), sk); return ~0; } if (unlikely(interval == 0)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a " - "win_count interval > 0. Defaulting to 1\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, Could not find a win_count interval > 0." 
+ "Defaulting to 1\n", dccp_role(sk), sk); interval = 1; } found: if (!tail) { - LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", - __FUNCTION__); + DCCP_CRIT("tail is null\n"); return ~0; } rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); - if (rtt == 0) - rtt = 1; + + if (rtt == 0) { + DCCP_WARN("RTT==0, setting to 1\n"); + rtt = 1; + } dccp_timestamp(sk, &tstamp); delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); @@ -856,9 +844,7 @@ found: tmp2 = (u32)tmp1; if (!tmp2) { - LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " - "%s: x_recv = %u, rtt =%u\n", - __FUNCTION__, x_recv, rtt); + DCCP_CRIT("tmp2 = 0, x_recv = %u, rtt =%u\n", x_recv, rtt); return ~0; } @@ -904,8 +890,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); if (entry == NULL) { - printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); - dump_stack(); + DCCP_BUG("out of memory - can not allocate entry"); return; } @@ -984,9 +969,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) u32 p_prev, rtt_prev, r_sample, t_elapsed; int loss; - BUG_ON(hcrx == NULL || - !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || - hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); + BUG_ON(hcrx == NULL); opt_recv = &dccp_sk(sk)->dccps_options_received; @@ -1004,9 +987,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) t_elapsed = opt_recv->dccpor_elapsed_time * 10; if (unlikely(r_sample <= t_elapsed)) - LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " - "t_elapsed=%uus\n", - __FUNCTION__, r_sample, t_elapsed); + DCCP_WARN("r_sample=%uus, t_elapsed=%uus\n", + r_sample, t_elapsed); else r_sample -= t_elapsed; @@ -1030,9 +1012,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, 
skb, SLAB_ATOMIC); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to " - "add rx packet to history, consider it lost!\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s, sk=%p, Not enough mem to add rx packet " + "to history, consider it lost!\n", dccp_role(sk), sk); return; } @@ -1065,9 +1046,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_hc_rx_send_feedback(sk); } return; - default: - DCCP_BUG("%s, sk=%p, Illegal state (%d)!", dccp_role(sk), sk, - hcrx->ccid3hcrx_state); + case TFRC_RSTATE_TERM: + DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); return; } @@ -1084,10 +1064,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Scaling up by 1000000 as fixed decimal */ if (i_mean != 0) hcrx->ccid3hcrx_p = 1000000 / i_mean; - } else { - printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); - dump_stack(); - } + } else + DCCP_BUG("empty loss history"); if (hcrx->ccid3hcrx_p > p_prev) { ccid3_hc_rx_send_feedback(sk); diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 906c81ab9d4f..48b9b93f8acb 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -13,7 +13,7 @@ #include #include - +#include "../../dccp.h" #include "loss_interval.h" struct dccp_li_hist *dccp_li_hist_new(const char *name) @@ -109,7 +109,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) i_tot = max(i_tot0, i_tot1); if (!w_tot) { - LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); + DCCP_WARN("w_tot = 0\n"); return 1; } @@ -128,7 +128,7 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); - dump_stack(); + DCCP_BUG("loss interval list entry is NULL"); return 0; } entry->dccplih_interval = ~0; diff --git a/net/dccp/ccids/lib/tfrc_equation.c 
b/net/dccp/ccids/lib/tfrc_equation.c index 44076e0c6591..2601012383fb 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -13,9 +13,8 @@ */ #include - #include - +#include "../../dccp.h" #include "tfrc.h" #define TFRC_CALC_X_ARRSIZE 500 @@ -588,8 +587,10 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) /* p should be 0 unless there is a bug in my code */ index = 0; - if (R == 0) + if (R == 0) { + DCCP_WARN("RTT==0, setting to 1\n"); R = 1; /* RTT can't be zero or else divide by zero */ + } BUG_ON(index >= TFRC_CALC_X_ARRSIZE); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 3a94625a1af3..68886986c8e4 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -18,12 +18,17 @@ #include #include "ackvec.h" -#define DCCP_CRIT(fmt, a...) LIMIT_NETDEBUG(KERN_CRIT fmt " at %s:%d/%s()\n", \ - ##a, __FILE__, __LINE__, __FUNCTION__) -#define DCCP_BUG(fmt, a...) do { DCCP_CRIT(fmt, ##a); dump_stack(); } while (0) -#define DCCP_BUG_ON(cond) do { if (unlikely((cond) == 0)) \ - DCCP_BUG("BUG: condition \"%s\" fails",\ - __stringify((cond))); \ +/* + * DCCP - specific warning and debugging macros. + */ +#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \ + __FUNCTION__, ##a) +#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \ + __FILE__, __LINE__, __FUNCTION__) +#define DCCP_BUG(a...) 
do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0) +#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \ + DCCP_BUG("\"%s\" holds (exception!)", \ + __stringify(cond)); \ } while (0) #ifdef MODULE diff --git a/net/dccp/feat.c b/net/dccp/feat.c index e808c418c992..4dc487f27a1f 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -25,11 +25,11 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, dccp_feat_debug(type, feature, *val); if (!dccp_feat_is_valid_type(type)) { - pr_info("option type %d invalid in negotiation\n", type); + DCCP_WARN("option type %d invalid in negotiation\n", type); return 1; } if (!dccp_feat_is_valid_length(type, feature, len)) { - pr_info("invalid length %d\n", len); + DCCP_WARN("invalid length %d\n", len); return 1; } /* XXX add further sanity checks */ @@ -169,7 +169,8 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, break; default: - WARN_ON(1); /* XXX implement res */ + DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat); + /* XXX implement res */ return -EFAULT; } @@ -328,7 +329,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, switch (type) { case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break; case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break; - default: pr_info("invalid type %d\n", type); return; + default: DCCP_WARN("invalid type %d\n", type); return; } opt->dccpop_feat = feature; @@ -426,7 +427,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, switch (type) { case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; - default: pr_info("invalid type %d\n", type); + default: DCCP_WARN("invalid type %d\n", type); return 1; } diff --git a/net/dccp/input.c b/net/dccp/input.c index 97ccdc30fd89..7371a2f3acf4 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -128,21 +128,18 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) 
dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, " - "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " - "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " - "sending SYNC...\n", - dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_seq, - (unsigned long long) dp->dccps_swh, - (DCCP_SKB_CB(skb)->dccpd_ack_seq == + DCCP_WARN("DCCP: Step 6 failed for %s packet, " + "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " + "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " + "sending SYNC...\n", dccp_packet_name(dh->dccph_type), + (unsigned long long) lswl, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) dp->dccps_swh, + (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - (unsigned long long) lawl, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long) dp->dccps_awh); + (unsigned long long) lawl, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) dp->dccps_awh); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7114befe7d50..ff81679c9f17 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -747,7 +747,7 @@ int dccp_invalid_packet(struct sk_buff *skb) /* If the packet is shorter than 12 bytes, drop packet and return */ if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + DCCP_WARN("pskb_may_pull failed\n"); return 1; } @@ -755,7 +755,7 @@ int dccp_invalid_packet(struct sk_buff *skb) /* If P.type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); + DCCP_WARN("invalid packet type\n"); return 1; } @@ -763,16 +763,14 @@ int dccp_invalid_packet(struct sk_buff 
*skb) * If P.Data Offset is too small for packet type, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small\n", dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); return 1; } /* * If P.Data Offset is too too large for packet, drop packet and return */ if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too large\n", dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); return 1; } @@ -782,9 +780,8 @@ int dccp_invalid_packet(struct sk_buff *skb) */ if (dh->dccph_type >= DCCP_PKT_DATA && dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data||Ack||" - "DataAck, while P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } @@ -794,9 +791,8 @@ int dccp_invalid_packet(struct sk_buff *skb) */ cscov = dccp_csum_coverage(skb); if (cscov > skb->len) { - LIMIT_NETDEBUG(KERN_WARNING - "DCCP: P.CsCov %u exceeds packet length %d\n", - dh->dccph_cscov, skb->len); + DCCP_WARN("P.CsCov %u exceeds packet length %d\n", + dh->dccph_cscov, skb->len); return 1; } @@ -823,9 +819,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 1: If header checksum is incorrect, drop packet and return */ if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) { - LIMIT_NETDEBUG(KERN_WARNING - "%s: dropped packet with invalid checksum\n", - __FUNCTION__); + DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 03bb8298250a..c7aaa2574f52 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -828,9 +828,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* Step 1: If header checksum is incorrect, drop packet and return. 
*/ if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr)) { - LIMIT_NETDEBUG(KERN_WARNING - "%s: dropped packet with invalid checksum\n", - __FUNCTION__); + DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 3975048d8094..7b52f2a03eef 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -84,8 +84,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket " - "table overflow\n"); + DCCP_WARN("time wait bucket table overflow\n"); } dccp_done(sk); @@ -289,9 +288,7 @@ EXPORT_SYMBOL_GPL(dccp_child_process); void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk) { - pr_info(KERN_WARNING "DCCP: ACK packets are never sent in " - "LISTEN/RESPOND state\n"); - dump_stack(); + DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); } EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); diff --git a/net/dccp/options.c b/net/dccp/options.c index 7e50678e2471..ee709ae0a97f 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -238,9 +238,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) } break; default: - pr_info("DCCP(%p): option %d(len=%d) not " - "implemented, ignoring\n", - sk, opt, len); + DCCP_CRIT("DCCP(%p): option %d(len=%d) not " + "implemented, ignoring", sk, opt, len); break; } @@ -257,7 +256,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; - pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); + DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); return -1; } @@ -447,8 +446,7 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > 
DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small" - " to insert feature %d option!\n", feat); + DCCP_WARN("packet too small for feature %d option!\n", feat); return -1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index c34eada7f025..bfd9c5757897 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -257,11 +257,8 @@ void dccp_write_xmit(struct sock *sk, int block) err = dccp_wait_for_ccid(sk, skb, &timeo); timeo = DCCP_XMIT_TIMEO; } - if (err) { - printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" - " %d\n", __FUNCTION__, err); - dump_stack(); - } + if (err) + DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } skb_dequeue(&sk->sk_write_queue); @@ -283,12 +280,9 @@ void dccp_write_xmit(struct sock *sk, int block) err = dccp_transmit_skb(sk, skb); ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - if (err) { - printk(KERN_CRIT "%s:err from " - "ccid_hc_tx_packet_sent %d\n", - __FUNCTION__, err); - dump_stack(); - } + if (err) + DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", + err); } else kfree(skb); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a7f345c8d0db..3c44d502e5c1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1033,8 +1033,7 @@ static int __init dccp_init(void) } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { - printk(KERN_CRIT "Failed to allocate DCCP " - "established hash table\n"); + DCCP_CRIT("Failed to allocate DCCP established hash table"); goto out_free_bind_bucket_cachep; } @@ -1056,7 +1055,7 @@ static int __init dccp_init(void) } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { - printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); + DCCP_CRIT("Failed to allocate DCCP bind hash table"); goto out_free_dccp_ehash; } -- cgit v1.2.3 From 23ea8945f6be2287fec67c85abcf24736c1ded80 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 20 Nov 2006 18:40:42 -0200 Subject: [CCID 3]: Add annotations for socket structures This 
adds documentation to the CCID 3 rx/tx socket fields, plus some minor re-formatting. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.h | 90 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 35 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index c122e757eb7f..e2e43c1a4726 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -81,26 +81,28 @@ enum ccid3_hc_tx_states { TFRC_SSTATE_TERM, }; -/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock +/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket * - * @ccid3hctx_state - Sender state - * @ccid3hctx_x - Current sending rate - * @ccid3hctx_x_recv - Receive rate - * @ccid3hctx_x_calc - Calculated send (?) rate - * @ccid3hctx_s - Packet size - * @ccid3hctx_rtt - Estimate of current round trip time in usecs - * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 - * @ccid3hctx_last_win_count - Last window counter sent - * @ccid3hctx_t_last_win_count - Timestamp of earliest packet - * with last_win_count value sent - * @ccid3hctx_no_feedback_timer - Handle to no feedback timer - * @ccid3hctx_idle - FIXME - * @ccid3hctx_t_ld - Time last doubled during slow start - * @ccid3hctx_t_nom - Nominal send time of next packet - * @ccid3hctx_t_ipi - Interpacket (send) interval - * @ccid3hctx_delta - Send timer delta - * @ccid3hctx_hist - Packet history - */ + * @ccid3hctx_x - Current sending rate + * @ccid3hctx_x_recv - Receive rate + * @ccid3hctx_x_calc - Calculated send rate (RFC 3448, 3.1) + * @ccid3hctx_rtt - Estimate of current round trip time in usecs + * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 + * @ccid3hctx_s - Packet size + * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1) + * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) + * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states + * 
@ccid3hctx_last_win_count - Last window counter sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet + * with last_win_count value sent + * @ccid3hctx_no_feedback_timer - Handle to no feedback timer + * @ccid3hctx_idle - Flag indicating that sender is idling + * @ccid3hctx_t_ld - Time last doubled during slow start + * @ccid3hctx_t_nom - Nominal send time of next packet + * @ccid3hctx_delta - Send timer delta + * @ccid3hctx_hist - Packet history + * @ccid3hctx_options_received - Parsed set of retrieved options + */ struct ccid3_hc_tx_sock { struct tfrc_tx_info ccid3hctx_tfrc; #define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x @@ -130,23 +132,41 @@ enum ccid3_hc_rx_states { TFRC_RSTATE_TERM = 127, }; +/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket + * + * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) + * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) + * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) + * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number + * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal + * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1) + * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states + * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes + * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent + * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent + * @ccid3hcrx_hist - Packet history + * @ccid3hcrx_li_hist - Loss Interval History + * @ccid3hcrx_s - Received packet size in bytes + * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 
8.5) + * @ccid3hcrx_elapsed_time - Time since packet reception + */ struct ccid3_hc_rx_sock { - struct tfrc_rx_info ccid3hcrx_tfrc; -#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv -#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt -#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_nonloss:48, - ccid3hcrx_ccval_nonloss:4, - ccid3hcrx_ccval_last_counter:4; - enum ccid3_hc_rx_states ccid3hcrx_state:8; - u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; - struct list_head ccid3hcrx_hist; - struct list_head ccid3hcrx_li_hist; - u16 ccid3hcrx_s; - u32 ccid3hcrx_pinv; - u32 ccid3hcrx_elapsed_time; + struct tfrc_rx_info ccid3hcrx_tfrc; +#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv +#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt +#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p + u64 ccid3hcrx_seqno_nonloss:48, + ccid3hcrx_ccval_nonloss:4, + ccid3hcrx_ccval_last_counter:4; + enum ccid3_hc_rx_states ccid3hcrx_state:8; + u32 ccid3hcrx_bytes_recv; + struct timeval ccid3hcrx_tstamp_last_feedback; + struct timeval ccid3hcrx_tstamp_last_ack; + struct list_head ccid3hcrx_hist; + struct list_head ccid3hcrx_li_hist; + u16 ccid3hcrx_s; + u32 ccid3hcrx_pinv; + u32 ccid3hcrx_elapsed_time; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -- cgit v1.2.3 From e1b7441e803442828780a6d6d792643f79d5665b Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 20 Nov 2006 18:41:37 -0200 Subject: [DCCP]: Make dccp_probe more portable This makes the code of the dccp_probe module more portable. 
Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/probe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index fded1493c1dc..f81e37de35d5 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -106,8 +106,10 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, } static struct jprobe dccp_send_probe = { - .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, }, - .entry = (kprobe_opcode_t *)&jdccp_sendmsg, + .kp = { + .symbol_name = "dccp_sendmsg", + }, + .entry = JPROBE_ENTRY(jdccp_sendmsg), }; static int dccpprobe_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 455431739ca2f4c7f02d0a5979559ac5a68a6f95 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 20 Nov 2006 18:44:03 -0200 Subject: [DCCP] CCID3: Remove non-referenced variable This removes a non-referenced variable. Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 4eada515b773..fb21f2d9ffc6 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -965,7 +965,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u8 win_count; u32 p_prev, rtt_prev, r_sample, t_elapsed; int loss; @@ -1017,8 +1016,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - win_count = packet->dccphrx_ccval; - loss = ccid3_hc_rx_detect_loss(sk, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) -- cgit v1.2.3 From 82e3ab9dbeebd5c8d5402ad1607d22086271a56d Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 20 Nov 2006 19:19:32 -0200 Subject: [DCCP]: Adds the tx buffer sysctls This one got lost on the way from Ian to Gerrit to me, fix it. 
Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/sysctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/dccp') diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 8b62061e5701..4775ba3faa04 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -91,6 +91,15 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .ctl_name = NET_DCCP_DEFAULT_TX_QLEN, + .procname = "tx_qlen", + .data = &sysctl_dccp_tx_qlen, + .maxlen = sizeof(sysctl_dccp_tx_qlen), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .ctl_name = 0, } }; -- cgit v1.2.3 From 0bd4ff1b1528a39b07aab6c744ac37e053740ad0 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 21 Nov 2006 16:17:10 -0200 Subject: [DCCP] ackvec: Remove unused dccpav_ack_ptr field from dccp_ackvec Committer note: the original patch was split. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ackvec.c | 1 - net/dccp/ackvec.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 1b4b60d8bdec..215355d993de 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -141,7 +141,6 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_ack_ptr = 0; av->dccpav_time.tv_sec = 0; av->dccpav_time.tv_usec = 0; av->dccpav_vec_len = 0; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index cf8f20ce23a9..d6a58db5fb13 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -41,7 +41,6 @@ * Ack Vectors it has recently sent. For each packet sent carrying an * Ack Vector, it remembers four variables: * - * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
* @dccpav_records - list of dccp_ackvec_record * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. * @@ -53,7 +52,6 @@ struct dccp_ackvec { struct list_head dccpav_records; struct timeval dccpav_time; u8 dccpav_buf_head; - u8 dccpav_ack_ptr; u8 dccpav_vec_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; -- cgit v1.2.3 From bdf13d208dee4ada6d2b422536a12b45d5831aa3 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Fri, 24 Nov 2006 13:02:42 -0200 Subject: [DCCP] ackvec: infrastructure for sending more than one ackvec per packet Committer note: This was split from Andrea's original patch; in the process I changed the type of the ackvec index fields to u16 instead of int and have not folded dccp_ackvec_parse with dccp_ackvec_check_rcv_ackno. The next patch will actually do the insertion of more than one ackvec per packet, using, initially, up to a max of 2 ackvecs as per Andrea's original patch; then I'll work on support for larger ackvecs, be it using a sysctl or using setsockopt. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ackvec.c | 23 +++++++++++------------ net/dccp/ackvec.h | 18 +++++++++++------- net/dccp/options.c | 3 ++- 3 files changed, 24 insertions(+), 20 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 215355d993de..41d34d1babc1 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -169,13 +169,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av) } static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, - const u8 index) + const u32 index) { return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; } static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, - const u8 index) + const u32 index) { return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; } @@ -275,7 +275,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, * could reduce the complexity of this scan.)
*/ u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); - u8 index = av->dccpav_buf_head; + u32 index = av->dccpav_buf_head; while (1) { const u8 len = dccp_ackvec_len(av, index); @@ -385,7 +385,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, } static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, - struct sock *sk, u64 ackno, + struct sock *sk, u64 *ackno, const unsigned char len, const unsigned char *vector) { @@ -408,7 +408,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; u64 ackno_end_rl; - dccp_set_seqno(&ackno_end_rl, ackno - rl); + dccp_set_seqno(&ackno_end_rl, *ackno - rl); /* * If our AVR sequence number is greater than the ack, go @@ -416,13 +416,13 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, */ list_for_each_entry_from(avr, &av->dccpav_records, dccpavr_node) { - if (!after48(avr->dccpavr_ack_seqno, ackno)) + if (!after48(avr->dccpavr_ack_seqno, *ackno)) goto found; } /* End of the dccpav_records list, not found, exit */ break; found: - if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { + if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) { const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { dccp_pr_debug("%s ACK vector 0, len=%d, " @@ -442,21 +442,20 @@ found: */ } - dccp_set_seqno(&ackno, ackno_end_rl - 1); + dccp_set_seqno(ackno, ackno_end_rl - 1); ++vector; } } int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len) + u64 *ackno, const u8 opt, const u8 *value, const u8 len) { - if (len > DCCP_MAX_ACKVEC_LEN) + if (len > DCCP_MAX_ACKVEC_OPT_LEN) return -1; /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); + ackno, len, value); return 0; } diff 
--git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index d6a58db5fb13..96504a3b16e4 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -17,7 +17,9 @@ #include /* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACKVEC_LEN 253 +#define DCCP_MAX_ACKVEC_OPT_LEN 253 +/* We can spread an ack vector across multiple options */ +#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2) #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) @@ -51,8 +53,8 @@ struct dccp_ackvec { u64 dccpav_buf_ackno; struct list_head dccpav_records; struct timeval dccpav_time; - u8 dccpav_buf_head; - u8 dccpav_vec_len; + u16 dccpav_buf_head; + u16 dccpav_vec_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN]; @@ -75,9 +77,9 @@ struct dccp_ackvec_record { struct list_head dccpavr_node; u64 dccpavr_ack_seqno; u64 dccpavr_ack_ackno; - u8 dccpavr_ack_ptr; + u16 dccpavr_ack_ptr; + u16 dccpavr_sent_len; u8 dccpavr_ack_nonce; - u8 dccpavr_sent_len; }; struct sock; @@ -96,7 +98,8 @@ extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, const u64 ackno); extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len); + u64 *ackno, const u8 opt, + const u8 *value, const u8 len); extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); @@ -135,7 +138,8 @@ static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, } static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len) + const u64 *ackno, const u8 opt, + const u8 *value, const u8 len) { return -1; } diff --git a/net/dccp/options.c b/net/dccp/options.c index ee709ae0a97f..f398b43bc055 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -62,6 +62,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) 
struct dccp_sock *dp = dccp_sk(sk); const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; + u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; const unsigned char *opt_end = (unsigned char *)dh + @@ -149,7 +150,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) break; if (dccp_msk(sk)->dccpms_send_ack_vector && - dccp_ackvec_parse(sk, skb, opt, value, len)) + dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) goto out_invalid_option; break; case DCCPO_TIMESTAMP: -- cgit v1.2.3 From 522f1d095bf76dbe2430fb9a9a257c0f27033f31 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Sun, 26 Nov 2006 01:04:40 -0200 Subject: [DCCP] ackvec: Split long ack vectors across multiple options Ack vectors grow proportional to the window size. If an ack vector does not fit into a single option, it must be spread across multiple options. This patch will allow for windows to grow larger. Committer note: Simplified the patch a bit, original algorithm kept. 
Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ackvec.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 41d34d1babc1..bdf1bb7a82c0 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -68,10 +68,15 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; - int len = av->dccpav_vec_len + 2; + /* Figure out how many options do we need to represent the ackvec */ + const u16 nr_opts = (av->dccpav_vec_len + + DCCP_MAX_ACKVEC_OPT_LEN - 1) / + DCCP_MAX_ACKVEC_OPT_LEN; + u16 len = av->dccpav_vec_len + 2 * nr_opts, i; struct timeval now; u32 elapsed_time; - unsigned char *to, *from; + const unsigned char *tail, *from; + unsigned char *to; struct dccp_ackvec_record *avr; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) @@ -90,24 +95,37 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_opt_len += len; - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - + to = skb_push(skb, len); len = av->dccpav_vec_len; from = av->dccpav_buf + av->dccpav_buf_head; + tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN; + + for (i = 0; i < nr_opts; ++i) { + int copylen = len; + + if (len > DCCP_MAX_ACKVEC_OPT_LEN) + copylen = DCCP_MAX_ACKVEC_OPT_LEN; - /* Check if buf_head wraps */ - if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) { - const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head; + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = copylen + 2; + + /* Check if buf_head wraps */ + if (from + copylen > tail) { + const u16 tailsize = tail - from; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + copylen -= tailsize; + from = av->dccpav_buf; + } - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = 
av->dccpav_buf; + memcpy(to, from, copylen); + from += copylen; + to += copylen; + len -= copylen; } - memcpy(to, from, len); /* * From RFC 4340, A.2: * -- cgit v1.2.3 From 6472c051fcc5e571a9abee7f7a1ac58cc6e7bafa Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Sun, 26 Nov 2006 01:07:50 -0200 Subject: [DCCP] ccid2: Allow window to grow larger Now that we can stuff bigger ack vectors into options. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index a97a89927308..ebd79499c85a 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -35,7 +35,7 @@ struct ccid2_seq { struct ccid2_seq *ccid2s_next; }; -#define CCID2_SEQBUF_LEN 256 +#define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 /** struct ccid2_hc_tx_sock - CCID2 TX half connection -- cgit v1.2.3 From 90feeb951f61a80d3a8f8e5ced25b9ec78867eaf Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:13:38 -0200 Subject: [DCCP] ccid3: Fix bug in calculation of first t_nom and first t_ipi Problem: --- net/dccp/ccids/ccid3.c | 11 ++++++----- net/dccp/ccids/ccid3.h | 2 -- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index fb21f2d9ffc6..d7b688e9f983 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -298,13 +298,14 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI; - /* Set nominal send time for initial packet */ + /* First timeout, according to [RFC 3448, 4.2], is 1 second */ + hctx->ccid3hctx_t_ipi = USEC_PER_SEC; + /* Initial delta: minimum of 0.5 sec and t_gran/2 */ + hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN; + + /* 
Set t_0 for initial packet */ hctx->ccid3hctx_t_nom = now; - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_delta(hctx); rc = 0; break; case TFRC_SSTATE_NO_FBACK: diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index e2e43c1a4726..462165234ff6 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -49,8 +49,6 @@ /* Two seconds as per CCID3 spec */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) -#define TFRC_INITIAL_IPI (USEC_PER_SEC / 4) - /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -- cgit v1.2.3 From f5c2d6367b04fd5ba98a5f9846b5fb870423968a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:22:48 -0200 Subject: [DCCP] ccid3: Simplify control flow in the calculation of t_ipi This patch performs a simplifying (performance) optimisation: In each call of the inline function ccid3_calc_new_t_ipi(), the state is tested against TFRC_SSTATE_NO_FBACK. This is expensive when the function is called very often. A simpler solution, implemented by this patch, is to adapt the control flow. 
Background: --- net/dccp/ccids/ccid3.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d7b688e9f983..df88c54b2ec5 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -103,13 +103,7 @@ static void ccid3_hc_tx_set_state(struct sock *sk, /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) { - /* - * If no feedback spec says t_ipi is 1 second (set elsewhere and then - * doubles after every no feedback timer (separate function) - */ - if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) - hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x); + hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x); } /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ @@ -395,6 +389,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) "as a data packet", dccp_role(sk)); return; case TFRC_SSTATE_NO_FBACK: + /* t_nom, t_ipi, delta do not change until feedback arrives */ + return; case TFRC_SSTATE_FBACK: if (len > 0) { timeval_sub_usecs(&hctx->ccid3hctx_t_nom, -- cgit v1.2.3 From 91cf5a17257e1d2ef936fbf0223c3436ca583af9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:25:10 -0200 Subject: [DCCP] ccid3: Fix calculation of t_ipi time of scheduled transmission Problem: --- net/dccp/ccids/ccid3.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index df88c54b2ec5..fb1a5e89c023 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -304,11 +304,19 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - - hctx->ccid3hctx_delta); - delay /= -1000; - /* divide by 
-1000 is to convert to ms and get sign right */ - rc = delay > 0 ? delay : 0; + delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); + /* + * Scheduling of packet transmissions [RFC 3448, 4.6] + * + * if (t_now > t_nom - delta) + * // send the packet now + * else + * // send the packet in (t_nom - t_now) milliseconds. + */ + if (delay < hctx->ccid3hctx_delta) + rc = 0; + else + rc = delay/1000L; break; case TFRC_SSTATE_TERM: DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); -- cgit v1.2.3 From 7da7f456d7bc0e52009f882e8af0ac910293e157 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:26:03 -0200 Subject: [DCCP] ccid3: Simplify control flow of ccid3_hc_tx_send_packet This makes some logically equivalent simplifications, by replacing rc - values plus goto's with direct return statements. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index fb1a5e89c023..2745d835f037 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -247,6 +247,12 @@ out: sock_put(sk); } +/* + * returns + * > 0: delay (in msecs) that should pass before actually sending + * = 0: can send immediately + * < 0: error condition; do not send packet + */ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { @@ -255,7 +261,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; - int rc = -ENOTCONN; BUG_ON(hctx == NULL); @@ -265,7 +270,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, * packets can have zero length, but why the comment about "pure ACK"? 
*/ if (unlikely(len == 0)) - goto out; + return -ENOTCONN; /* See if last packet allocated was not sent */ new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); @@ -273,11 +278,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC); - rc = -ENOBUFS; if (unlikely(new_packet == NULL)) { DCCP_WARN("%s, sk=%p, not enough mem to add to history," "send refused\n", dccp_role(sk), sk); - goto out; + return -ENOBUFS; } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); @@ -300,7 +304,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Set t_0 for initial packet */ hctx->ccid3hctx_t_nom = now; - rc = 0; break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: @@ -313,28 +316,21 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay < hctx->ccid3hctx_delta) - rc = 0; - else - rc = delay/1000L; + if (delay >= hctx->ccid3hctx_delta) + return delay / 1000L; break; case TFRC_SSTATE_TERM: DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); - rc = -EINVAL; - break; + return -EINVAL; } - /* Can we send? if so add options and add to packet history */ - if (rc == 0) { - dp->dccps_hc_tx_insert_options = 1; - new_packet->dccphtx_ccval = - DCCP_SKB_CB(skb)->dccpd_ccval = - hctx->ccid3hctx_last_win_count; - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - } -out: - return rc; + /* prepare to send now (add options etc.) 
*/ + dp->dccps_hc_tx_insert_options = 1; + new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = + hctx->ccid3hctx_last_win_count; + timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); + + return 0; } static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) -- cgit v1.2.3 From da335baf9e788edfb00ee3b96f7b9526b6b2f8a9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:26:57 -0200 Subject: [DCCP] ccid3: Avoid congestion control on zero-sized data packets This resolves an `XXX' in ccid3_hc_tx_send_packet(). The function is only called on Data and DataAck packets and returns a negative result on zero-sized messages. This is a reasonable policy since CCID 3 is a congestion-control module and congestion control on zero-sized Data(Ack) packets is in a way pathological. The patch uses a more suitable error code for this case, it returns the Posix.1 code `EBADMSG' ("Not a data message") instead of `ENOTCONN'. As a result of ignoring zero-sized packets, the condition for a warning "First packet is data" in ccid3_hc_tx_packet_sent is always satisfied; this message has been removed since it will always be printed. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 2745d835f037..62c304200eda 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -264,13 +264,13 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, BUG_ON(hctx == NULL); - /* Check if pure ACK or Terminating*/ /* - * XXX: We only call this function for DATA and DATAACK, on, these - * packets can have zero length, but why the comment about "pure ACK"? + * This function is called only for Data and DataAck packets.
Sending + * zero-sized Data(Ack)s is theoretically possible, but for congestion + * control this case is pathological - ignore it. */ if (unlikely(len == 0)) - return -ENOTCONN; + return -EBADMSG; /* See if last packet allocated was not sent */ new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); @@ -387,11 +387,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: - /* if first wasn't pure ack */ - if (len != 0) - DCCP_CRIT("%s, First packet sent is noted " - "as a data packet", dccp_role(sk)); - return; + /* fall through */ case TFRC_SSTATE_NO_FBACK: /* t_nom, t_ipi, delta do not change until feedback arrives */ return; -- cgit v1.2.3 From 70dbd5b0ef3915f1e018e6437c8db9e999b0d701 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:27:55 -0200 Subject: [DCCP] ccid3: Remove redundant statements in ccid3_hc_tx_packet_sent This patch removes a switch statement which is redundant since, * nothing is done in states TFRC_SSTATE_NO_SENT/TFRC_SSTATE_NO_FBACK * it is impossible that the function is called in the state TFRC_SSTATE_TERM, since --the function is called, in dccp_write_xmit, after ccid3_hc_tx_send_packet --if ccid3_hc_tx_send_packet is called in state TFRC_SSTATE_TERM, it returns -EINVAL, which means that ccid3_hc_tx_packet_sent will not be called (compare dccp_write_xmit) --> therefore, this case is logically impossible * the remaining state is TFRC_SSTATE_FBACK which conditionally updates t_ipi, t_nom, and t_delta. 
This is a no-op, since --t_ipi only changes when feedback is received --however, when feedback arrives via ccid3_hc_tx_packet_recv, there is an identical code block which performs the same set of operations --performing the same set of operations again in ccid3_hc_tx_packet_sent therefore does not change anything, since between the time of receiving the last feedback (and therefore update of t_ipi, t_nom, and t_delta), the value of t_ipi has not changed --since t_ipi has not changed, the values of t_delta and t_nom also do not change, they depend fully on t_ipi Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 62c304200eda..58f7cac6aa3d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -384,27 +384,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) } else ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", dccp_role(sk), sk, dp->dccps_gss); - - switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_NO_SENT: - /* fall through */ - case TFRC_SSTATE_NO_FBACK: - /* t_nom, t_ipi, delta do not change until feedback arrives */ - return; - case TFRC_SSTATE_FBACK: - if (len > 0) { - timeval_sub_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_t_ipi(hctx); - ccid3_calc_new_delta(hctx); - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - } - break; - case TFRC_SSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); - break; - } } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 5e19e3fcd7351de1ca87c4797cca27ba55c7e55e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 12:28:48 -0200 Subject: [DCCP] ccid3: Resolve small FIXME This considers the case - ACK received while no packet has been sent 
so far. Resolved by printing a (rate-limited) warning message. Further removes an unnecessary BUG_ON in ccid3_hc_tx_packet_recv, received feedback on a terminating connection is simply ignored. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 58f7cac6aa3d..6777a7f33a9a 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -413,9 +413,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) pinv = opt_recv->ccid3or_loss_event_rate; switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_NO_SENT: - /* FIXME: what to do here? */ - return; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: /* Calculate new round trip sample by @@ -521,8 +518,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* set idle flag */ hctx->ccid3hctx_idle = 1; break; - case TFRC_SSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + case TFRC_SSTATE_NO_SENT: + DCCP_WARN("Illegal ACK received - no packet has been sent\n"); + /* fall through */ + case TFRC_SSTATE_TERM: /* ignore feedback when closing */ break; } } -- cgit v1.2.3 From 48e03eee715b9e19df03153f2bcce6413632afcb Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 20:29:27 -0200 Subject: [DCCP] ccid3: Consolidate timer resets This patch concerns updating the value of the nofeedback timer when no feedback has been received so far. Since in this case the value of R is still undefined according to [RFC 3448, 4.2], we can not perform step (3) of [RFC 3448, 4.3]. A clarification is provided in [RFC 4342, sec. 5], which states that in these cases the nofeedback timer (still) expires "after two seconds". Many thanks to Ian McDonald for pointing this out and providing the clarification. 
The patch * implements [RFC 4342, sec. 5] with regard to the above case * consolidates handling timer restart by - adding an appropriate jump label and - initialising the timeout value Acked-by: Ian McDonald Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 18 +++++++++--------- net/dccp/ccids/ccid3.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 6777a7f33a9a..9297fca78683 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -154,16 +154,14 @@ static void ccid3_hc_tx_update_x(struct sock *sk) static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - unsigned long next_tmout = 0; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + unsigned long next_tmout = USEC_PER_SEC / 5; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ /* XXX: set some sensible MIB */ - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + HZ / 5); - goto out; + goto restart_timer; } ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, @@ -183,9 +181,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), hctx->ccid3hctx_x); - next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x), - TFRC_INITIAL_TIMEOUT); + /* The value of R is still undefined and so we can not recompute + * the timout value. Keep initial value as per [RFC 4342, 5]. */ + next_tmout = TFRC_INITIAL_TIMEOUT; /* * FIXME - not sure above calculation is correct. 
See section * 5 of CCID3 11 should adjust tx_t_ipi and double that to @@ -239,9 +237,11 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) goto out; } - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); hctx->ccid3hctx_idle = 1; + +restart_timer: + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(next_tmout)); out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 462165234ff6..970921700ce3 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -46,7 +46,7 @@ #define TFRC_STD_PACKET_SIZE 256 #define TFRC_MAX_PACKET_SIZE 65535 -/* Two seconds as per CCID3 spec */ +/* Two seconds as per RFC 3448 4.2 */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ -- cgit v1.2.3 From 17893bc1a632e195574dc0dd9751243f0d5993d2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 20:31:33 -0200 Subject: [DCCP] ccid3: Consistently update t_nom, t_ipi, t_delta This patch: * consolidates updating of parameters (t_nom, t_ipi, t_delta) which need to be updated at the same time, since they are inter-dependent * removes two inline functions which are no longer needed as a result of the above consolidation * resolves a FIXME regarding the re-calculation of t_ipi within the nofeedback timer, in the state where no feedback has previously been received * ties updating these parameters to updating the sending rate X, exploiting that all three parameters in turn depend on X; and using a small optimisation which can reduce the number of required instructions: only update the three parameters when X really changes Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'net/dccp') 
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 9297fca78683..4342caf53251 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -100,19 +100,24 @@ static void ccid3_hc_tx_set_state(struct sock *sk, hctx->ccid3hctx_state = state; } -/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ -static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) +/* + * Recalculate scheduled nominal send time t_nom, inter-packet interval + * t_ipi, and delta value. Should be called after each change to X. + */ +static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) { + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); + + /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x); -} -/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ -static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) -{ + /* Update nominal send time with regard to the new t_ipi */ + timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); + + /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); } - /* * Update X by * If (p > 0) @@ -126,6 +131,7 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) static void ccid3_hc_tx_update_x(struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const __u32 old_x = hctx->ccid3hctx_x; /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { @@ -149,6 +155,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk) hctx->ccid3hctx_t_ld = now; } } + if (hctx->ccid3hctx_x != old_x) + ccid3_update_send_time(hctx); } static void ccid3_hc_tx_no_feedback_timer(unsigned long data) @@ -184,11 +192,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) /* The value of R is still undefined and so we 
can not recompute * the timout value. Keep initial value as per [RFC 4342, 5]. */ next_tmout = TFRC_INITIAL_TIMEOUT; - /* - * FIXME - not sure above calculation is correct. See section - * 5 of CCID3 11 should adjust tx_t_ipi and double that to - * achieve it really - */ + ccid3_update_send_time(hctx); break; case TFRC_SSTATE_FBACK: /* @@ -479,17 +483,9 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* unschedule no feedback timer */ sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); - /* Update sending rate */ + /* Update sending rate (and likely t_ipi, t_nom, and delta) */ ccid3_hc_tx_update_x(sk); - /* Update next send time */ - timeval_sub_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_t_ipi(hctx); - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_delta(hctx); - /* remove all packets older than the one acked from history */ dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); -- cgit v1.2.3 From 5d0dbc4a9b2d325458dcbf9a8329bd1d2cc7bd7e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 20:32:37 -0200 Subject: [DCCP] ccid3: Consolidate handling of t_RTO This patch * removes setting t_RTO in ccid3_hc_tx_init (per [RFC 3448, 4.2], t_RTO is undefined until feedback has been received); * makes some trivial changes (updates of comments); * performs a small optimisation by exploiting that the feedback timeout uses the value of t_ipi. The way it is done is safe, because the timeouts appear after the changes to t_ipi, ensuring that up-to-date values are used; * in ccid3_hc_tx_packet_recv, moves the t_rto statement closer to the calculation of the next_tmout. 
This makes the code clearer to read and is also safe, since t_rto is not updated until the next call of ccid3_hc_tx_packet_recv, and is not read by the functions called via ccid_wait_for_ccid(); * removes a `max' statement in sk_reset_timer, this is not needed since the timeout value is always greater than 1E6 microseconds. * adds `XXX'es to highlight that currently the nofeedback timer is set in a non-standard way Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 4342caf53251..f0ed67c84a55 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -228,11 +228,10 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) } /* * Schedule no feedback timer to expire in - * max(4 * R, 2 * s / X) + * max(4 * t_RTO, 2 * s/X) = max(4 * t_RTO, 2 * t_ipi) + * XXX This is non-standard, RFC 3448, 4.3 uses 4 * R */ - next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, - 2 * usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x)); + next_tmout = max(hctx->ccid3hctx_t_rto, 2*hctx->ccid3hctx_t_ipi); break; case TFRC_SSTATE_NO_SENT: DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk); @@ -460,10 +459,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) "r_sample=%us\n", dccp_role(sk), sk, hctx->ccid3hctx_rtt, r_sample); - /* Update timeout interval */ - hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, - USEC_PER_SEC); - /* Update receive rate */ hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ @@ -491,17 +486,22 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) &hctx->ccid3hctx_hist, packet); /* * As we have calculated new ipi, delta, t_nom it is possible that - * we now can send a packet, so wake up dccp_wait_for_ccids. 
+ * we now can send a packet, so wake up dccp_wait_for_ccid */ sk->sk_write_space(sk); + + /* Update timeout interval. We use the alternative variant of + * [RFC 3448, 3.1] which sets the upper bound of t_rto to one + * second, as it is suggested for TCP (see RFC 2988, 2.4). */ + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + USEC_PER_SEC ); /* * Schedule no feedback timer to expire in - * max(4 * R, 2 * s / X) + * max(4 * t_RTO, 2 * s/X) = max(4 * t_RTO, 2 * t_ipi) + * XXX This is non-standard, RFC 3448, 4.3 uses 4 * R */ - next_tmout = max(hctx->ccid3hctx_t_rto, - 2 * usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x)); + next_tmout = max(hctx->ccid3hctx_t_rto, 2*hctx->ccid3hctx_t_ipi); ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", @@ -509,7 +509,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) usecs_to_jiffies(next_tmout), next_tmout); sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + jiffies + usecs_to_jiffies(next_tmout)); /* set idle flag */ hctx->ccid3hctx_idle = 1; @@ -607,7 +607,6 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) /* Set transmission rate to 1 packet per second */ hctx->ccid3hctx_x = hctx->ccid3hctx_s; - hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); -- cgit v1.2.3 From 4384260443efe90a2ec0d907568dbc58ae792cd0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 18:14:10 -0200 Subject: [DCCP]: Remove allocation of sysctl numbers This is in response to a request sent earlier by Eric W. Biederman and replaces all sysctl numbers for net.dccp.default with CTL_UNNUMBERED. It has been tested to compile and to work. 
Committer note: I've removed the use of CTL_UNNUMBERED; not setting .ctl_name sets it to 0, which is what CTL_UNNUMBERED is. The reason is to avoid unneeded source code cluttering. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/sysctl.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 4775ba3faa04..fdcfca3e9208 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -20,7 +20,6 @@ static struct ctl_table dccp_default_table[] = { { - .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, .procname = "seq_window", .data = &sysctl_dccp_feat_sequence_window, .maxlen = sizeof(sysctl_dccp_feat_sequence_window), @@ -28,7 +27,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_RX_CCID, .procname = "rx_ccid", .data = &sysctl_dccp_feat_rx_ccid, .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), @@ -36,7 +34,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_TX_CCID, .procname = "tx_ccid", .data = &sysctl_dccp_feat_tx_ccid, .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), @@ -44,7 +41,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_ACK_RATIO, .procname = "ack_ratio", .data = &sysctl_dccp_feat_ack_ratio, .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), @@ -52,7 +48,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_SEND_ACKVEC, .procname = "send_ackvec", .data = &sysctl_dccp_feat_send_ack_vector, .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), @@ -60,7 +55,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_SEND_NDP, .procname = "send_ndp", .data = &sysctl_dccp_feat_send_ndp_count, .maxlen = 
sizeof(sysctl_dccp_feat_send_ndp_count), @@ -68,7 +62,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_REQ_RETRIES, .procname = "request_retries", .data = &sysctl_dccp_request_retries, .maxlen = sizeof(sysctl_dccp_request_retries), @@ -76,7 +69,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_RETRIES1, .procname = "retries1", .data = &sysctl_dccp_retries1, .maxlen = sizeof(sysctl_dccp_retries1), @@ -84,7 +76,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_RETRIES2, .procname = "retries2", .data = &sysctl_dccp_retries2, .maxlen = sizeof(sysctl_dccp_retries2), @@ -92,7 +83,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_TX_QLEN, .procname = "tx_qlen", .data = &sysctl_dccp_tx_qlen, .maxlen = sizeof(sysctl_dccp_tx_qlen), -- cgit v1.2.3 From 2a1fda6f6c01d7ac195c040f14edcf9f64a5451e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 18:34:34 -0200 Subject: [DCCP] ccid3: Set NoFeedback Timeout according to RFC 3448 This corrects the setting of the nofeedback timer with regard to RFC 3448 - previously it was not set to max(4*R, 2*s/X) as specified. Using the maximum of 1 second as upper bound (as it was done before) can have detrimental effects, especially if R is small. 
Signed-off-by: Ian McDonald Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index f0ed67c84a55..577fd0ef84e5 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -163,7 +163,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - unsigned long next_tmout = USEC_PER_SEC / 5; + unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -191,7 +191,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) hctx->ccid3hctx_x); /* The value of R is still undefined and so we can not recompute * the timout value. Keep initial value as per [RFC 4342, 5]. */ - next_tmout = TFRC_INITIAL_TIMEOUT; + t_nfb = TFRC_INITIAL_TIMEOUT; ccid3_update_send_time(hctx); break; case TFRC_SSTATE_FBACK: @@ -228,10 +228,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) } /* * Schedule no feedback timer to expire in - * max(4 * t_RTO, 2 * s/X) = max(4 * t_RTO, 2 * t_ipi) - * XXX This is non-standard, RFC 3448, 4.3 uses 4 * R + * max(4 * R, 2 * s/X) = max(4 * R, 2 * t_ipi) */ - next_tmout = max(hctx->ccid3hctx_t_rto, 2*hctx->ccid3hctx_t_ipi); + t_nfb = max(4 * hctx->ccid3hctx_rtt, 2 * hctx->ccid3hctx_t_ipi); break; case TFRC_SSTATE_NO_SENT: DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk); @@ -244,7 +243,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) restart_timer: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + usecs_to_jiffies(next_tmout)); + jiffies + usecs_to_jiffies(t_nfb)); out: bh_unlock_sock(sk); sock_put(sk); @@ -396,7 +395,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_options_received *opt_recv; struct 
dccp_tx_hist_entry *packet; struct timeval now; - unsigned long next_tmout; + unsigned long t_nfb; u32 t_elapsed; u32 pinv; u32 x_recv; @@ -498,18 +497,17 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) USEC_PER_SEC ); /* * Schedule no feedback timer to expire in - * max(4 * t_RTO, 2 * s/X) = max(4 * t_RTO, 2 * t_ipi) - * XXX This is non-standard, RFC 3448, 4.3 uses 4 * R + * max(4 * R, 2 * s/X) = max(4 * R, 2 * t_ipi) */ - next_tmout = max(hctx->ccid3hctx_t_rto, 2*hctx->ccid3hctx_t_ipi); + t_nfb = max(4 * hctx->ccid3hctx_rtt, 2 * hctx->ccid3hctx_t_ipi); ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", dccp_role(sk), sk, - usecs_to_jiffies(next_tmout), next_tmout); + usecs_to_jiffies(t_nfb), t_nfb); sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + usecs_to_jiffies(next_tmout)); + jiffies + usecs_to_jiffies(t_nfb)); /* set idle flag */ hctx->ccid3hctx_idle = 1; -- cgit v1.2.3 From 78ad713da673a2977763521c347176137f3e493f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 19:22:33 -0200 Subject: [DCCP] ccid3: Track RX/TX packet size `s' using moving-average Problem: --- net/dccp/ccids/ccid3.c | 60 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 19 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 577fd0ef84e5..05513f3df652 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -159,6 +159,25 @@ static void ccid3_hc_tx_update_x(struct sock *sk) ccid3_update_send_time(hctx); } +/* + * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) + * @len: DCCP packet payload size in bytes + */ +static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) +{ + if (unlikely(len == 0)) + ccid3_pr_debug("Packet payload length is 0 - not updating\n"); + else + hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? 
len : + (9 * hctx->ccid3hctx_s + len) / 10; + /* + * Note: We could do a potential optimisation here - when `s' changes, + * recalculate sending rate and consequently t_ipi, t_delta, and + * t_now. This is however non-standard, and the benefits are not + * clear, so it is currently left out. + */ +} + static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; @@ -299,6 +318,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + /* Set initial sending rate to 1 packet per second */ + ccid3_hc_tx_update_s(hctx, len); + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + /* First timeout, according to [RFC 3448, 4.2], is 1 second */ hctx->ccid3hctx_t_ipi = USEC_PER_SEC; /* Initial delta: minimum of 0.5 sec and t_gran/2 */ @@ -350,6 +373,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) unsigned long quarter_rtt; struct dccp_tx_hist_entry *packet; + ccid3_hc_tx_update_s(hctx, len); + packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (unlikely(packet == NULL)) { DCCP_WARN("packet doesn't exist in history!\n"); @@ -594,17 +619,9 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); - if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) - hctx->ccid3hctx_s = dp->dccps_packet_size; - else - hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; - - /* Set transmission rate to 1 packet per second */ - hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_s = 0; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); @@ -658,6 +675,15 @@ static void ccid3_hc_rx_set_state(struct sock *sk, hcrx->ccid3hcrx_state = state; } +static inline void ccid3_hc_rx_update_s(struct 
ccid3_hc_rx_sock *hcrx, int len) +{ + if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */ + ccid3_pr_debug("Packet payload length is 0 - not updating\n"); + else + hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len : + (9 * hcrx->ccid3hcrx_s + len) / 10; +} + static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); @@ -934,7 +960,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_rx_hist_entry *packet; struct timeval now; u32 p_prev, rtt_prev, r_sample, t_elapsed; - int loss; + int loss, payload_size; BUG_ON(hcrx == NULL); @@ -989,6 +1015,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; + payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4; + ccid3_hc_rx_update_s(hcrx, payload_size); + switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " @@ -999,8 +1028,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); return; case TFRC_RSTATE_DATA: - hcrx->ccid3hcrx_bytes_recv += skb->len - - dccp_hdr(skb)->dccph_doff * 4; + hcrx->ccid3hcrx_bytes_recv += payload_size; if (loss) break; @@ -1040,22 +1068,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) - hcrx->ccid3hcrx_s = dp->dccps_packet_size; - else - hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; - hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); dccp_timestamp(sk, 
&hcrx->ccid3hcrx_tstamp_last_ack); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_s = 0; hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ return 0; } -- cgit v1.2.3 From 5aed324369c94a2c38469c8288e42eb1a9fac400 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 19:33:36 -0200 Subject: [DCCP]: Tidy up unused structures This removes and cleans up unused variables and structures which have become unnecessary following the introduction of the EWMA patch to automatically track the CCID 3 receiver/sender packet sizes `s'. It deprecates the PACKET_SIZE socket option by returning an error code and printing a deprecation warning if an application tries to read or write this socket option. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.h | 4 ---- net/dccp/proto.c | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 970921700ce3..dbb884426dfa 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -42,10 +42,6 @@ #include #include "../ccid.h" -#define TFRC_MIN_PACKET_SIZE 16 -#define TFRC_STD_PACKET_SIZE 256 -#define TFRC_MAX_PACKET_SIZE 65535 - /* Two seconds as per RFC 3448 4.2 */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 3c44d502e5c1..2604e34d8f38 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -470,7 +470,8 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: - dp->dccps_packet_size = val; + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + err = -EINVAL; break; case DCCP_SOCKOPT_CHANGE_L: if (optlen != sizeof(struct dccp_so_feat)) @@ -581,9 +582,8 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: - val = 
dp->dccps_packet_size; - len = sizeof(dp->dccps_packet_size); - break; + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + return -EINVAL; case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); -- cgit v1.2.3 From 841bac1d607d8bf2e068e4b24393fb77372814e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Nov 2006 19:42:03 -0200 Subject: [DCCP]: Make {set,get}sockopt(DCCP_SOCKOPT_PACKET_SIZE) return 0 To reflect the fact that this now is of no effect, not making apps stop working, just be warned in the system log. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/proto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 2604e34d8f38..5ec47d9ee447 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -471,7 +471,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - err = -EINVAL; + err = 0; break; case DCCP_SOCKOPT_CHANGE_L: if (optlen != sizeof(struct dccp_so_feat)) @@ -583,7 +583,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - return -EINVAL; + return 0; case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); -- cgit v1.2.3 From a79ef76f4d8424324c2f108824a7398571193f43 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 19:51:42 -0200 Subject: [DCCP] ccid3: Larger initial windows This implements the larger-initial-windows feature for CCID 3, as described in section 5 of RFC 4342. When the first feedback packet arrives, the sender can send up to 2..4 packets per RTT, instead of just one. 
The patch further * reduces the number of timestamping calls by passing the timestamp value (which is computed in one of the calling functions anyway) as an argument * renames one constant with a very long name into one which is shorter and resembles the one in RFC 3448 (t_mbi) * simplifies some of the min_t/max_t cases where both `x', `y' have the same type Committer note: renamed TFRC_t_mbi to TFRC_T_MBI, to follow Linux coding style. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 66 ++++++++++++++++++++++++++------------------------ net/dccp/ccids/ccid3.h | 4 +-- 2 files changed, 37 insertions(+), 33 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 05513f3df652..aa5440ee20ae 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -128,7 +128,8 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) * X = max(min(2 * X, 2 * X_recv), s / R); * tld = now; */ -static void ccid3_hc_tx_update_x(struct sock *sk) +static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) + { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); const __u32 old_x = hctx->ccid3hctx_x; @@ -138,23 +139,20 @@ static void ccid3_hc_tx_update_x(struct sock *sk) hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, hctx->ccid3hctx_p); - hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, - 2 * hctx->ccid3hctx_x_recv), - (hctx->ccid3hctx_s / - TFRC_MAX_BACK_OFF_TIME)); - } else { - struct timeval now; + hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc, + hctx->ccid3hctx_x_recv * 2), + hctx->ccid3hctx_s / TFRC_T_MBI); + + } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >= + hctx->ccid3hctx_rtt) { + hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv, + hctx->ccid3hctx_x ) * 2, + usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt) ); + hctx->ccid3hctx_t_ld = *now; + } else + 
ccid3_pr_debug("Not changing X\n"); - dccp_timestamp(sk, &now); - if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= - hctx->ccid3hctx_rtt) { - hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, - hctx->ccid3hctx_x) * 2, - usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = now; - } - } if (hctx->ccid3hctx_x != old_x) ccid3_update_send_time(hctx); } @@ -196,12 +194,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_FBACK: - /* Halve send rate */ - hctx->ccid3hctx_x /= 2; - if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / - TFRC_MAX_BACK_OFF_TIME)) - hctx->ccid3hctx_x = (hctx->ccid3hctx_s / - TFRC_MAX_BACK_OFF_TIME); + /* RFC 3448, 4.4: Halve send rate directly */ + hctx->ccid3hctx_x = min_t(u32, hctx->ccid3hctx_x / 2, + hctx->ccid3hctx_s / TFRC_T_MBI); ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " "bytes/s\n", @@ -221,6 +216,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { + struct timeval now; + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); @@ -238,12 +235,13 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, - hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); + hctx->ccid3hctx_s / (2 * TFRC_T_MBI)); else hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; /* Update sending rate */ - ccid3_hc_tx_update_x(sk); + dccp_timestamp(sk, &now); + ccid3_hc_tx_update_x(sk, &now); } /* * Schedule no feedback timer to expire in @@ -473,11 +471,21 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * q is a constant, RFC 3448 recomments 0.9 */ if 
(hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + /* Use Larger Initial Windows [RFC 4342, sec. 5] + * We deviate in that we use `s' instead of `MSS'. */ + u16 w_init = max( 4 * hctx->ccid3hctx_s, + max(2 * hctx->ccid3hctx_s, 4380)); + hctx->ccid3hctx_rtt = r_sample; + hctx->ccid3hctx_x = usecs_div(w_init, r_sample); + hctx->ccid3hctx_t_ld = now; + + ccid3_update_send_time(hctx); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - hctx->ccid3hctx_rtt = r_sample; - } else + } else { hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; + ccid3_hc_tx_update_x(sk, &now); + } ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " "r_sample=%us\n", dccp_role(sk), sk, @@ -502,9 +510,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* unschedule no feedback timer */ sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); - /* Update sending rate (and likely t_ipi, t_nom, and delta) */ - ccid3_hc_tx_update_x(sk); - /* remove all packets older than the one acked from history */ dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); @@ -514,7 +519,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) */ sk->sk_write_space(sk); - /* Update timeout interval. We use the alternative variant of * [RFC 3448, 3.1] which sets the upper bound of t_rto to one * second, as it is suggested for TCP (see RFC 2988, 2.4). 
*/ diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index dbb884426dfa..27cb20ae1da8 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -48,8 +48,8 @@ /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -/* In seconds */ -#define TFRC_MAX_BACK_OFF_TIME 64 +/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ +#define TFRC_T_MBI 64 #define TFRC_SMALLEST_P 40 -- cgit v1.2.3 From 6b57c93dc3aa0115b589cb89ef862d46ab9bd95e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 19:55:06 -0200 Subject: [DCCP]: Use `unsigned' for packet lengths This patch implements a suggestion by Ian McDonald and 1) Avoids tests against negative packet lengths by using unsigned int for packet payload lengths in the CCID send_packet()/packet_sent() routines 2) As a consequence, it removes a now-unnecessary test with regard to `len > 0' in ccid3_hc_tx_packet_sent: that condition is always true, since * negative packet lengths are avoided * ccid3_hc_tx_send_packet flags an error whenever the payload length is 0. As a consequence, ccid3_hc_tx_packet_sent is never called as all errors returned by ccid_hc_tx_send_packet are caught in dccp_write_xmit 3) Removes the third argument of ccid_hc_tx_send_packet (the `len' parameter), since it is currently always set to skb->len. The code is updated with regard to this parameter change. 
Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccid.h | 12 ++++---- net/dccp/ccids/ccid2.c | 5 ++- net/dccp/ccids/ccid3.c | 83 +++++++++++++++++++++++--------------------------- net/dccp/output.c | 6 ++-- 4 files changed, 48 insertions(+), 58 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index f7eb6c613414..c7c29514dce8 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -52,9 +52,9 @@ struct ccid_operations { unsigned char len, u16 idx, unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, - struct sk_buff *skb, int len); - void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, - int len); + struct sk_buff *skb); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, + int more, unsigned int len); void (*ccid_hc_rx_get_info)(struct sock *sk, struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, @@ -94,16 +94,16 @@ extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb, int len) + struct sk_buff *skb) { int rc = 0; if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) - rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb, len); + rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); return rc; } static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, - int more, int len) + int more, unsigned int len) { if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 207f7f9b36ca..2555be8f4790 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -125,8 +125,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, return 0; } -static int ccid2_hc_tx_send_packet(struct sock *sk, - struct sk_buff *skb, 
int len) +static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct ccid2_hc_tx_sock *hctx; @@ -268,7 +267,7 @@ static void ccid2_start_rto_timer(struct sock *sk) jiffies + hctx->ccid2hctx_rto); } -static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) +static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index aa5440ee20ae..70ebe705eb75 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -272,8 +272,7 @@ out: * = 0: can send immediately * < 0: error condition; do not send packet */ -static int ccid3_hc_tx_send_packet(struct sock *sk, - struct sk_buff *skb, int len) +static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); @@ -288,7 +287,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, * zero-sized Data(Ack)s is theoretically possible, but for congestion * control this case is pathological - ignore it. 
*/ - if (unlikely(len == 0)) + if (unlikely(skb->len == 0)) return -EBADMSG; /* See if last packet allocated was not sent */ @@ -317,7 +316,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); /* Set initial sending rate to 1 packet per second */ - ccid3_hc_tx_update_s(hctx, len); + ccid3_hc_tx_update_s(hctx, skb->len); hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* First timeout, according to [RFC 3448, 4.2], is 1 second */ @@ -356,59 +355,53 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, return 0; } -static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; + unsigned long quarter_rtt; + struct dccp_tx_hist_entry *packet; BUG_ON(hctx == NULL); dccp_timestamp(sk, &now); - /* check if we have sent a data packet */ - if (len > 0) { - unsigned long quarter_rtt; - struct dccp_tx_hist_entry *packet; + ccid3_hc_tx_update_s(hctx, len); - ccid3_hc_tx_update_s(hctx, len); + packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (unlikely(packet == NULL)) { + DCCP_WARN("packet doesn't exist in history!\n"); + return; + } + if (unlikely(packet->dccphtx_sent)) { + DCCP_WARN("no unsent packet in history!\n"); + return; + } + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dp->dccps_gss; + /* + * Check if win_count have changed + * Algorithm in "8.1. Window Counter Value" in RFC 4342. 
+ */ + quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); + if (likely(hctx->ccid3hctx_rtt > 8)) + quarter_rtt /= hctx->ccid3hctx_rtt / 4; - packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (unlikely(packet == NULL)) { - DCCP_WARN("packet doesn't exist in history!\n"); - return; - } - if (unlikely(packet->dccphtx_sent)) { - DCCP_WARN("no unsent packet in history!\n"); - return; - } - packet->dccphtx_tstamp = now; - packet->dccphtx_seqno = dp->dccps_gss; - /* - * Check if win_count have changed - * Algorithm in "8.1. Window Counter Value" in RFC 4342. - */ - quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); - if (likely(hctx->ccid3hctx_rtt > 8)) - quarter_rtt /= hctx->ccid3hctx_rtt / 4; - - if (quarter_rtt > 0) { - hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + - min_t(unsigned long, quarter_rtt, 5)) % 16; - ccid3_pr_debug("%s, sk=%p, window changed from " - "%u to %u!\n", - dccp_role(sk), sk, - packet->dccphtx_ccval, - hctx->ccid3hctx_last_win_count); - } + if (quarter_rtt > 0) { + hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + + min_t(unsigned long, quarter_rtt, 5)) % 16; + ccid3_pr_debug("%s, sk=%p, window changed from " + "%u to %u!\n", + dccp_role(sk), sk, + packet->dccphtx_ccval, + hctx->ccid3hctx_last_win_count); + } - hctx->ccid3hctx_idle = 0; - packet->dccphtx_rtt = hctx->ccid3hctx_rtt; - packet->dccphtx_sent = 1; - } else - ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", - dccp_role(sk), sk, dp->dccps_gss); + hctx->ccid3hctx_idle = 0; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; + packet->dccphtx_sent = 1; } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/output.c b/net/dccp/output.c index bfd9c5757897..400c30b6fcae 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -195,8 +195,7 @@ static int dccp_wait_for_ccid(struct sock 
*sk, struct sk_buff *skb, if (signal_pending(current)) goto do_interrupted; - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, - skb->len); + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (rc <= 0) break; delay = msecs_to_jiffies(rc); @@ -245,8 +244,7 @@ void dccp_write_xmit(struct sock *sk, int block) this we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, - skb->len); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (err > 0) { if (!block) { -- cgit v1.2.3