From 75e3d8db531b462b875c1adb13eeb6b0be7374c0 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 21 Oct 2008 16:28:36 -0700 Subject: tcp: should use number of sack blocks instead of -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking for the recent "sack issue" I also read all eff_sacks usage that was played around by some relevant commit. I found out that there's another thing that is asking for a fix (unrelated to the "sack issue" though). This feature has probably very little significance in practice. Opposite direction timeout with bidirectional tcp comes to me as the most likely scenario though there might be other cases as well related to non-data segments we send (e.g., response to the opposite direction segment). Also some ACK losses or option space wasted for other purposes is necessary to prevent the earlier SACK feedback getting to the sender. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 990a58493235..de54f02f10a9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -432,7 +432,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks--; + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; } } } -- cgit v1.2.3 From 91bd6b1e030266cf87d3f567b49f0fa60a7318ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 23 Oct 2008 00:59:52 -0700 Subject: sctp: Drop ICMP packet too big message with MTU larger than current PMTU If ICMP packet too big message is received with MTU larger than current PMTU, SCTP will still accept this ICMP message and sync the PMTU of assoc with the wrong MTU. Endpoing A Endpoint B (ESTABLISHED) (ESTABLISHED) ICMP ---------> (packet too big, MTU too larger) sync PMTU This patch fixed the problem by drop that ICMP message. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index a49fa80b57b9..bf612d954d41 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -369,7 +369,7 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (!t || (t->pathmtu == pmtu)) + if (!t || (t->pathmtu <= pmtu)) return; if (sock_owned_by_user(sk)) { -- cgit v1.2.3 From df10eec476f2045a2ef5f85d97c7b47d992d7f7b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 23 Oct 2008 01:00:21 -0700 Subject: sctp: Add check for the TSN field of the SHUTDOWN chunk If SHUTDOWN chunk is received Cumulative TSN Ack beyond the max tsn currently send, SHUTDOWN chunk be accepted and the association will be broken. New data is send, but after received SACK it will be drop because TSN in SACK is less than the Cumulative TSN, data will be retrans again and again even if correct SACK is received. The packet sequence is like this: Endpoint A Endpoint B ULP (ESTABLISHED) (ESTABLISHED) <----------- DATA (TSN=x-1) <----------- DATA (TSN=x) SHUTDOWN -----------> (Now Cumulative TSN=x+1000) (TSN=x+1000) <----------- DATA (TSN=x+1) SACK -----------> drop the SACK (TSN=x+1) <----------- DATA (TSN=x+1)(retrans) This patch fix this problem by terminating the association and respond to the sender with an ABORT. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d4c3fbc4671e..12f62174f4a1 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2544,6 +2544,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_shutdownhdr_t *sdh; sctp_disposition_t disposition; struct sctp_ulpevent *ev; + __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2558,6 +2559,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sdh = (sctp_shutdownhdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + ctsn = ntohl(sdh->cum_tsn_ack); + + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ctsn, asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT * When a peer sends a SHUTDOWN, SCTP delivers this notification to -- cgit v1.2.3 From cf896d514a4564027929a6d284872c74987085ef Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 23 Oct 2008 01:00:49 -0700 Subject: sctp: Fix to handle SHUTDOWN in SHUTDOWN-PENDING state If SHUTDOWN is received in SHUTDOWN-PENDING state, enpoint should enter the SHUTDOWN-RECEIVED state and check the Cumulative TSN Ack field of the SHUTDOWN chunk (RFC 4960 Section 9.2). If the SHUTDOWN chunk can acknowledge all of the send DATA chunks, SHUTDOWN-ACK should be sent. But now endpoint just silently discarded the SHUTDOWN chunk. SHUTDOWN received in SHUTDOWN-PENDING state can happend when the last SACK is lost by network, or the SHUTDOWN chunk can acknowledge all of the received DATA chunks. The packet sequence(SACK lost) is like this: Endpoint A Endpoint B ULP (ESTABLISHED) (ESTABLISHED) <----------- DATA <--- shutdown Enter SHUTDOWN-PENDING state SACK ----lost----> SHUTDOWN(*1) ------------> <----------- SHUTDOWN-ACK (*1) silently discarded now. This patch fix to handle SHUTDOWN in SHUTDOWN-PENDING state as the same as ESTABLISHED state. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_statetable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index dd4ddc40c0ad..a5b5590dc1a6 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -266,7 +266,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ -- cgit v1.2.3 From 2e3f92dad6bdbee796274bae5c1c50a6ddd31cbb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 23 Oct 2008 01:01:18 -0700 Subject: sctp: Fix to handle SHUTDOWN in SHUTDOWN_RECEIVED state Once an endpoint has reached the SHUTDOWN-RECEIVED state, it MUST NOT send a SHUTDOWN in response to a ULP request. The Cumulative TSN Ack of the received SHUTDOWN chunk MUST be processed. This patch fix to process Cumulative TSN Ack of the received SHUTDOWN chunk in SHUTDOWN_RECEIVED state. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 1 + net/sctp/sm_statefuns.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/sctp/sm_statetable.c | 2 +- 3 files changed, 47 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 029a54a02396..c1dd89365833 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -125,6 +125,7 @@ sctp_state_fn_t sctp_sf_beat_8_3; sctp_state_fn_t sctp_sf_backbeat_8_3; sctp_state_fn_t sctp_sf_do_9_2_final; sctp_state_fn_t sctp_sf_do_9_2_shutdown; +sctp_state_fn_t sctp_sf_do_9_2_shut_ctsn; sctp_state_fn_t sctp_sf_do_ecn_cwr; sctp_state_fn_t sctp_sf_do_ecne; sctp_state_fn_t sctp_sf_ootb; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 12f62174f4a1..a6a0ea71ae93 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2608,6 +2608,51 @@ out: return disposition; } +/* + * sctp_sf_do_9_2_shut_ctsn + * + * Once an endpoint has reached the SHUTDOWN-RECEIVED state, + * it MUST NOT send a SHUTDOWN in response to a ULP request. + * The Cumulative TSN Ack of the received SHUTDOWN chunk + * MUST be processed. + */ +sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + sctp_shutdownhdr_t *sdh; + + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + /* Make sure that the SHUTDOWN chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, + sizeof(struct sctp_shutdown_chunk_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + sdh = (sctp_shutdownhdr_t *)chunk->skb->data; + + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ntohl(sdh->cum_tsn_ack), asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + + /* verify, by checking the Cumulative TSN Ack field of the + * chunk, that all its outstanding DATA chunks have been + * received by the SHUTDOWN sender. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, + SCTP_BE32(sdh->cum_tsn_ack)); + + return SCTP_DISPOSITION_CONSUME; +} + /* RFC 2960 9.2 * If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT chunk * (e.g., if the SHUTDOWN COMPLETE was lost) with source and destination diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index a5b5590dc1a6..5c8186d88c61 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -270,7 +270,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shut_ctsn), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_SHUTDOWN */ -- cgit v1.2.3 From b63365a2d60268a3988285d6c3c6003d7066f93a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 23 Oct 2008 01:11:29 -0700 Subject: net: Fix disjunct computation of netdev features My change commit e2a6b85247aacc52d6ba0d9b37a99b8d1a3e0d83 net: Enable TSO if supported by at least one device didn't do what was intended because the netdev_compute_features function was designed for conjunctions. So what happened was that it would simply take the TSO status of the last constituent device. This patch extends it to support both conjunctions and disjunctions under the new name of netdev_increment_features. It also adds a new function netdev_fix_features which does the sanity checking that usually occurs upon registration. This ensures that the computation doesn't result in an illegal combination since this checking is absent when the change is initiated via ethtool. The two users of netdev_compute_features have been converted. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 16 +++-- include/linux/netdevice.h | 12 +++- net/bridge/br_device.c | 2 +- net/bridge/br_if.c | 14 +++-- net/core/dev.c | 135 +++++++++++++++++++++------------------- 5 files changed, 104 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e2be24f3fe4..832739f38db4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1341,18 +1341,24 @@ static int bond_compute_features(struct bonding *bond) int i; features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); - features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + + if (!bond->first_slave) + goto done; + + features &= ~NETIF_F_ONE_FOR_ALL; bond_for_each_slave(bond, slave, i) { - features = netdev_compute_features(features, - slave->dev->features); + features = netdev_increment_features(features, + slave->dev->features, + NETIF_F_ONE_FOR_ALL); if (slave->dev->hard_header_len > max_hard_header_len) max_hard_header_len = slave->dev->hard_header_len; } +done: features |= (bond_dev->features & BOND_VLAN_FEATURES); - bond_dev->features = features; + bond_dev->features = netdev_fix_features(features, NULL); bond_dev->hard_header_len = max_hard_header_len; return 0; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 64875859d654..c8bcb59adfdf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -541,6 +541,14 @@ struct net_device #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + /* + * If one device supports one of these features, then enable them + * for all in netdev_increment_features. + */ +#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_FRAGLIST) + /* Interface index. Unique device identifier */ int ifindex; int iflink; @@ -1698,7 +1706,9 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); -extern int netdev_compute_features(unsigned long all, unsigned long one); +unsigned long netdev_increment_features(unsigned long all, unsigned long one, + unsigned long mask); +unsigned long netdev_fix_features(unsigned long features, const char *name); static inline int net_gso_ok(int features, int gso_type) { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 22ba8632196f..6c023f0f8252 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL; + NETIF_F_NETNS_LOCAL | NETIF_F_GSO; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 573e20f7dba4..0a09ccf68c1c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -347,15 +347,21 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features; + unsigned long features, mask; - features = br->feature_mask; + features = mask = br->feature_mask; + if (list_empty(&br->port_list)) + goto done; + + features &= ~NETIF_F_ONE_FOR_ALL; list_for_each_entry(p, &br->port_list, list) { - features = netdev_compute_features(features, p->dev->features); + features = netdev_increment_features(features, + p->dev->features, mask); } - br->dev->features = features; +done: + br->dev->features = netdev_fix_features(features, NULL); } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index b8a4fd0806af..d9038e328cc1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3947,6 +3947,46 @@ static void netdev_init_queue_locks(struct net_device *dev) __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); } +unsigned long netdev_fix_features(unsigned long features, const char *name) +{ + /* Fix illegal SG+CSUM combinations. */ + if ((features & NETIF_F_SG) && + !(features & NETIF_F_ALL_CSUM)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " + "checksum feature.\n", name); + features &= ~NETIF_F_SG; + } + + /* TSO requires that SG is present as well. */ + if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " + "SG feature.\n", name); + features &= ~NETIF_F_TSO; + } + + if (features & NETIF_F_UFO) { + if (!(features & NETIF_F_GEN_CSUM)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_HW_CSUM feature.\n", + name); + features &= ~NETIF_F_UFO; + } + + if (!(features & NETIF_F_SG)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_SG feature.\n", name); + features &= ~NETIF_F_UFO; + } + } + + return features; +} +EXPORT_SYMBOL(netdev_fix_features); + /** * register_netdevice - register a network device * @dev: device to register @@ -4032,36 +4072,7 @@ int register_netdevice(struct net_device *dev) dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } - - /* Fix illegal SG+CSUM combinations. */ - if ((dev->features & NETIF_F_SG) && - !(dev->features & NETIF_F_ALL_CSUM)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", - dev->name); - dev->features &= ~NETIF_F_SG; - } - - /* TSO requires that SG is present as well. */ - if ((dev->features & NETIF_F_TSO) && - !(dev->features & NETIF_F_SG)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_TSO; - } - if (dev->features & NETIF_F_UFO) { - if (!(dev->features & NETIF_F_HW_CSUM)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_HW_CSUM feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - if (!(dev->features & NETIF_F_SG)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - } + dev->features = netdev_fix_features(dev->features, dev->name); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) @@ -4700,49 +4711,45 @@ static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ /** - * netdev_compute_feature - compute conjunction of two feature sets - * @all: first feature set - * @one: second feature set + * netdev_increment_features - increment feature set by one + * @all: current feature set + * @one: new feature set + * @mask: mask feature set * * Computes a new feature set after adding a device with feature set - * @one to the master device with current feature set @all. Returns - * the new feature set. + * @one to the master device with current feature set @all. Will not + * enable anything that is off in @mask. Returns the new feature set. */ -int netdev_compute_features(unsigned long all, unsigned long one) -{ - /* if device needs checksumming, downgrade to hw checksumming */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (one & NETIF_F_GSO) - one |= NETIF_F_GSO_SOFTWARE; - one |= NETIF_F_GSO; - - /* - * If even one device supports a GSO protocol with software fallback, - * enable it for all. - */ - all |= one & NETIF_F_GSO_SOFTWARE; +unsigned long netdev_increment_features(unsigned long all, unsigned long one, + unsigned long mask) +{ + /* If device needs checksumming, downgrade to it. */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); + else if (mask & NETIF_F_ALL_CSUM) { + /* If one device supports v4/v6 checksumming, set for all. */ + if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && + !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + } - /* If even one device supports robust GSO, enable it for all. */ - if (one & NETIF_F_GSO_ROBUST) - all |= NETIF_F_GSO_ROBUST; + /* If one device supports hw checksumming, set for all. */ + if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= NETIF_F_HW_CSUM; + } + } - all &= one | NETIF_F_LLTX; + one |= NETIF_F_ALL_CSUM; - if (!(all & NETIF_F_ALL_CSUM)) - all &= ~NETIF_F_SG; - if (!(all & NETIF_F_SG)) - all &= ~NETIF_F_GSO_MASK; + one |= all & NETIF_F_ONE_FOR_ALL; + all &= one | NETIF_F_LLTX | NETIF_F_GSO; + all |= one & mask & NETIF_F_ONE_FOR_ALL; return all; } -EXPORT_SYMBOL(netdev_compute_features); +EXPORT_SYMBOL(netdev_increment_features); static struct hlist_head *netdev_create_hash(void) { -- cgit v1.2.3 From fd6149d332973bafa50f03ddb0ea9513e67f4517 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 23 Oct 2008 14:06:35 -0700 Subject: tcp: Restore ordering of TCP options for the sake of inter-operability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is not our bug! Sadly some devices cannot cope with the change of TCP option ordering which was a result of the recent rewrite of the option code (not that there was some particular reason steming from the rewrite for the reordering) though any ordering of TCP options is perfectly legal. Thus we restore the original ordering to allow interoperability with/through such broken devices and add some warning about this trap. Since the reordering just happened without any particular reason, this change shouldn't cost us anything. There are already couple of known failure reports (within close proximity of the last release), so the problem might be more wide-spread than a single device. And other reports which may be due to the same problem though the symptoms were less obvious. Analysis of one of the case revealed (with very high probability) that sack capability cannot be negotiated as the first option (SYN never got a response). Signed-off-by: Ilpo Järvinen Reported-by: Aldo Maggi Tested-by: Aldo Maggi Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index de54f02f10a9..e4c5ac9fe89b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -362,6 +362,17 @@ struct tcp_out_options { __u32 tsval, tsecr; /* need to include OPTION_TS */ }; +/* Beware: Something in the Internet is very sensitive to the ordering of + * TCP options, we learned this through the hard way, so be careful here. + * Luckily we can at least blame others for their non-compliance but from + * inter-operatibility perspective it seems that we're somewhat stuck with + * the ordering which we have been using if we want to keep working with + * those broken things (not that it currently hurts anybody as there isn't + * particular reason why the ordering would need to be changed). + * + * At least SACK_PERM as the first option is known to lead to a disaster + * (but it may well be that other scenarios fail similarly). + */ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, const struct tcp_out_options *opts, __u8 **md5_hash) { @@ -376,6 +387,12 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *md5_hash = NULL; } + if (unlikely(opts->mss)) { + *ptr++ = htonl((TCPOPT_MSS << 24) | + (TCPOLEN_MSS << 16) | + opts->mss); + } + if (likely(OPTION_TS & opts->options)) { if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | @@ -392,12 +409,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - if (unlikely(opts->mss)) { - *ptr++ = htonl((TCPOPT_MSS << 24) | - (TCPOLEN_MSS << 16) | - opts->mss); - } - if (unlikely(OPTION_SACK_ADVERTISE & opts->options && !(OPTION_TS & opts->options))) { *ptr++ = htonl((TCPOPT_NOP << 24) | -- cgit v1.2.3