TCP "HOSTS" option: carry host names in the data section of the SYN.

Everything above the first "Index:" line is commentary; the diff itself
starts there.

What the hunks add:
  - include/linux/sysctl.h, net/ipv4/sysctl_net_ipv4.c: a "tcp_hosts"
    sysctl entry (NET_TCP_HOSTS) backed by sysctl_tcp_hosts, which is
    initialised to 1 in net/ipv4/tcp_input.c.
  - include/linux/tcp.h: the TCP_HOSTS socket option (42),
    TCP_MAX_HOST_LEN (255) and struct host_info (two names, two lengths).
  - include/net/sock.h, include/net/tcp.h: hosts_ok flags and host_info
    storage, the nonstandard TCPOPT_HOSTS option kind (42), and two extra
    32-bit words written by tcp_syn_build_options() when offer_hosts is
    set: NOP, NOP, kind, length, then
    (hosts_offset << 16) | (snd_host_len << 8) | rcv_host_len.
  - net/ipv4/tcp.c: TCP_HOSTS cases for setsockopt and getsockopt.
  - net/ipv4/tcp_input.c: parsing of TCPOPT_HOSTS on a SYN, and a SYN-ACK
    check that also accepts an ack covering the SYN data.
  - net/ipv4/tcp_output.c: the SYN is extended with a zeroed data area
    (2-byte checksum + snd_host + rcv_host) that write_syn_data() fills
    in afterwards.
  - net/ipv4/tcp_ipv4.c: the SYN skb is allocated with MAX_TCP_SYN_SIZE.
  - net/ipv4/tcp_minisocks.c: hosts_ok/host are copied from the request
    to the new socket.

Points worth knowing when reading the hunks:
  - The parser stores the low length byte in snd_host_len and the next
    byte in rcv_host_len, i.e. the receiver sees the sender's names with
    the roles swapped.
  - The SYN data length used in the SYN-ACK check is
    TCP_HOSTS_CSUM_SIZE + snd_host_len + rcv_host_len, the same amount
    tcp_output.c appends with tcp_skb_zero().
  - getsockopt(TCP_HOSTS) writes back the number of bytes it copied.
  - setsockopt(TCP_HOSTS) reports failures through err and break, like
    the default case visible in the same hunk, so the code that follows
    the switch still runs.

Index: include/linux/sysctl.h
===================================================================
RCS file: /work/jeff519/cvsroot/linux/include/linux/sysctl.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- include/linux/sysctl.h	3 Apr 2002 00:45:38 -0000	1.1
+++ include/linux/sysctl.h	14 Apr 2002 21:23:23 -0000	1.2
@@ -289,7 +289,8 @@
 	NET_TCP_ADV_WIN_SCALE=87,
 	NET_IPV4_NONLOCAL_BIND=88,
 	NET_IPV4_ICMP_RATELIMIT=89,
-	NET_IPV4_ICMP_RATEMASK=90
+	NET_IPV4_ICMP_RATEMASK=90,
+	NET_TCP_HOSTS=91
 };
 
 enum {
Index: include/linux/tcp.h
===================================================================
RCS file: /work/jeff519/cvsroot/linux/include/linux/tcp.h,v
retrieving revision 1.1
retrieving revision 1.4
diff -u -r1.1 -r1.4
--- include/linux/tcp.h	3 Apr 2002 00:45:38 -0000	1.1
+++ include/linux/tcp.h	15 Apr 2002 21:59:46 -0000	1.4
@@ -127,12 +127,18 @@
 #define TCP_WINDOW_CLAMP	10	/* Bound advertised window */
 #define TCP_INFO		11	/* Information about this connection. */
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
+#define TCP_HOSTS		42	/* TCP with host names */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
 #define TCPI_OPT_WSCALE		4
 #define TCPI_OPT_ECN		8
 
+/* Maximal FQDN.  */
+/* XXX: is this the right place for this?  */
+#define TCP_MAX_HOST_LEN	255
+
+
 enum tcp_ca_state
 {
 	TCP_CA_Open = 0,
@@ -183,6 +189,14 @@
 	__u32	tcpi_snd_cwnd;
 	__u32	tcpi_advmss;
 	__u32	tcpi_reordering;
+};
+
+struct host_info
+{
+	__u8	rcv_host[TCP_MAX_HOST_LEN + 1];	/* hostname for receiver*/
+	__u8	rcv_host_len;			/* length of rcv_host */
+	__u8	snd_host[TCP_MAX_HOST_LEN + 1];	/* hostname for sender */
+	__u8	snd_host_len;			/* length of snd_host */
 };
 
 #endif	/* _LINUX_TCP_H */
Index: include/net/sock.h
===================================================================
RCS file: /work/jeff519/cvsroot/linux/include/net/sock.h,v
retrieving revision 1.1
retrieving revision 1.3
diff -u -r1.1 -r1.3
--- include/net/sock.h	3 Apr 2002 00:45:40 -0000	1.1
+++ include/net/sock.h	12 Apr 2002 22:11:04 -0000	1.3
@@ -349,12 +349,14 @@
  */
 	char	tstamp_ok,	/* TIMESTAMP seen on SYN packet		*/
 		wscale_ok,	/* Wscale seen on SYN packet		*/
-		sack_ok;	/* SACK seen on SYN packet		*/
+		sack_ok,	/* SACK seen on SYN packet		*/
+		hosts_ok;	/* HOSTS seen on SYN packet		*/
 	char	saw_tstamp;	/* Saw TIMESTAMP on last packet		*/
 	__u8	snd_wscale;	/* Window scaling received from sender	*/
 	__u8	rcv_wscale;	/* Window scaling to send to receiver	*/
 	__u8	nonagle;	/* Disable Nagle algorithm?		*/
 	__u8	keepalive_probes; /* num of allowed keep alive probes	*/
+	struct host_info host;	/* Host name info for the TCP_HOST opt	*/
 
 /*	PAWS/RTTM data	*/
 	__u32	rcv_tsval;	/* Time stamp value			*/
Index: include/net/tcp.h
===================================================================
RCS file: /work/jeff519/cvsroot/linux/include/net/tcp.h,v
retrieving revision 1.1
retrieving revision 1.5
diff -u -r1.1 -r1.5
--- include/net/tcp.h	3 Apr 2002 00:45:40 -0000	1.1
+++ include/net/tcp.h	15 Apr 2002 03:04:37 -0000	1.5
@@ -262,6 +262,14 @@
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
 
+/* For the HOSTS option. */
+#define TCP_HOSTS_CSUM_SIZE 2
+/*
+ * In case the HOSTS option is being used, there needs to be room
+ * in the SYN for some data (namely a checksum and two hostnames).
+ */
+#define MAX_TCP_SYN_SIZE (MAX_TCP_HEADER + TCP_HOSTS_CSUM_SIZE + (2 * TCP_MAX_HOST_LEN))
+
 /*
  * Never offer a window over 32767 without using window scaling. Some
  * poor stacks do signed 16bit maths!
@@ -405,6 +413,8 @@
 #define TCPOPT_SACK_PERM	4	/* SACK Permitted */
 #define TCPOPT_SACK		5	/* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
+/* XXX this option is nonstandard and not approved by IANA --jeff@litech.org */
+#define TCPOPT_HOSTS		42	/* TCP Host header */
 
 /*
  *     TCP option lengths
@@ -414,6 +424,7 @@
 #define TCPOLEN_WINDOW		3
 #define TCPOLEN_SACK_PERM	2
 #define TCPOLEN_TIMESTAMP	10
+#define TCPOLEN_HOSTS		6
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -422,6 +433,7 @@
 #define TCPOLEN_SACK_BASE		2
 #define TCPOLEN_SACK_BASE_ALIGNED	4
 #define TCPOLEN_SACK_PERBLOCK		8
+#define TCPOLEN_HOSTS_ALIGNED		8
 
 #define TCP_TIME_RETRANS	1	/* Retransmit timer */
 #define TCP_TIME_DACK		2	/* Delayed ack timer */
@@ -433,6 +445,7 @@
 extern int sysctl_tcp_timestamps;
 extern int sysctl_tcp_window_scaling;
 extern int sysctl_tcp_sack;
+extern int sysctl_tcp_hosts;
 extern int sysctl_tcp_fin_timeout;
 extern int sysctl_tcp_tw_recycle;
 extern int sysctl_tcp_keepalive_time;
@@ -505,7 +518,12 @@
 		sack_ok : 1,
 		wscale_ok : 1,
 		ecn_ok : 1,
-		acked : 1;
+		acked : 1,
+		hosts_ok : 1;
+	/* XXX This is awfully big to be inlined here, but I don't
+	 * have any better idea...we need the information.
+	 * -- jeff@litech.org */
+	struct host_info	host;
 	/* The following two fields can be easily recomputed I think -AK */
 	__u32			window_clamp;	/* window clamp at creation time */
 	__u32			rcv_wnd;	/* rcv_wnd offered first time */
@@ -1444,7 +1462,8 @@
  * can generate.
  */
 static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
-					     int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
+		int offer_hosts, __u16 hosts_offset, struct host_info *hosti,
+		int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
 {
 	/* We always get an MSS option.
 	 * The option bytes which will be seen in normal data
@@ -1474,6 +1493,10 @@
 					      (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
 	if (offer_wscale)
 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
+	if (offer_hosts) {
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_HOSTS << 8) | TCPOLEN_HOSTS);
+		*ptr++ = htonl((hosts_offset << 16) | (hosti->snd_host_len << 8) | hosti->rcv_host_len);
+	}
 }
 
 /* Determine a window scaling and initial window to offer.
@@ -1658,6 +1681,8 @@
 	req->sack_ok = tp->sack_ok;
 	req->snd_wscale = tp->snd_wscale;
 	req->wscale_ok = tp->wscale_ok;
+	req->hosts_ok = tp->hosts_ok;
+	memcpy(&req->host, &tp->host, sizeof(struct host_info));
 	req->acked = 0;
 	req->ecn_ok = 0;
 	req->rmt_port = skb->h.th->source;
Index: net/ipv4/sysctl_net_ipv4.c
===================================================================
RCS file: /work/jeff519/cvsroot/linux/net/ipv4/sysctl_net_ipv4.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- net/ipv4/sysctl_net_ipv4.c	3 Apr 2002 00:45:43 -0000	1.1
+++ net/ipv4/sysctl_net_ipv4.c	14 Apr 2002 21:23:38 -0000	1.2
@@ -1,7 +1,7 @@
 /*
  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
  *
- * $Id: sysctl_net_ipv4.c,v 1.1 2002/04/03 00:45:43 jed Exp $
+ * $Id: sysctl_net_ipv4.c,v 1.2 2002/04/14 21:23:38 jeff Exp $
  *
  * Begun April 1, 1996, Mike Shaver.
  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
@@ -219,6 +219,8 @@
 	 &sysctl_icmp_ratelimit, sizeof(int), 0644, NULL, &proc_dointvec},
 	{NET_IPV4_ICMP_RATEMASK, "icmp_ratemask",
 	 &sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec},
+	{NET_TCP_HOSTS, "tcp_hosts",
+	 &sysctl_tcp_hosts, sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
 };
 
Index: net/ipv4/tcp.c
===================================================================
RCS file: /work/jeff519/cvsroot/linux/net/ipv4/tcp.c,v
retrieving revision 1.1
retrieving revision 1.9
diff -u -r1.1 -r1.9
--- net/ipv4/tcp.c	3 Apr 2002 00:45:43 -0000	1.1
+++ net/ipv4/tcp.c	22 Apr 2002 00:48:07 -0000	1.9
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.1 2002/04/03 00:45:43 jed Exp $
+ * Version:	$Id: tcp.c,v 1.9 2002/04/22 00:48:07 jed Exp $
  *
  * Authors:	Ross Biro,
  *		Fred N. van Kempen,
@@ -2305,6 +2305,15 @@
 		}
 		break;
 
+	case TCP_HOSTS:
+		if (optlen != sizeof (struct host_info))
+			err = -EINVAL;	/* set err and break, like the default case */
+		else if (copy_from_user(&(tp->host), optval, optlen))
+			err = -EFAULT;
+		else
+			tp->hosts_ok = 1;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2426,9 +2435,27 @@
 			return -EFAULT;
 		return 0;
 	}
+
 	case TCP_QUICKACK:
 		val = !tp->ack.pingpong;
 		break;
+
+	case TCP_HOSTS:
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+		if(get_user(len,optlen))
+			return -EFAULT;
+		if(!tp->hosts_ok) {
+			return -EOPNOTSUPP;
+		}
+
+		len = MIN(len, sizeof(struct host_info));
+		if(copy_to_user(optval, &(tp->host), len))
+			return -EFAULT;
+		if(put_user(len, optlen))	/* report the bytes actually copied */
+			return -EFAULT;
+		return 0;
+#undef MIN
+
 	default:
 		return -ENOPROTOOPT;
 	};
Index: net/ipv4/tcp_input.c
===================================================================
RCS file: /work/jeff519/cvsroot/linux/net/ipv4/tcp_input.c,v
retrieving revision 1.1
retrieving revision 1.11
diff -u -r1.1 -r1.11
--- net/ipv4/tcp_input.c	3 Apr 2002 00:45:43 -0000	1.1
+++ net/ipv4/tcp_input.c	22 Apr 2002 00:48:07 -0000	1.11
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.1 2002/04/03 00:45:43 jed Exp $
+ * Version:	$Id: tcp_input.c,v 1.11 2002/04/22 00:48:07 jed Exp $
  *
  * Authors:	Ross Biro,
  *		Fred N. van Kempen,
@@ -72,6 +72,7 @@
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
 int sysctl_tcp_sack = 1;
+int sysctl_tcp_hosts = 1;
 int sysctl_tcp_fack = 1;
 int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
 #ifdef CONFIG_INET_ECN
@@ -2067,6 +2068,39 @@
 					   tp->sack_ok) {
 						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
 					}
+					break;
+
+				case TCPOPT_HOSTS:
+					if(opsize==TCPOLEN_HOSTS && th->syn && !estab) {
+						int len;
+						unsigned long option = ntohl(*(__u32*)ptr);
+						__u8 *p;
+						__u16 offset;
+						__u16 csum;
+
+						tp->host.snd_host_len = option & 0xff;
+						option >>= 8;
+						tp->host.rcv_host_len = option & 0xff;
+						option >>= 8;
+						offset = option & 0xffff;
+
+						len = 2 + tp->host.snd_host_len + tp->host.rcv_host_len;
+
+						if(offset + len > skb->len - 4 * th->doff) {
+							break;
+						}
+						p = skb->data + 4 * th->doff + offset;
+						csum = *(__u16*)p;
+						p += TCP_HOSTS_CSUM_SIZE;
+						memcpy(tp->host.rcv_host, p, tp->host.rcv_host_len);
+						p += tp->host.rcv_host_len;
+						memcpy(tp->host.snd_host, p, tp->host.snd_host_len);
+						tp->hosts_ok = 1;
+
+						tp->host.snd_host[tp->host.snd_host_len] = '\0';
+						tp->host.rcv_host[tp->host.rcv_host_len] = '\0';
+					}
+					break;
 				};
 				ptr+=opsize-2;
 				length-=opsize;
@@ -3444,7 +3478,16 @@
 		 * We do not send data with SYN, so that RFC-correct
 		 * test reduces to:
 		 */
-		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
+
+		int data_length = 0;
+		/* length of syn data we added: checksum plus both host names */
+		if (tp->hosts_ok)
+			data_length = TCP_HOSTS_CSUM_SIZE +
+				tp->host.snd_host_len + tp->host.rcv_host_len;
+
+		if ((TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) &&
+		    (!tp->hosts_ok ||
+		     (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt + data_length)))
 			goto reset_and_undo;
 
 		if (tp->saw_tstamp && tp->rcv_tsecr &&
Index: net/ipv4/tcp_ipv4.c
===================================================================
RCS file: /work/jeff519/cvsroot/linux/net/ipv4/tcp_ipv4.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- net/ipv4/tcp_ipv4.c	3 Apr 2002 00:45:43 -0000	1.1
+++ net/ipv4/tcp_ipv4.c	13 Apr 2002 21:45:54 -0000	1.2
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.1 2002/04/03 00:45:43 jed Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.2 2002/04/13 21:45:54 jeff Exp $
  *
  *		IPv4 specific functions
  *
@@ -683,7 +683,7 @@
 	daddr = rt->rt_dst;
 
 	err = -ENOBUFS;
-	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation);
+	buff = alloc_skb(MAX_TCP_SYN_SIZE + 15, sk->allocation);
 	if (buff == NULL)
 		goto failure;
 
Index: net/ipv4/tcp_minisocks.c
===================================================================
RCS file: /work/jeff519/cvsroot/linux/net/ipv4/tcp_minisocks.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- net/ipv4/tcp_minisocks.c	3 Apr 2002 00:45:43 -0000	1.1
+++ net/ipv4/tcp_minisocks.c	15 Apr 2002 03:04:40 -0000	1.2
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_minisocks.c,v 1.1 2002/04/03 00:45:43 jed Exp $
+ * Version:	$Id: tcp_minisocks.c,v 1.2 2002/04/15 03:04:40 jeff Exp $
  *
  * Authors:	Ross Biro,
  *		Fred N. van Kempen,
@@ -770,6 +770,8 @@
 		}
 		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
 		newtp->max_window = newtp->snd_wnd;
+		newtp->hosts_ok = req->hosts_ok;
+		memcpy(&newtp->host, &req->host, sizeof(struct host_info));
 
 		if (newtp->tstamp_ok) {
 			newtp->ts_recent = req->ts_recent;
Index: net/ipv4/tcp_output.c
===================================================================
RCS file: /work/jeff519/cvsroot/linux/net/ipv4/tcp_output.c,v
retrieving revision 1.1
retrieving revision 1.8
diff -u -r1.1 -r1.8
--- net/ipv4/tcp_output.c	3 Apr 2002 00:45:43 -0000	1.1
+++ net/ipv4/tcp_output.c	22 Apr 2002 00:48:07 -0000	1.8
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.1 2002/04/03 00:45:43 jed Exp $
+ * Version:	$Id: tcp_output.c,v 1.8 2002/04/22 00:48:07 jed Exp $
  *
  * Authors:	Ross Biro,
  *		Fred N. van Kempen,
@@ -173,6 +173,39 @@
 	return new_win;
 }
 
+static inline
+char *tcp_skb_zero(struct sk_buff *skb, int len) {
+	unsigned char *to = skb_put(skb, len);
+	memset(to, '\0', len);
+	return to;
+}
+
+static inline
+int write_syn_data(struct host_info *hosti, char *data) {
+	unsigned int csum;
+	int len;
+	__u8 buff[2 * TCP_MAX_HOST_LEN + 1];
+
+	memcpy(buff, hosti->snd_host, hosti->snd_host_len);
+	memcpy(buff + hosti->snd_host_len, hosti->rcv_host, hosti->rcv_host_len);
+	len = hosti->snd_host_len + hosti->rcv_host_len;
+
+	buff[len] = '\0';
+	csum = csum_fold(csum_partial(buff, len, 0));
+
+	/* This is a crazy hack.  We have to get it just right so that the
+	 * checksum of the entire data segment is always zero.  As a result,
+	 * when the remote side computes the checksum, regardless of whether
+	 * they include the data segment or not, they find that the TCP
+	 * checksum matches.
+	 */
+	*(__u16*)data = csum;
+	data += TCP_HOSTS_CSUM_SIZE;
+	memcpy(data, hosti->snd_host, hosti->snd_host_len);
+	data += hosti->snd_host_len;
+	memcpy(data, hosti->rcv_host, hosti->rcv_host_len);
+	return len + TCP_HOSTS_CSUM_SIZE;
+}
 
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
@@ -193,11 +226,14 @@
 		int tcp_header_size = tp->tcp_header_len;
 		struct tcphdr *th;
 		int sysctl_flags;
+		char *data = NULL; /* initialize to silence gcc */
+		int data_offset;
 		int err;
 
 #define SYSCTL_FLAG_TSTAMPS	0x1
 #define SYSCTL_FLAG_WSCALE	0x2
 #define SYSCTL_FLAG_SACK	0x4
+#define SYSCTL_FLAG_HOSTS	0x8
 
 		sysctl_flags = 0;
 		if (tcb->flags & TCPCB_FLAG_SYN) {
@@ -215,6 +251,10 @@
 				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
 					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 			}
+			if(sysctl_tcp_hosts && tp->hosts_ok) {
+				tcp_header_size += TCPOLEN_HOSTS_ALIGNED;
+				sysctl_flags |= SYSCTL_FLAG_HOSTS;
+			}
 		} else if (tp->eff_sacks) {
 			/* A SACK is 2 pad bytes, a 2 byte header, plus
 			 * 2 32-bit sequence numbers for each SACK block.
@@ -250,14 +290,27 @@
 		}
 
 		if (tcb->flags & TCPCB_FLAG_SYN) {
+			data_offset = skb->len - tcp_header_size;
 			tcp_syn_build_options((__u32 *)(th + 1),
 					      tcp_advertise_mss(sk),
 					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
 					      (sysctl_flags & SYSCTL_FLAG_SACK),
+					      (sysctl_flags & SYSCTL_FLAG_HOSTS),
+					      data_offset,
+					      &tp->host,
 					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
 					      tp->rcv_wscale,
 					      tcb->when,
 					      tp->ts_recent);
+			if(sysctl_flags & SYSCTL_FLAG_HOSTS)
+				/* Update the length to account for what will
+				 * go in the data section, including the
+				 * special HOSTS checksum.
+				 */
+				data = tcp_skb_zero(skb,
+						tp->host.snd_host_len +
+						tp->host.rcv_host_len +
+						TCP_HOSTS_CSUM_SIZE);
 		} else {
 			tcp_build_and_update_options((__u32 *)(th + 1),
 						     tp, tcb->when);
@@ -269,8 +322,19 @@
 		if (tcb->flags & TCPCB_FLAG_ACK)
 			tcp_event_ack_sent(sk);
 
-		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb);
+		if (tcb->flags & TCPCB_FLAG_SYN) {
+			/* Now that the checksum has been calculated with the
+			 * length long enough for what will be in the data
+			 * segment, but not actually including that data, we
+			 * put the data in, including the HOSTS checksum.
+			 */
+			if(sysctl_flags & SYSCTL_FLAG_HOSTS)
+				data_offset += write_syn_data(&tp->host, data);
+		} else {
+			/* Ignore payload in SYN packet. */
+			if (skb->len != tcp_header_size)
+				tcp_event_data_sent(tp, skb);
+		}
 
 		TCP_INC_STATS(TcpOutSegs);
 
@@ -292,6 +356,7 @@
 #undef SYSCTL_FLAG_TSTAMPS
 #undef SYSCTL_FLAG_WSCALE
 #undef SYSCTL_FLAG_SACK
+#undef SYSCTL_FLAG_HOSTS
 }
 
 
@@ -1147,8 +1212,8 @@
 
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok,
-			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
-			      TCP_SKB_CB(skb)->when,
+			      req->sack_ok, 0, 0, NULL, req->wscale_ok,
+			      req->rcv_wscale, TCP_SKB_CB(skb)->when,
 			      req->ts_recent);
 
 	skb->csum = 0;