ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/slirp/tcp_input.c
Revision: 1.3
Committed: 2010-08-22T19:43:29Z (14 years, 3 months ago) by asvitkine
Content type: text/plain
Branch: MAIN
Changes since 1.2: +1 -0 lines
Log Message:
Fix warning with undeclared free() with clang

File Contents

# User Rev Content
1 gbeauche 1.1 /*
2     * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
3     * The Regents of the University of California. All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     * 1. Redistributions of source code must retain the above copyright
9     * notice, this list of conditions and the following disclaimer.
10     * 2. Redistributions in binary form must reproduce the above copyright
11     * notice, this list of conditions and the following disclaimer in the
12     * documentation and/or other materials provided with the distribution.
13     * 3. All advertising materials mentioning features or use of this software
14     * must display the following acknowledgement:
15     * This product includes software developed by the University of
16     * California, Berkeley and its contributors.
17     * 4. Neither the name of the University nor the names of its contributors
18     * may be used to endorse or promote products derived from this software
19     * without specific prior written permission.
20     *
21     * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24     * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31     * SUCH DAMAGE.
32     *
33     * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
34     * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
35     */
36    
37     /*
38     * Changes and additions relating to SLiRP
39     * Copyright (c) 1995 Danny Gasparovski.
40     *
41     * Please read the file COPYRIGHT for the
42     * terms and conditions of the copyright.
43     */
44    
45 asvitkine 1.3 #include <stdlib.h>
46 gbeauche 1.1 #include <slirp.h>
47     #include "ip_icmp.h"
48    
49     struct socket tcb;
50    
51     int tcprexmtthresh = 3;
52     struct socket *tcp_last_so = &tcb;
53    
54     tcp_seq tcp_iss; /* tcp initial send seq # */
55    
56     #define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
57    
58     /* for modulo comparisons of timestamps */
59     #define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
60     #define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
61    
/*
 * TCP_REASS(tp, ti, m, so, flags)
 *
 * Insert segment ti (data in mbuf m) into the reassembly queue of the
 * connection with control block tp on socket so, and store in flags
 * TH_FIN if reassembly now includes a segment with FIN (0 otherwise).
 *
 * The macro handles the common case inline: when the segment is the
 * next one expected (ti_seq == rcv_nxt), the reassembly queue is empty
 * and the connection is ESTABLISHED, the data is appended straight to
 * the socket (through tcp_emu() first when so_emu is set), avoiding
 * linkage into and removal from the queue.  Every other case is handed
 * to tcp_reass().
 *
 * ACK policy: in-order segments set TF_DELACK (with TCP_ACK_HACK a
 * segment carrying TH_PUSH sets TF_ACKNOW instead); segments that go
 * through tcp_reass() always set TF_ACKNOW so fast retransmit can work.
 *
 * Every parameter is parenthesized in the expansion so that arbitrary
 * expressions may be passed.  Arguments are still evaluated more than
 * once, so they must be free of side effects, and flags must be an
 * lvalue.  The expansion is a bare compound statement, as before.
 */
#ifdef TCP_ACK_HACK
#define TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (tcpiphdrp_32)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		if ((ti)->ti_flags & TH_PUSH) \
			(tp)->t_flags |= TF_ACKNOW; \
		else \
			(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		if ((so)->so_emu) { \
			if (tcp_emu((so), (m))) \
				sbappend((so), (m)); \
		} else \
			sbappend((so), (m)); \
		/* sorwakeup(so); */ \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
#else
#define TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (tcpiphdrp_32)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		if ((so)->so_emu) { \
			if (tcp_emu((so), (m))) \
				sbappend((so), (m)); \
		} else \
			sbappend((so), (m)); \
		/* sorwakeup(so); */ \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
#endif
116    
117     int
118     tcp_reass(tp, ti, m)
119     register struct tcpcb *tp;
120     register struct tcpiphdr *ti;
121     struct mbuf *m;
122     {
123     register struct tcpiphdr *q;
124     struct socket *so = tp->t_socket;
125     int flags;
126    
127     /*
128     * Call with ti==0 after become established to
129     * force pre-ESTABLISHED data up to user socket.
130     */
131     if (ti == 0)
132     goto present;
133    
134     /*
135     * Find a segment which begins after this one does.
136     */
137     for (q = (struct tcpiphdr *)tp->seg_next; q != (struct tcpiphdr *)tp;
138     q = (struct tcpiphdr *)q->ti_next)
139     if (SEQ_GT(q->ti_seq, ti->ti_seq))
140     break;
141    
142     /*
143     * If there is a preceding segment, it may provide some of
144     * our data already. If so, drop the data from the incoming
145     * segment. If it provides all of our data, drop us.
146     */
147     if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
148     register int i;
149     q = (struct tcpiphdr *)q->ti_prev;
150     /* conversion to int (in i) handles seq wraparound */
151     i = q->ti_seq + q->ti_len - ti->ti_seq;
152     if (i > 0) {
153     if (i >= ti->ti_len) {
154     tcpstat.tcps_rcvduppack++;
155     tcpstat.tcps_rcvdupbyte += ti->ti_len;
156     m_freem(m);
157     /*
158     * Try to present any queued data
159     * at the left window edge to the user.
160     * This is needed after the 3-WHS
161     * completes.
162     */
163     goto present; /* ??? */
164     }
165     m_adj(m, i);
166     ti->ti_len -= i;
167     ti->ti_seq += i;
168     }
169     q = (struct tcpiphdr *)(q->ti_next);
170     }
171     tcpstat.tcps_rcvoopack++;
172     tcpstat.tcps_rcvoobyte += ti->ti_len;
173     REASS_MBUF(ti) = (mbufp_32) m; /* XXX */
174    
175     /*
176     * While we overlap succeeding segments trim them or,
177     * if they are completely covered, dequeue them.
178     */
179     while (q != (struct tcpiphdr *)tp) {
180     register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
181     if (i <= 0)
182     break;
183     if (i < q->ti_len) {
184     q->ti_seq += i;
185     q->ti_len -= i;
186     m_adj((struct mbuf *) REASS_MBUF(q), i);
187     break;
188     }
189     q = (struct tcpiphdr *)q->ti_next;
190     m = (struct mbuf *) REASS_MBUF((struct tcpiphdr *)q->ti_prev);
191     remque_32((void *)(q->ti_prev));
192     m_freem(m);
193     }
194    
195     /*
196     * Stick new segment in its place.
197     */
198     insque_32(ti, (void *)(q->ti_prev));
199    
200     present:
201     /*
202     * Present data to user, advancing rcv_nxt through
203     * completed sequence space.
204     */
205     if (!TCPS_HAVEESTABLISHED(tp->t_state))
206     return (0);
207     ti = (struct tcpiphdr *) tp->seg_next;
208     if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
209     return (0);
210     if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
211     return (0);
212     do {
213     tp->rcv_nxt += ti->ti_len;
214     flags = ti->ti_flags & TH_FIN;
215     remque_32(ti);
216     m = (struct mbuf *) REASS_MBUF(ti); /* XXX */
217     ti = (struct tcpiphdr *)ti->ti_next;
218     /* if (so->so_state & SS_FCANTRCVMORE) */
219     if (so->so_state & SS_FCANTSENDMORE)
220     m_freem(m);
221     else {
222     if (so->so_emu) {
223     if (tcp_emu(so,m)) sbappend(so, m);
224     } else
225     sbappend(so, m);
226     }
227     } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
228     /* sorwakeup(so); */
229     return (flags);
230     }
231    
232     /*
233     * TCP input routine, follows pages 65-76 of the
234     * protocol specification dated September, 1981 very closely.
235     */
236     void
237     tcp_input(m, iphlen, inso)
238     register struct mbuf *m;
239     int iphlen;
240     struct socket *inso;
241     {
242     struct ip save_ip, *ip;
243     register struct tcpiphdr *ti;
244     caddr_t optp = NULL;
245     int optlen = 0;
246     int len, tlen, off;
247     register struct tcpcb *tp = 0;
248     register int tiflags;
249     struct socket *so = 0;
250     int todrop, acked, ourfinisacked, needoutput = 0;
251     /* int dropsocket = 0; */
252     int iss = 0;
253     u_long tiwin;
254     int ret;
255     /* int ts_present = 0; */
256    
257     DEBUG_CALL("tcp_input");
258     DEBUG_ARGS((dfd," m = %8lx iphlen = %2d inso = %lx\n",
259     (long )m, iphlen, (long )inso ));
260    
261     /*
262     * If called with m == 0, then we're continuing the connect
263     */
264     if (m == NULL) {
265     so = inso;
266    
267     /* Re-set a few variables */
268     tp = sototcpcb(so);
269     m = so->so_m;
270     so->so_m = 0;
271     ti = so->so_ti;
272     tiwin = ti->ti_win;
273     tiflags = ti->ti_flags;
274    
275     goto cont_conn;
276     }
277    
278    
279     tcpstat.tcps_rcvtotal++;
280     /*
281     * Get IP and TCP header together in first mbuf.
282     * Note: IP leaves IP header in first mbuf.
283     */
284     ti = mtod(m, struct tcpiphdr *);
285     if (iphlen > sizeof(struct ip )) {
286     ip_stripoptions(m, (struct mbuf *)0);
287     iphlen=sizeof(struct ip );
288     }
289     /* XXX Check if too short */
290    
291    
292     /*
293     * Save a copy of the IP header in case we want restore it
294     * for sending an ICMP error message in response.
295     */
296     ip=mtod(m, struct ip *);
297     save_ip = *ip;
298     save_ip.ip_len+= iphlen;
299    
300     /*
301     * Checksum extended TCP header and data.
302     */
303     tlen = ((struct ip *)ti)->ip_len;
304     ti->ti_next = ti->ti_prev = 0;
305     ti->ti_x1 = 0;
306     ti->ti_len = htons((u_int16_t)tlen);
307     len = sizeof(struct ip ) + tlen;
308     /* keep checksum for ICMP reply
309     * ti->ti_sum = cksum(m, len);
310     * if (ti->ti_sum) { */
311     if(cksum(m, len)) {
312     tcpstat.tcps_rcvbadsum++;
313     goto drop;
314     }
315    
316     /*
317     * Check that TCP offset makes sense,
318     * pull out TCP options and adjust length. XXX
319     */
320     off = ti->ti_off << 2;
321     if (off < sizeof (struct tcphdr) || off > tlen) {
322     tcpstat.tcps_rcvbadoff++;
323     goto drop;
324     }
325     tlen -= off;
326     ti->ti_len = tlen;
327     if (off > sizeof (struct tcphdr)) {
328     optlen = off - sizeof (struct tcphdr);
329     optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
330    
331     /*
332     * Do quick retrieval of timestamp options ("options
333     * prediction?"). If timestamp is the only option and it's
334     * formatted as recommended in RFC 1323 appendix A, we
335     * quickly get the values now and not bother calling
336     * tcp_dooptions(), etc.
337     */
338     /* if ((optlen == TCPOLEN_TSTAMP_APPA ||
339     * (optlen > TCPOLEN_TSTAMP_APPA &&
340     * optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
341     * *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
342     * (ti->ti_flags & TH_SYN) == 0) {
343     * ts_present = 1;
344     * ts_val = ntohl(*(u_int32_t *)(optp + 4));
345     * ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
346     * optp = NULL; / * we've parsed the options * /
347     * }
348     */
349     }
350     tiflags = ti->ti_flags;
351    
352     /*
353     * Convert TCP protocol specific fields to host format.
354     */
355     NTOHL(ti->ti_seq);
356     NTOHL(ti->ti_ack);
357     NTOHS(ti->ti_win);
358     NTOHS(ti->ti_urp);
359    
360     /*
361     * Drop TCP, IP headers and TCP options.
362     */
363     m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
364     m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
365    
366     /*
367     * Locate pcb for segment.
368     */
369     findso:
370     so = tcp_last_so;
371     if (so->so_fport != ti->ti_dport ||
372     so->so_lport != ti->ti_sport ||
373     so->so_laddr.s_addr != ti->ti_src.s_addr ||
374     so->so_faddr.s_addr != ti->ti_dst.s_addr) {
375     so = solookup(&tcb, ti->ti_src, ti->ti_sport,
376     ti->ti_dst, ti->ti_dport);
377     if (so)
378     tcp_last_so = so;
379     ++tcpstat.tcps_socachemiss;
380     }
381    
382     /*
383     * If the state is CLOSED (i.e., TCB does not exist) then
384     * all data in the incoming segment is discarded.
385     * If the TCB exists but is in CLOSED state, it is embryonic,
386     * but should either do a listen or a connect soon.
387     *
388     * state == CLOSED means we've done socreate() but haven't
389     * attached it to a protocol yet...
390     *
391     * XXX If a TCB does not exist, and the TH_SYN flag is
392     * the only flag set, then create a session, mark it
393     * as if it was LISTENING, and continue...
394     */
395     if (so == 0) {
396     if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
397     goto dropwithreset;
398    
399     if ((so = socreate()) == NULL)
400     goto dropwithreset;
401     if (tcp_attach(so) < 0) {
402     free(so); /* Not sofree (if it failed, it's not insqued) */
403     goto dropwithreset;
404     }
405    
406     sbreserve(&so->so_snd, tcp_sndspace);
407     sbreserve(&so->so_rcv, tcp_rcvspace);
408    
409     /* tcp_last_so = so; */ /* XXX ? */
410     /* tp = sototcpcb(so); */
411    
412     so->so_laddr = ti->ti_src;
413     so->so_lport = ti->ti_sport;
414     so->so_faddr = ti->ti_dst;
415     so->so_fport = ti->ti_dport;
416    
417     if ((so->so_iptos = tcp_tos(so)) == 0)
418     so->so_iptos = ((struct ip *)ti)->ip_tos;
419    
420     tp = sototcpcb(so);
421     tp->t_state = TCPS_LISTEN;
422     }
423    
424     /*
425     * If this is a still-connecting socket, this is probably
426     * a retransmit of the SYN. Whether it's a retransmit SYN
427     * or something else, we nuke it.
428     */
429     if (so->so_state & SS_ISFCONNECTING)
430     goto drop;
431    
432     tp = sototcpcb(so);
433    
434     /* XXX Should never fail */
435     if (tp == 0)
436     goto dropwithreset;
437     if (tp->t_state == TCPS_CLOSED)
438     goto drop;
439    
440     /* Unscale the window into a 32-bit value. */
441     /* if ((tiflags & TH_SYN) == 0)
442     * tiwin = ti->ti_win << tp->snd_scale;
443     * else
444     */
445     tiwin = ti->ti_win;
446    
447     /*
448     * Segment received on connection.
449     * Reset idle time and keep-alive timer.
450     */
451     tp->t_idle = 0;
452     if (so_options)
453     tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
454     else
455     tp->t_timer[TCPT_KEEP] = tcp_keepidle;
456    
457     /*
458     * Process options if not in LISTEN state,
459     * else do it below (after getting remote address).
460     */
461     if (optp && tp->t_state != TCPS_LISTEN)
462     tcp_dooptions(tp, (u_char *)optp, optlen, ti);
463     /* , */
464     /* &ts_present, &ts_val, &ts_ecr); */
465    
466     /*
467     * Header prediction: check for the two common cases
468     * of a uni-directional data xfer. If the packet has
469     * no control flags, is in-sequence, the window didn't
470     * change and we're not retransmitting, it's a
471     * candidate. If the length is zero and the ack moved
472     * forward, we're the sender side of the xfer. Just
473     * free the data acked & wake any higher level process
474     * that was blocked waiting for space. If the length
475     * is non-zero and the ack didn't move, we're the
476     * receiver side. If we're getting packets in-order
477     * (the reassembly queue is empty), add the data to
478     * the socket buffer and note that we need a delayed ack.
479     *
480     * XXX Some of these tests are not needed
481     * eg: the tiwin == tp->snd_wnd prevents many more
482     * predictions.. with no *real* advantage..
483     */
484     if (tp->t_state == TCPS_ESTABLISHED &&
485     (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
486     /* (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && */
487     ti->ti_seq == tp->rcv_nxt &&
488     tiwin && tiwin == tp->snd_wnd &&
489     tp->snd_nxt == tp->snd_max) {
490     /*
491     * If last ACK falls within this segment's sequence numbers,
492     * record the timestamp.
493     */
494     /* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
495     * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
496     * tp->ts_recent_age = tcp_now;
497     * tp->ts_recent = ts_val;
498     * }
499     */
500     if (ti->ti_len == 0) {
501     if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
502     SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
503     tp->snd_cwnd >= tp->snd_wnd) {
504     /*
505     * this is a pure ack for outstanding data.
506     */
507     ++tcpstat.tcps_predack;
508     /* if (ts_present)
509     * tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
510     * else
511     */ if (tp->t_rtt &&
512     SEQ_GT(ti->ti_ack, tp->t_rtseq))
513     tcp_xmit_timer(tp, tp->t_rtt);
514     acked = ti->ti_ack - tp->snd_una;
515     tcpstat.tcps_rcvackpack++;
516     tcpstat.tcps_rcvackbyte += acked;
517     sbdrop(&so->so_snd, acked);
518     tp->snd_una = ti->ti_ack;
519     m_freem(m);
520    
521     /*
522     * If all outstanding data are acked, stop
523     * retransmit timer, otherwise restart timer
524     * using current (possibly backed-off) value.
525     * If process is waiting for space,
526     * wakeup/selwakeup/signal. If data
527     * are ready to send, let tcp_output
528     * decide between more output or persist.
529     */
530     if (tp->snd_una == tp->snd_max)
531     tp->t_timer[TCPT_REXMT] = 0;
532     else if (tp->t_timer[TCPT_PERSIST] == 0)
533     tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
534    
535     /*
536     * There's room in so_snd, sowwakeup will read()
537     * from the socket if we can
538     */
539     /* if (so->so_snd.sb_flags & SB_NOTIFY)
540     * sowwakeup(so);
541     */
542     /*
543     * This is called because sowwakeup might have
544     * put data into so_snd. Since we don't do sowwakeup,
545     * we don't need this.. XXX???
546     */
547     if (so->so_snd.sb_cc)
548     (void) tcp_output(tp);
549    
550     return;
551     }
552     } else if (ti->ti_ack == tp->snd_una &&
553     tp->seg_next == (tcpiphdrp_32)tp &&
554     ti->ti_len <= sbspace(&so->so_rcv)) {
555     /*
556     * this is a pure, in-sequence data packet
557     * with nothing on the reassembly queue and
558     * we have enough buffer space to take it.
559     */
560     ++tcpstat.tcps_preddat;
561     tp->rcv_nxt += ti->ti_len;
562     tcpstat.tcps_rcvpack++;
563     tcpstat.tcps_rcvbyte += ti->ti_len;
564     /*
565     * Add data to socket buffer.
566     */
567     if (so->so_emu) {
568     if (tcp_emu(so,m)) sbappend(so, m);
569     } else
570     sbappend(so, m);
571    
572     /*
573     * XXX This is called when data arrives. Later, check
574     * if we can actually write() to the socket
575     * XXX Need to check? It's be NON_BLOCKING
576     */
577     /* sorwakeup(so); */
578    
579     /*
580     * If this is a short packet, then ACK now - with Nagle
581     * congestion avoidance sender won't send more until
582     * he gets an ACK.
583     *
584 gbeauche 1.2 * It is better to not delay acks at all to maximize
585     * TCP throughput. See RFC 2581.
586 gbeauche 1.1 */
587 gbeauche 1.2 tp->t_flags |= TF_ACKNOW;
588     tcp_output(tp);
589 gbeauche 1.1 return;
590     }
591     } /* header prediction */
592     /*
593     * Calculate amount of space in receive window,
594     * and then do TCP input processing.
595     * Receive window is amount of space in rcv queue,
596     * but not less than advertised window.
597     */
598     { int win;
599     win = sbspace(&so->so_rcv);
600     if (win < 0)
601     win = 0;
602     tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
603     }
604    
605     switch (tp->t_state) {
606    
607     /*
608     * If the state is LISTEN then ignore segment if it contains an RST.
609     * If the segment contains an ACK then it is bad and send a RST.
610     * If it does not contain a SYN then it is not interesting; drop it.
611     * Don't bother responding if the destination was a broadcast.
612     * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
613     * tp->iss, and send a segment:
614     * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
615     * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
616     * Fill in remote peer address fields if not previously specified.
617     * Enter SYN_RECEIVED state, and process any other fields of this
618     * segment in this state.
619     */
620     case TCPS_LISTEN: {
621    
622     if (tiflags & TH_RST)
623     goto drop;
624     if (tiflags & TH_ACK)
625     goto dropwithreset;
626     if ((tiflags & TH_SYN) == 0)
627     goto drop;
628    
629     /*
630     * This has way too many gotos...
631     * But a bit of spaghetti code never hurt anybody :)
632     */
633    
634     /*
635     * If this is destined for the control address, then flag to
636     * tcp_ctl once connected, otherwise connect
637     */
638     if ((so->so_faddr.s_addr&htonl(0xffffff00)) == special_addr.s_addr) {
639     int lastbyte=ntohl(so->so_faddr.s_addr) & 0xff;
640     if (lastbyte!=CTL_ALIAS && lastbyte!=CTL_DNS) {
641     #if 0
642     if(lastbyte==CTL_CMD || lastbyte==CTL_EXEC) {
643     /* Command or exec address */
644     so->so_state |= SS_CTL;
645     } else
646     #endif
647     {
648     /* May be an add exec */
649     struct ex_list *ex_ptr;
650     for(ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
651     if(ex_ptr->ex_fport == so->so_fport &&
652     lastbyte == ex_ptr->ex_addr) {
653     so->so_state |= SS_CTL;
654     break;
655     }
656     }
657     }
658     if(so->so_state & SS_CTL) goto cont_input;
659     }
660     /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
661     }
662    
663     if (so->so_emu & EMU_NOCONNECT) {
664     so->so_emu &= ~EMU_NOCONNECT;
665     goto cont_input;
666     }
667    
668     if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
669     u_char code=ICMP_UNREACH_NET;
670     DEBUG_MISC((dfd," tcp fconnect errno = %d-%s\n",
671     errno,strerror(errno)));
672     if(errno == ECONNREFUSED) {
673     /* ACK the SYN, send RST to refuse the connection */
674     tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
675     TH_RST|TH_ACK);
676     } else {
677     if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
678     HTONL(ti->ti_seq); /* restore tcp header */
679     HTONL(ti->ti_ack);
680     HTONS(ti->ti_win);
681     HTONS(ti->ti_urp);
682     m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
683     m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
684     *ip=save_ip;
685     icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
686     }
687     tp = tcp_close(tp);
688     m_free(m);
689     } else {
690     /*
691     * Haven't connected yet, save the current mbuf
692     * and ti, and return
693     * XXX Some OS's don't tell us whether the connect()
694     * succeeded or not. So we must time it out.
695     */
696     so->so_m = m;
697     so->so_ti = ti;
698     tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
699     tp->t_state = TCPS_SYN_RECEIVED;
700     }
701     return;
702    
703     cont_conn:
704     /* m==NULL
705     * Check if the connect succeeded
706     */
707     if (so->so_state & SS_NOFDREF) {
708     tp = tcp_close(tp);
709     goto dropwithreset;
710     }
711     cont_input:
712     tcp_template(tp);
713    
714     if (optp)
715     tcp_dooptions(tp, (u_char *)optp, optlen, ti);
716     /* , */
717     /* &ts_present, &ts_val, &ts_ecr); */
718    
719     if (iss)
720     tp->iss = iss;
721     else
722     tp->iss = tcp_iss;
723     tcp_iss += TCP_ISSINCR/2;
724     tp->irs = ti->ti_seq;
725     tcp_sendseqinit(tp);
726     tcp_rcvseqinit(tp);
727     tp->t_flags |= TF_ACKNOW;
728     tp->t_state = TCPS_SYN_RECEIVED;
729     tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
730     tcpstat.tcps_accepts++;
731     goto trimthenstep6;
732     } /* case TCPS_LISTEN */
733    
734     /*
735     * If the state is SYN_SENT:
736     * if seg contains an ACK, but not for our SYN, drop the input.
737     * if seg contains a RST, then drop the connection.
738     * if seg does not contain SYN, then drop it.
739     * Otherwise this is an acceptable SYN segment
740     * initialize tp->rcv_nxt and tp->irs
741     * if seg contains ack then advance tp->snd_una
742     * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
743     * arrange for segment to be acked (eventually)
744     * continue processing rest of data/controls, beginning with URG
745     */
746     case TCPS_SYN_SENT:
747     if ((tiflags & TH_ACK) &&
748     (SEQ_LEQ(ti->ti_ack, tp->iss) ||
749     SEQ_GT(ti->ti_ack, tp->snd_max)))
750     goto dropwithreset;
751    
752     if (tiflags & TH_RST) {
753     if (tiflags & TH_ACK)
754     tp = tcp_drop(tp,0); /* XXX Check t_softerror! */
755     goto drop;
756     }
757    
758     if ((tiflags & TH_SYN) == 0)
759     goto drop;
760     if (tiflags & TH_ACK) {
761     tp->snd_una = ti->ti_ack;
762     if (SEQ_LT(tp->snd_nxt, tp->snd_una))
763     tp->snd_nxt = tp->snd_una;
764     }
765    
766     tp->t_timer[TCPT_REXMT] = 0;
767     tp->irs = ti->ti_seq;
768     tcp_rcvseqinit(tp);
769     tp->t_flags |= TF_ACKNOW;
770     if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
771     tcpstat.tcps_connects++;
772     soisfconnected(so);
773     tp->t_state = TCPS_ESTABLISHED;
774    
775     /* Do window scaling on this connection? */
776     /* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
777     * (TF_RCVD_SCALE|TF_REQ_SCALE)) {
778     * tp->snd_scale = tp->requested_s_scale;
779     * tp->rcv_scale = tp->request_r_scale;
780     * }
781     */
782     (void) tcp_reass(tp, (struct tcpiphdr *)0,
783     (struct mbuf *)0);
784     /*
785     * if we didn't have to retransmit the SYN,
786     * use its rtt as our initial srtt & rtt var.
787     */
788     if (tp->t_rtt)
789     tcp_xmit_timer(tp, tp->t_rtt);
790     } else
791     tp->t_state = TCPS_SYN_RECEIVED;
792    
793     trimthenstep6:
794     /*
795     * Advance ti->ti_seq to correspond to first data byte.
796     * If data, trim to stay within window,
797     * dropping FIN if necessary.
798     */
799     ti->ti_seq++;
800     if (ti->ti_len > tp->rcv_wnd) {
801     todrop = ti->ti_len - tp->rcv_wnd;
802     m_adj(m, -todrop);
803     ti->ti_len = tp->rcv_wnd;
804     tiflags &= ~TH_FIN;
805     tcpstat.tcps_rcvpackafterwin++;
806     tcpstat.tcps_rcvbyteafterwin += todrop;
807     }
808     tp->snd_wl1 = ti->ti_seq - 1;
809     tp->rcv_up = ti->ti_seq;
810     goto step6;
811     } /* switch tp->t_state */
812     /*
813     * States other than LISTEN or SYN_SENT.
814     * First check timestamp, if present.
815     * Then check that at least some bytes of segment are within
816     * receive window. If segment begins before rcv_nxt,
817     * drop leading data (and SYN); if nothing left, just ack.
818     *
819     * RFC 1323 PAWS: If we have a timestamp reply on this segment
820     * and it's less than ts_recent, drop it.
821     */
822     /* if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
823     * TSTMP_LT(ts_val, tp->ts_recent)) {
824     *
825     */ /* Check to see if ts_recent is over 24 days old. */
826     /* if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
827     */ /*
828     * * Invalidate ts_recent. If this segment updates
829     * * ts_recent, the age will be reset later and ts_recent
830     * * will get a valid value. If it does not, setting
831     * * ts_recent to zero will at least satisfy the
832     * * requirement that zero be placed in the timestamp
833     * * echo reply when ts_recent isn't valid. The
834     * * age isn't reset until we get a valid ts_recent
835     * * because we don't want out-of-order segments to be
836     * * dropped when ts_recent is old.
837     * */
838     /* tp->ts_recent = 0;
839     * } else {
840     * tcpstat.tcps_rcvduppack++;
841     * tcpstat.tcps_rcvdupbyte += ti->ti_len;
842     * tcpstat.tcps_pawsdrop++;
843     * goto dropafterack;
844     * }
845     * }
846     */
847    
848     todrop = tp->rcv_nxt - ti->ti_seq;
849     if (todrop > 0) {
850     if (tiflags & TH_SYN) {
851     tiflags &= ~TH_SYN;
852     ti->ti_seq++;
853     if (ti->ti_urp > 1)
854     ti->ti_urp--;
855     else
856     tiflags &= ~TH_URG;
857     todrop--;
858     }
859     /*
860     * Following if statement from Stevens, vol. 2, p. 960.
861     */
862     if (todrop > ti->ti_len
863     || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
864     /*
865     * Any valid FIN must be to the left of the window.
866     * At this point the FIN must be a duplicate or out
867     * of sequence; drop it.
868     */
869     tiflags &= ~TH_FIN;
870    
871     /*
872     * Send an ACK to resynchronize and drop any data.
873     * But keep on processing for RST or ACK.
874     */
875     tp->t_flags |= TF_ACKNOW;
876     todrop = ti->ti_len;
877     tcpstat.tcps_rcvduppack++;
878     tcpstat.tcps_rcvdupbyte += todrop;
879     } else {
880     tcpstat.tcps_rcvpartduppack++;
881     tcpstat.tcps_rcvpartdupbyte += todrop;
882     }
883     m_adj(m, todrop);
884     ti->ti_seq += todrop;
885     ti->ti_len -= todrop;
886     if (ti->ti_urp > todrop)
887     ti->ti_urp -= todrop;
888     else {
889     tiflags &= ~TH_URG;
890     ti->ti_urp = 0;
891     }
892     }
893     /*
894     * If new data are received on a connection after the
895     * user processes are gone, then RST the other end.
896     */
897     if ((so->so_state & SS_NOFDREF) &&
898     tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
899     tp = tcp_close(tp);
900     tcpstat.tcps_rcvafterclose++;
901     goto dropwithreset;
902     }
903    
904     /*
905     * If segment ends after window, drop trailing data
906     * (and PUSH and FIN); if nothing left, just ACK.
907     */
908     todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
909     if (todrop > 0) {
910     tcpstat.tcps_rcvpackafterwin++;
911     if (todrop >= ti->ti_len) {
912     tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
913     /*
914     * If a new connection request is received
915     * while in TIME_WAIT, drop the old connection
916     * and start over if the sequence numbers
917     * are above the previous ones.
918     */
919     if (tiflags & TH_SYN &&
920     tp->t_state == TCPS_TIME_WAIT &&
921     SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
922     iss = tp->rcv_nxt + TCP_ISSINCR;
923     tp = tcp_close(tp);
924     goto findso;
925     }
926     /*
927     * If window is closed can only take segments at
928     * window edge, and have to drop data and PUSH from
929     * incoming segments. Continue processing, but
930     * remember to ack. Otherwise, drop segment
931     * and ack.
932     */
933     if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
934     tp->t_flags |= TF_ACKNOW;
935     tcpstat.tcps_rcvwinprobe++;
936     } else
937     goto dropafterack;
938     } else
939     tcpstat.tcps_rcvbyteafterwin += todrop;
940     m_adj(m, -todrop);
941     ti->ti_len -= todrop;
942     tiflags &= ~(TH_PUSH|TH_FIN);
943     }
944    
945     /*
946     * If last ACK falls within this segment's sequence numbers,
947     * record its timestamp.
948     */
949     /* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
950     * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
951     * ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
952     * tp->ts_recent_age = tcp_now;
953     * tp->ts_recent = ts_val;
954     * }
955     */
956    
957     /*
958     * If the RST bit is set examine the state:
959     * SYN_RECEIVED STATE:
960     * If passive open, return to LISTEN state.
961     * If active open, inform user that connection was refused.
962     * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
963     * Inform user that connection was reset, and close tcb.
964     * CLOSING, LAST_ACK, TIME_WAIT STATES
965     * Close the tcb.
966     */
967     if (tiflags&TH_RST) switch (tp->t_state) {
968    
969     case TCPS_SYN_RECEIVED:
970     /* so->so_error = ECONNREFUSED; */
971     goto close;
972    
973     case TCPS_ESTABLISHED:
974     case TCPS_FIN_WAIT_1:
975     case TCPS_FIN_WAIT_2:
976     case TCPS_CLOSE_WAIT:
977     /* so->so_error = ECONNRESET; */
978     close:
979     tp->t_state = TCPS_CLOSED;
980     tcpstat.tcps_drops++;
981     tp = tcp_close(tp);
982     goto drop;
983    
984     case TCPS_CLOSING:
985     case TCPS_LAST_ACK:
986     case TCPS_TIME_WAIT:
987     tp = tcp_close(tp);
988     goto drop;
989     }
990    
991     /*
992     * If a SYN is in the window, then this is an
993     * error and we send an RST and drop the connection.
994     */
995     if (tiflags & TH_SYN) {
996     tp = tcp_drop(tp,0);
997     goto dropwithreset;
998     }
999    
1000     /*
1001     * If the ACK bit is off we drop the segment and return.
1002     */
1003     if ((tiflags & TH_ACK) == 0) goto drop;
1004    
1005     /*
1006     * Ack processing.
1007     */
1008     switch (tp->t_state) {
1009     /*
1010     * In SYN_RECEIVED state if the ack ACKs our SYN then enter
1011     * ESTABLISHED state and continue processing, otherwise
1012     * send an RST. una<=ack<=max
1013     */
1014     case TCPS_SYN_RECEIVED:
1015    
1016     if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
1017     SEQ_GT(ti->ti_ack, tp->snd_max))
1018     goto dropwithreset;
1019     tcpstat.tcps_connects++;
1020     tp->t_state = TCPS_ESTABLISHED;
1021     /*
1022     * The sent SYN is ack'ed with our sequence number +1
1023     * The first data byte already in the buffer will get
1024     * lost if no correction is made. This is only needed for
1025     * SS_CTL since the buffer is empty otherwise.
1026     * tp->snd_una++; or:
1027     */
1028     tp->snd_una=ti->ti_ack;
1029     if (so->so_state & SS_CTL) {
1030     /* So tcp_ctl reports the right state */
1031     ret = tcp_ctl(so);
1032     if (ret == 1) {
1033     soisfconnected(so);
1034     so->so_state &= ~SS_CTL; /* success XXX */
1035     } else if (ret == 2) {
1036     so->so_state = SS_NOFDREF; /* CTL_CMD */
1037     } else {
1038     needoutput = 1;
1039     tp->t_state = TCPS_FIN_WAIT_1;
1040     }
1041     } else {
1042     soisfconnected(so);
1043     }
1044    
1045     /* Do window scaling? */
1046     /* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1047     * (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1048     * tp->snd_scale = tp->requested_s_scale;
1049     * tp->rcv_scale = tp->request_r_scale;
1050     * }
1051     */
1052     (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
1053     tp->snd_wl1 = ti->ti_seq - 1;
1054     /* Avoid ack processing; snd_una==ti_ack => dup ack */
1055     goto synrx_to_est;
1056     /* fall into ... */
1057    
1058     /*
1059     * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1060     * ACKs. If the ack is in the range
1061     * tp->snd_una < ti->ti_ack <= tp->snd_max
1062     * then advance tp->snd_una to ti->ti_ack and drop
1063     * data from the retransmission queue. If this ACK reflects
1064     * more up to date window information we update our window information.
1065     */
1066     case TCPS_ESTABLISHED:
1067     case TCPS_FIN_WAIT_1:
1068     case TCPS_FIN_WAIT_2:
1069     case TCPS_CLOSE_WAIT:
1070     case TCPS_CLOSING:
1071     case TCPS_LAST_ACK:
1072     case TCPS_TIME_WAIT:
1073    
1074     if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
1075     if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
1076     tcpstat.tcps_rcvdupack++;
1077     DEBUG_MISC((dfd," dup ack m = %lx so = %lx \n",
1078     (long )m, (long )so));
1079     /*
1080     * If we have outstanding data (other than
1081     * a window probe), this is a completely
1082     * duplicate ack (ie, window info didn't
1083     * change), the ack is the biggest we've
1084     * seen and we've seen exactly our rexmt
1085     * threshold of them, assume a packet
1086     * has been dropped and retransmit it.
1087     * Kludge snd_nxt & the congestion
1088     * window so we send only this one
1089     * packet.
1090     *
1091     * We know we're losing at the current
1092     * window size so do congestion avoidance
1093     * (set ssthresh to half the current window
1094     * and pull our congestion window back to
1095     * the new ssthresh).
1096     *
1097     * Dup acks mean that packets have left the
1098     * network (they're now cached at the receiver)
1099     * so bump cwnd by the amount in the receiver
1100     * to keep a constant cwnd packets in the
1101     * network.
1102     */
1103     if (tp->t_timer[TCPT_REXMT] == 0 ||
1104     ti->ti_ack != tp->snd_una)
1105     tp->t_dupacks = 0;
1106     else if (++tp->t_dupacks == tcprexmtthresh) {
1107     tcp_seq onxt = tp->snd_nxt;
1108     u_int win =
1109     min(tp->snd_wnd, tp->snd_cwnd) / 2 /
1110     tp->t_maxseg;
1111    
1112     if (win < 2)
1113     win = 2;
1114     tp->snd_ssthresh = win * tp->t_maxseg;
1115     tp->t_timer[TCPT_REXMT] = 0;
1116     tp->t_rtt = 0;
1117     tp->snd_nxt = ti->ti_ack;
1118     tp->snd_cwnd = tp->t_maxseg;
1119     (void) tcp_output(tp);
1120     tp->snd_cwnd = tp->snd_ssthresh +
1121     tp->t_maxseg * tp->t_dupacks;
1122     if (SEQ_GT(onxt, tp->snd_nxt))
1123     tp->snd_nxt = onxt;
1124     goto drop;
1125     } else if (tp->t_dupacks > tcprexmtthresh) {
1126     tp->snd_cwnd += tp->t_maxseg;
1127     (void) tcp_output(tp);
1128     goto drop;
1129     }
1130     } else
1131     tp->t_dupacks = 0;
1132     break;
1133     }
1134     synrx_to_est:
1135     /*
1136     * If the congestion window was inflated to account
1137     * for the other side's cached packets, retract it.
1138     */
1139     if (tp->t_dupacks > tcprexmtthresh &&
1140     tp->snd_cwnd > tp->snd_ssthresh)
1141     tp->snd_cwnd = tp->snd_ssthresh;
1142     tp->t_dupacks = 0;
1143     if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1144     tcpstat.tcps_rcvacktoomuch++;
1145     goto dropafterack;
1146     }
1147     acked = ti->ti_ack - tp->snd_una;
1148     tcpstat.tcps_rcvackpack++;
1149     tcpstat.tcps_rcvackbyte += acked;
1150    
1151     /*
1152     * If we have a timestamp reply, update smoothed
1153     * round trip time. If no timestamp is present but
1154     * transmit timer is running and timed sequence
1155     * number was acked, update smoothed round trip time.
1156     * Since we now have an rtt measurement, cancel the
1157     * timer backoff (cf., Phil Karn's retransmit alg.).
1158     * Recompute the initial retransmit timer.
1159     */
1160     /* if (ts_present)
1161     * tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1162     * else
1163     */
1164     if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
1165     tcp_xmit_timer(tp,tp->t_rtt);
1166    
1167     /*
1168     * If all outstanding data is acked, stop retransmit
1169     * timer and remember to restart (more output or persist).
1170     * If there is more data to be acked, restart retransmit
1171     * timer, using current (possibly backed-off) value.
1172     */
1173     if (ti->ti_ack == tp->snd_max) {
1174     tp->t_timer[TCPT_REXMT] = 0;
1175     needoutput = 1;
1176     } else if (tp->t_timer[TCPT_PERSIST] == 0)
1177     tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1178     /*
1179     * When new data is acked, open the congestion window.
1180     * If the window gives us less than ssthresh packets
1181     * in flight, open exponentially (maxseg per packet).
1182     * Otherwise open linearly: maxseg per window
1183     * (maxseg^2 / cwnd per packet).
1184     */
1185     {
1186     register u_int cw = tp->snd_cwnd;
1187     register u_int incr = tp->t_maxseg;
1188    
1189     if (cw > tp->snd_ssthresh)
1190     incr = incr * incr / cw;
1191     tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1192     }
1193     if (acked > so->so_snd.sb_cc) {
1194     tp->snd_wnd -= so->so_snd.sb_cc;
1195     sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
1196     ourfinisacked = 1;
1197     } else {
1198     sbdrop(&so->so_snd, acked);
1199     tp->snd_wnd -= acked;
1200     ourfinisacked = 0;
1201     }
1202     /*
1203     * XXX sowwakeup is called when data is acked and there's room
1204     * for more data... it should read() the socket
1205     */
1206     /* if (so->so_snd.sb_flags & SB_NOTIFY)
1207     * sowwakeup(so);
1208     */
1209     tp->snd_una = ti->ti_ack;
1210     if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1211     tp->snd_nxt = tp->snd_una;
1212    
1213     switch (tp->t_state) {
1214    
1215     /*
1216     * In FIN_WAIT_1 STATE in addition to the processing
1217     * for the ESTABLISHED state if our FIN is now acknowledged
1218     * then enter FIN_WAIT_2.
1219     */
1220     case TCPS_FIN_WAIT_1:
1221     if (ourfinisacked) {
1222     /*
1223     * If we can't receive any more
1224     * data, then closing user can proceed.
1225     * Starting the timer is contrary to the
1226     * specification, but if we don't get a FIN
1227     * we'll hang forever.
1228     */
1229     if (so->so_state & SS_FCANTRCVMORE) {
1230     soisfdisconnected(so);
1231     tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1232     }
1233     tp->t_state = TCPS_FIN_WAIT_2;
1234     }
1235     break;
1236    
1237     /*
1238     * In CLOSING STATE in addition to the processing for
1239     * the ESTABLISHED state if the ACK acknowledges our FIN
1240     * then enter the TIME-WAIT state, otherwise ignore
1241     * the segment.
1242     */
1243     case TCPS_CLOSING:
1244     if (ourfinisacked) {
1245     tp->t_state = TCPS_TIME_WAIT;
1246     tcp_canceltimers(tp);
1247     tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1248     soisfdisconnected(so);
1249     }
1250     break;
1251    
1252     /*
1253     * In LAST_ACK, we may still be waiting for data to drain
1254     * and/or to be acked, as well as for the ack of our FIN.
1255     * If our FIN is now acknowledged, delete the TCB,
1256     * enter the closed state and return.
1257     */
1258     case TCPS_LAST_ACK:
1259     if (ourfinisacked) {
1260     tp = tcp_close(tp);
1261     goto drop;
1262     }
1263     break;
1264    
1265     /*
1266     * In TIME_WAIT state the only thing that should arrive
1267     * is a retransmission of the remote FIN. Acknowledge
1268     * it and restart the finack timer.
1269     */
1270     case TCPS_TIME_WAIT:
1271     tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1272     goto dropafterack;
1273     }
1274     } /* switch(tp->t_state) */
1275    
1276     step6:
1277     /*
1278     * Update window information.
1279     * Don't look at window if no ACK: TAC's send garbage on first SYN.
1280     */
1281     if ((tiflags & TH_ACK) &&
1282     (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1283     (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1284     (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1285     /* keep track of pure window updates */
1286     if (ti->ti_len == 0 &&
1287     tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
1288     tcpstat.tcps_rcvwinupd++;
1289     tp->snd_wnd = tiwin;
1290     tp->snd_wl1 = ti->ti_seq;
1291     tp->snd_wl2 = ti->ti_ack;
1292     if (tp->snd_wnd > tp->max_sndwnd)
1293     tp->max_sndwnd = tp->snd_wnd;
1294     needoutput = 1;
1295     }
1296    
1297     /*
1298     * Process segments with URG.
1299     */
1300     if ((tiflags & TH_URG) && ti->ti_urp &&
1301     TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1302     /*
1303     * This is a kludge, but if we receive and accept
1304     * random urgent pointers, we'll crash in
1305     * soreceive. It's hard to imagine someone
1306     * actually wanting to send this much urgent data.
1307     */
1308     if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
1309     ti->ti_urp = 0;
1310     tiflags &= ~TH_URG;
1311     goto dodata;
1312     }
1313     /*
1314     * If this segment advances the known urgent pointer,
1315     * then mark the data stream. This should not happen
1316     * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1317     * a FIN has been received from the remote side.
1318     * In these states we ignore the URG.
1319     *
1320     * According to RFC961 (Assigned Protocols),
1321     * the urgent pointer points to the last octet
1322     * of urgent data. We continue, however,
1323     * to consider it to indicate the first octet
1324     * of data past the urgent section as the original
1325     * spec states (in one of two places).
1326     */
1327     if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
1328     tp->rcv_up = ti->ti_seq + ti->ti_urp;
1329     so->so_urgc = so->so_rcv.sb_cc +
1330     (tp->rcv_up - tp->rcv_nxt); /* -1; */
1331     tp->rcv_up = ti->ti_seq + ti->ti_urp;
1332    
1333     }
1334     } else
1335     /*
1336     * If no out of band data is expected,
1337     * pull receive urgent pointer along
1338     * with the receive window.
1339     */
1340     if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1341     tp->rcv_up = tp->rcv_nxt;
1342     dodata:
1343    
1344     /*
1345     * Process the segment text, merging it into the TCP sequencing queue,
1346     * and arranging for acknowledgment of receipt if necessary.
1347     * This process logically involves adjusting tp->rcv_wnd as data
1348     * is presented to the user (this happens in tcp_usrreq.c,
1349     * case PRU_RCVD). If a FIN has already been received on this
1350     * connection then we just ignore the text.
1351     */
1352     if ((ti->ti_len || (tiflags&TH_FIN)) &&
1353     TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1354     TCP_REASS(tp, ti, m, so, tiflags);
1355     /*
1356     * Note the amount of data that peer has sent into
1357     * our window, in order to estimate the sender's
1358     * buffer size.
1359     */
1360     len = so->so_rcv.sb_datalen - (tp->rcv_adv - tp->rcv_nxt);
1361     } else {
1362     m_free(m);
1363     tiflags &= ~TH_FIN;
1364     }
1365    
1366     /*
1367     * If FIN is received ACK the FIN and let the user know
1368     * that the connection is closing.
1369     */
1370     if (tiflags & TH_FIN) {
1371     if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1372     /*
1373     * If we receive a FIN we can't send more data,
1374     * set it SS_FDRAIN
1375     * Shutdown the socket if there is no rx data in the
1376     * buffer.
1377     * soread() is called on completion of shutdown() and
1378     * will go to TCPS_LAST_ACK, and use tcp_output()
1379     * to send the FIN.
1380     */
1381     /* sofcantrcvmore(so); */
1382     sofwdrain(so);
1383    
1384     tp->t_flags |= TF_ACKNOW;
1385     tp->rcv_nxt++;
1386     }
1387     switch (tp->t_state) {
1388    
1389     /*
1390     * In SYN_RECEIVED and ESTABLISHED STATES
1391     * enter the CLOSE_WAIT state.
1392     */
1393     case TCPS_SYN_RECEIVED:
1394     case TCPS_ESTABLISHED:
1395     if(so->so_emu == EMU_CTL) /* no shutdown on socket */
1396     tp->t_state = TCPS_LAST_ACK;
1397     else
1398     tp->t_state = TCPS_CLOSE_WAIT;
1399     break;
1400    
1401     /*
1402     * If still in FIN_WAIT_1 STATE FIN has not been acked so
1403     * enter the CLOSING state.
1404     */
1405     case TCPS_FIN_WAIT_1:
1406     tp->t_state = TCPS_CLOSING;
1407     break;
1408    
1409     /*
1410     * In FIN_WAIT_2 state enter the TIME_WAIT state,
1411     * starting the time-wait timer, turning off the other
1412     * standard timers.
1413     */
1414     case TCPS_FIN_WAIT_2:
1415     tp->t_state = TCPS_TIME_WAIT;
1416     tcp_canceltimers(tp);
1417     tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1418     soisfdisconnected(so);
1419     break;
1420    
1421     /*
1422     * In TIME_WAIT state restart the 2 MSL time_wait timer.
1423     */
1424     case TCPS_TIME_WAIT:
1425     tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1426     break;
1427     }
1428     }
1429    
1430     /*
1431     * If this is a small packet, then ACK now - with Nagle
1432     * congestion avoidance sender won't send more until
1433     * he gets an ACK.
1434     *
1435     * See above.
1436     */
1437     /* if (ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg) {
1438     */
1439     /* if ((ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg &&
1440     * (so->so_iptos & IPTOS_LOWDELAY) == 0) ||
1441     * ((so->so_iptos & IPTOS_LOWDELAY) &&
1442     * ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) {
1443     */
1444     if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
1445     ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
1446     tp->t_flags |= TF_ACKNOW;
1447     }
1448    
1449     /*
1450     * Return any desired output.
1451     */
1452     if (needoutput || (tp->t_flags & TF_ACKNOW)) {
1453     (void) tcp_output(tp);
1454     }
1455     return;
1456    
1457     dropafterack:
1458     /*
1459     * Generate an ACK dropping incoming segment if it occupies
1460     * sequence space, where the ACK reflects our state.
1461     */
1462     if (tiflags & TH_RST)
1463     goto drop;
1464     m_freem(m);
1465     tp->t_flags |= TF_ACKNOW;
1466     (void) tcp_output(tp);
1467     return;
1468    
1469     dropwithreset:
1470     /* reuses m if m!=NULL, m_free() unnecessary */
1471     if (tiflags & TH_ACK)
1472     tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
1473     else {
1474     if (tiflags & TH_SYN) ti->ti_len++;
1475     tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1476     TH_RST|TH_ACK);
1477     }
1478    
1479     return;
1480    
1481     drop:
1482     /*
1483     * Drop space held by incoming segment and return.
1484     */
1485     m_free(m);
1486    
1487     return;
1488     }
1489    
1490     /* , ts_present, ts_val, ts_ecr) */
1491     /* int *ts_present;
1492     * u_int32_t *ts_val, *ts_ecr;
1493     */
1494     void
1495     tcp_dooptions(tp, cp, cnt, ti)
1496     struct tcpcb *tp;
1497     u_char *cp;
1498     int cnt;
1499     struct tcpiphdr *ti;
1500     {
1501     u_int16_t mss;
1502     int opt, optlen;
1503    
1504     DEBUG_CALL("tcp_dooptions");
1505     DEBUG_ARGS((dfd," tp = %lx cnt=%i \n", (long )tp, cnt));
1506    
1507     for (; cnt > 0; cnt -= optlen, cp += optlen) {
1508     opt = cp[0];
1509     if (opt == TCPOPT_EOL)
1510     break;
1511     if (opt == TCPOPT_NOP)
1512     optlen = 1;
1513     else {
1514     optlen = cp[1];
1515     if (optlen <= 0)
1516     break;
1517     }
1518     switch (opt) {
1519    
1520     default:
1521     continue;
1522    
1523     case TCPOPT_MAXSEG:
1524     if (optlen != TCPOLEN_MAXSEG)
1525     continue;
1526     if (!(ti->ti_flags & TH_SYN))
1527     continue;
1528     memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
1529     NTOHS(mss);
1530     (void) tcp_mss(tp, mss); /* sets t_maxseg */
1531     break;
1532    
1533     /* case TCPOPT_WINDOW:
1534     * if (optlen != TCPOLEN_WINDOW)
1535     * continue;
1536     * if (!(ti->ti_flags & TH_SYN))
1537     * continue;
1538     * tp->t_flags |= TF_RCVD_SCALE;
1539     * tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
1540     * break;
1541     */
1542     /* case TCPOPT_TIMESTAMP:
1543     * if (optlen != TCPOLEN_TIMESTAMP)
1544     * continue;
1545     * *ts_present = 1;
1546     * memcpy((char *) ts_val, (char *)cp + 2, sizeof(*ts_val));
1547     * NTOHL(*ts_val);
1548     * memcpy((char *) ts_ecr, (char *)cp + 6, sizeof(*ts_ecr));
1549     * NTOHL(*ts_ecr);
1550     *
1551     */ /*
1552     * * A timestamp received in a SYN makes
1553     * * it ok to send timestamp requests and replies.
1554     * */
1555     /* if (ti->ti_flags & TH_SYN) {
1556     * tp->t_flags |= TF_RCVD_TSTMP;
1557     * tp->ts_recent = *ts_val;
1558     * tp->ts_recent_age = tcp_now;
1559     * }
1560     */ break;
1561     }
1562     }
1563     }
1564    
1565    
1566     /*
1567     * Pull out of band byte out of a segment so
1568     * it doesn't appear in the user's data queue.
1569     * It is still reflected in the segment length for
1570     * sequencing purposes.
1571     */
1572    
1573     #ifdef notdef
1574    
1575     void
1576     tcp_pulloutofband(so, ti, m)
1577     struct socket *so;
1578     struct tcpiphdr *ti;
1579     register struct mbuf *m;
1580     {
1581     int cnt = ti->ti_urp - 1;
1582    
1583     while (cnt >= 0) {
1584     if (m->m_len > cnt) {
1585     char *cp = mtod(m, caddr_t) + cnt;
1586     struct tcpcb *tp = sototcpcb(so);
1587    
1588     tp->t_iobc = *cp;
1589     tp->t_oobflags |= TCPOOB_HAVEDATA;
1590     memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1));
1591     m->m_len--;
1592     return;
1593     }
1594     cnt -= m->m_len;
1595     m = m->m_next; /* XXX WRONG! Fix it! */
1596     if (m == 0)
1597     break;
1598     }
1599     panic("tcp_pulloutofband");
1600     }
1601    
1602     #endif /* notdef */
1603    
1604     /*
1605     * Collect new round-trip time estimate
1606     * and update averages and current timeout.
1607     */
1608    
1609     void
1610     tcp_xmit_timer(tp, rtt)
1611     register struct tcpcb *tp;
1612     int rtt;
1613     {
1614     register short delta;
1615    
1616     DEBUG_CALL("tcp_xmit_timer");
1617     DEBUG_ARG("tp = %lx", (long)tp);
1618     DEBUG_ARG("rtt = %d", rtt);
1619    
1620     tcpstat.tcps_rttupdated++;
1621     if (tp->t_srtt != 0) {
1622     /*
1623     * srtt is stored as fixed point with 3 bits after the
1624     * binary point (i.e., scaled by 8). The following magic
1625     * is equivalent to the smoothing algorithm in rfc793 with
1626     * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1627     * point). Adjust rtt to origin 0.
1628     */
1629     delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1630     if ((tp->t_srtt += delta) <= 0)
1631     tp->t_srtt = 1;
1632     /*
1633     * We accumulate a smoothed rtt variance (actually, a
1634     * smoothed mean difference), then set the retransmit
1635     * timer to smoothed rtt + 4 times the smoothed variance.
1636     * rttvar is stored as fixed point with 2 bits after the
1637     * binary point (scaled by 4). The following is
1638     * equivalent to rfc793 smoothing with an alpha of .75
1639     * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
1640     * rfc793's wired-in beta.
1641     */
1642     if (delta < 0)
1643     delta = -delta;
1644     delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1645     if ((tp->t_rttvar += delta) <= 0)
1646     tp->t_rttvar = 1;
1647     } else {
1648     /*
1649     * No rtt measurement yet - use the unsmoothed rtt.
1650     * Set the variance to half the rtt (so our first
1651     * retransmit happens at 3*rtt).
1652     */
1653     tp->t_srtt = rtt << TCP_RTT_SHIFT;
1654     tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1655     }
1656     tp->t_rtt = 0;
1657     tp->t_rxtshift = 0;
1658    
1659     /*
1660     * the retransmit should happen at rtt + 4 * rttvar.
1661     * Because of the way we do the smoothing, srtt and rttvar
1662     * will each average +1/2 tick of bias. When we compute
1663     * the retransmit timer, we want 1/2 tick of rounding and
1664     * 1 extra tick because of +-1/2 tick uncertainty in the
1665     * firing of the timer. The bias will give us exactly the
1666     * 1.5 tick we need. But, because the bias is
1667     * statistical, we have to test that we don't drop below
1668     * the minimum feasible timer (which is 2 ticks).
1669     */
1670     TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
1671     (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
1672    
1673     /*
1674     * We received an ack for a packet that wasn't retransmitted;
1675     * it is probably safe to discard any error indications we've
1676     * received recently. This isn't quite right, but close enough
1677     * for now (a route might have failed after we sent a segment,
1678     * and the return path might not be symmetrical).
1679     */
1680     tp->t_softerror = 0;
1681     }
1682    
1683     /*
1684     * Determine a reasonable value for maxseg size.
1685     * If the route is known, check route for mtu.
1686     * If none, use an mss that can be handled on the outgoing
1687     * interface without forcing IP to fragment; if bigger than
1688     * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
1689     * to utilize large mbufs. If no route is found, route has no mtu,
1690     * or the destination isn't local, use a default, hopefully conservative
1691     * size (usually 512 or the default IP max size, but no more than the mtu
1692     * of the interface), as we can't discover anything about intervening
1693     * gateways or networks. We also initialize the congestion/slow start
1694     * window to be a single segment if the destination isn't local.
1695     * While looking at the routing entry, we also initialize other path-dependent
1696     * parameters from pre-set or cached values in the routing entry.
1697     */
1698    
1699     int
1700     tcp_mss(tp, offer)
1701     register struct tcpcb *tp;
1702     u_int offer;
1703     {
1704     struct socket *so = tp->t_socket;
1705     int mss;
1706    
1707     DEBUG_CALL("tcp_mss");
1708     DEBUG_ARG("tp = %lx", (long)tp);
1709     DEBUG_ARG("offer = %d", offer);
1710    
1711     mss = min(if_mtu, if_mru) - sizeof(struct tcpiphdr);
1712     if (offer)
1713     mss = min(mss, offer);
1714     mss = max(mss, 32);
1715     if (mss < tp->t_maxseg || offer != 0)
1716     tp->t_maxseg = mss;
1717    
1718     tp->snd_cwnd = mss;
1719    
1720     sbreserve(&so->so_snd, tcp_sndspace+((tcp_sndspace%mss)?(mss-(tcp_sndspace%mss)):0));
1721     sbreserve(&so->so_rcv, tcp_rcvspace+((tcp_rcvspace%mss)?(mss-(tcp_rcvspace%mss)):0));
1722    
1723     DEBUG_MISC((dfd, " returning mss = %d\n", mss));
1724    
1725     return mss;
1726     }