ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/Windows/router/tcp.cpp
Revision: 1.3
Committed: 2008-01-01T09:40:34Z (16 years, 11 months ago) by gbeauche
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +1 -1 lines
Log Message:
Happy New Year!

File Contents

# User Rev Content
1 gbeauche 1.1 /*
2     * tcp.cpp - ip router
3     *
4 gbeauche 1.3 * Basilisk II (C) 1997-2008 Christian Bauer
5 gbeauche 1.1 *
6     * Windows platform specific code copyright (C) Lauri Pesonen
7     *
8     * This program is free software; you can redistribute it and/or modify
9     * it under the terms of the GNU General Public License as published by
10     * the Free Software Foundation; either version 2 of the License, or
11     * (at your option) any later version.
12     *
13     * This program is distributed in the hope that it will be useful,
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     * GNU General Public License for more details.
17     *
18     * You should have received a copy of the GNU General Public License
19     * along with this program; if not, write to the Free Software
20     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21     */
22    
23     /*
24     * Features implemented:
25     * state machine, flow control, sequence numbers, RST/SYN/FIN/ACK/PSH
26     *
27     * Features not implemented:
28     * oob data, urgent pointer, window sliding, some options
29     * "Half-Nagle" implementation is a bit weird (mac-router interface; winsock has it on by default)
30     *
31     *
32     * All possible tcp state machine transitions:
33     *
34     * CLOSED -> LISTEN passive open
35     * CLOSED -> SYN_SENT active open SYN->
36     *
37     * LISTEN -> SYN_SENT send data SYN->
38     * LISTEN -> SYN_RCVD ->SYN SYN+ACK->
39     *
40     * SYN_SENT -> SYN_RCVD ->SYN SYN+ACK->
41     * SYN_SENT -> ESTABLISHED ->SYN+ACK ACK->
42     * SYN_SENT -> CLOSED close/timeout
43     *
44     * SYN_RCVD -> CLOSED timeout RST->
45     * SYN_RCVD -> LISTEN ->RST
46     * SYN_RCVD -> ESTABLISHED ->ACK
47     * SYN_RCVD -> FINWAIT_1 close FIN->
48     *
49     * ESTABLISHED -> FINWAIT_1 close FIN->
50     * ESTABLISHED -> CLOSE_WAIT ->FIN ACK->
51     *
52     * CLOSE_WAIT -> LAST_ACK close FIN->
53     *
54     * LAST_ACK -> CLOSED ->ACK
55     *
56     * FINWAIT_1 -> CLOSING ->FIN ACK->
57     * FINWAIT_1 -> FINWAIT_2 ->ACK
58     * FINWAIT_1 -> TIME_WAIT ->FIN+ACK ACK->
59     *
60     * FINWAIT_2 -> TIME_WAIT ->FIN ACK->
61     *
62     * CLOSING -> TIME_WAIT ->ACK
63     *
64     * TIME_WAIT -> CLOSED timeout (2*msl)
65     *
66     */
67    
68     #include "sysdeps.h"
69    
70     #define WIN32_LEAN_AND_MEAN
71     #include <windows.h>
72     #include <process.h>
73    
74     #include "cpu_emulation.h"
75     #include "ws2tcpip.h"
76     #include "ether_windows.h"
77     #include "ether.h"
78     #include "prefs.h"
79     #include "router.h"
80     #include "router_types.h"
81     #include "dynsockets.h"
82     #include "iphelp.h"
83     #include "tcp.h"
84     #include "dump.h"
85 gbeauche 1.2 #include "mib/interfaces.h"
86 gbeauche 1.1 #include "ftp.h"
87    
88     #if DEBUG
89     #pragma optimize("",off)
90     #endif
91    
92     #include "debug.h"
93    
// Number of entries in the sockets[] and l_sockets[] tables.
// If you need more, use multiple threads.
#define MAX_SOCKETS MAXIMUM_WAIT_OBJECTS

// If true, always sends the PSH tcp flag with data.
// Otherwise PSH is sent only when less than a full buffer was received
// (see send_buffer()).
#define PUSH_ALWAYS 0

// In milliseconds. A TCP implementation should compute this
// dynamically, adapting the timeout value to the averaged
// packet round-trip time.
#define RESEND_TIMEOUT 750

// In milliseconds. Just time out incoming connections after 5 secs
// if the Mac has no time to reply. No backlogs.
#define SYN_FLOOD_PROTECTION_TIMEOUT 5000

// Size in bytes of each per-socket read and write buffer (see alloc_socket()).
const int MAX_SEGMENT_SIZE = 1460;
111    
// Shorthands: ISSET is true when any bit of x is set in f,
// ISCLEAR is true when none of the bits of x is set in f.
#define ISSET(f,x) ( ((f) & (x)) != 0 )
#define ISCLEAR(f,x) ( ((f) & (x)) == 0 )

// Local aliases for the tcp header flag bits
#define URG tcp_flags_URG
#define ACK tcp_flags_ACK
#define PSH tcp_flags_PSH
#define RST tcp_flags_RST
#define SYN tcp_flags_SYN
#define FIN tcp_flags_FIN

// Local aliases for the connection states kept in tcp_socket_t::state
#define CLOSED tcp_state_closed
#define LISTEN tcp_state_listen
#define SYN_SENT tcp_state_syn_sent
#define SYN_RCVD tcp_state_syn_rcvd
#define ESTABLISHED tcp_state_established
#define CLOSE_WAIT tcp_state_close_wait
#define LAST_ACK tcp_state_last_ack
#define FINWAIT_1 tcp_state_finwait_1
#define FINWAIT_2 tcp_state_finwait_2
#define CLOSING tcp_state_closing
#define TIME_WAIT tcp_state_time_wait
136    
// For debugging only: printable names, indexed by the tcp state value.
// NOTE(review): the order presumably mirrors the numeric values of the
// tcp_state_* constants, which are declared elsewhere -- confirm.
static const char *_tcp_state_name[] = {
	"CLOSED",
	"LISTEN",
	"SYN_SENT",
	"SYN_RCVD",
	"ESTABLISHED",
	"CLOSE_WAIT",
	"LAST_ACK",
	"FINWAIT_1",
	"FINWAIT_2",
	"CLOSING",
	"TIME_WAIT"
};
// No range check is done on i.
#define STATENAME(i) _tcp_state_name[i]

// Entered by the i/o completion routines and by write_tcp() before they
// touch the socket tables.
static CRITICAL_SECTION tcp_section;
154    
// One slot of the connection table: a host (winsock) socket together with
// the tcp state that is emulated towards the Mac.
typedef struct {
	SOCKET s;							// host socket, INVALID_SOCKET when there is none
	int state;							// one of the tcp state aliases (CLOSED, LISTEN, ...)

	uint32 ip_src;						// "source" is the mac, dest is the remote host,
	uint32 ip_dest;						// no matter who opened the connection.
	uint16 src_port;					// all in host byte order.
	uint16 dest_port;

	struct sockaddr_in from;			// remote host address, network byte order.
	int from_len;

	// note: no true window sliding, only one buffer.
	WSABUF buffers_read[1];				// data from remote host to Mac
	DWORD buffer_count_read;
	DWORD bytes_received;				// byte count of the last completed read
	DWORD flags_read;
	WSAOVERLAPPED overlapped_read;		// hEvent carries the slot index, see alloc_socket()

	WSABUF buffers_write[1];			// data from Mac to remote host
	DWORD buffer_count_write;
	DWORD bytes_written;				// byte count of the last completed write
	DWORD flags_write;
	WSAOVERLAPPED overlapped_write;		// hEvent carries the slot index, see alloc_socket()

	bool remote_closed;					// remote will not send any more data
	bool accept_more_data_from_mac;		// are we ready to accept more data from mac

	uint32 seq_in;						// will ack this mac sequence number
	uint32 seq_out;						// next sequence number to mac (unless a resend is needed)
	uint32 mac_ack;						// mac has acked this byte count. can be used to determine when to send some more data

	uint32 bytes_to_send;				// total send block size
	uint32 bytes_remaining_to_send;		// unsent byte count

	uint16 mac_window;					// mac tcp receive window, slides according to the window principle
	uint16 our_window;					// not really used
	uint16 mac_mss;						// maximum segment size that mac reported at SYN handshaking

	// resend info
	uint32 last_seq_out;				// remember last packet seq number if a resend is needed
	uint32 resend_timeout;				// currently set to 0.75 secs but not updated
	uint32 stream_to_mac_stalled_until;	// tick count indicating resend time

	DWORD time_wait;					// do a graceful close after MSL*2; also holds the SYN timeout deadline
	DWORD msl;							// in milliseconds, set to 5000 by alloc_socket()

	int child;							// index into l_sockets[] of an attached listening socket (ftp), -1 if none

	WSAEVENT ev;						// used to signal remote-initiated close and host-initiated connect.

	bool in_use;						// slot is allocated
} tcp_socket_t;

// The connection table. Static storage, so every field starts out zeroed.
static tcp_socket_t sockets[MAX_SOCKETS];
210    
// One slot of the listening-socket table. A slot is free when port == 0.
typedef struct {
	SOCKET s;		// host listening socket, created by tcp_start_listen()
	uint16 port;	// port to listen on, host byte order; 0 marks a free slot
	uint32 ip;		// if nonzero, only this remote address may connect (see tcp_accept_callback())
	uint32 iface;	// local interface address to bind to, host byte order
	bool once;		// NOTE(review): presumably "accept a single connection only" -- not used in the code visible here
	int parent;		// set to -1 when the slot is allocated or released
	WSAEVENT ev;	// event associated with FD_ACCEPT on s
} tcp_listening_socket_t;

// The listening-socket table.
static tcp_listening_socket_t l_sockets[MAX_SOCKETS];
222    
// Completion routine handed to WSARecv() by b_recfrom(); defined further down.
static void CALLBACK tcp_read_completion(
	DWORD error,
	DWORD bytes_read,
	LPWSAOVERLAPPED lpOverlapped,
	DWORD flags
);

// Completion routine handed to WSASend() by b_send(); defined further down.
// Note: the second parameter is the number of bytes written (the definition
// names it bytes_written).
static void CALLBACK tcp_write_completion(
	DWORD error,
	DWORD bytes_read,
	LPWSAOVERLAPPED lpOverlapped,
	DWORD flags
);
236    
237     // socket utilities assume that the critical section has already been entered.
238     static void free_socket( const int t )
239     {
240     _WSAResetEvent( sockets[t].ev );
241     if(sockets[t].s != INVALID_SOCKET) {
242     _closesocket( sockets[t].s );
243     sockets[t].s = INVALID_SOCKET;
244     }
245     sockets[t].state = CLOSED;
246     sockets[t].stream_to_mac_stalled_until = 0;
247     sockets[t].in_use = false;
248     sockets[t].time_wait = 0;
249    
250     // if there was an attached listening socket (ftp), close it.
251     int lst = sockets[t].child;
252     if( lst >= 0 ) {
253     if(l_sockets[lst].s != INVALID_SOCKET) {
254     D(bug(" closing listening socket %d\r\n", lst));
255     _closesocket( l_sockets[lst].s );
256     l_sockets[lst].s = INVALID_SOCKET;
257     }
258     l_sockets[lst].port = 0;
259     l_sockets[lst].parent = -1;
260     }
261     sockets[t].child = -1;
262     }
263    
264     static int alloc_socket()
265     {
266     static int last_allocated_socket = -1;
267    
268     int i = last_allocated_socket;
269     for( int j=0; j<MAX_SOCKETS; j++ ) {
270     if( ++i >= MAX_SOCKETS ) i = 0;
271     if( !sockets[i].in_use ) {
272     D(bug("<%d> Socket allocated\r\n", i));
273    
274     last_allocated_socket = i;
275     sockets[i].in_use = true;
276    
277     sockets[i].s = INVALID_SOCKET;
278     sockets[i].state = CLOSED;
279     sockets[i].remote_closed = false;
280    
281     sockets[i].accept_more_data_from_mac = false;
282    
283     sockets[i].ip_src = sockets[i].ip_dest = 0;
284     // sockets[i].src_port = sockets[i].dest_port = 0;
285    
286     memset( &sockets[i].overlapped_read, 0, sizeof(sockets[i].overlapped_read) );
287     sockets[i].overlapped_read.hEvent = (HANDLE)i;
288     memset( &sockets[i].overlapped_write, 0, sizeof(sockets[i].overlapped_write) );
289     sockets[i].overlapped_write.hEvent = (HANDLE)i;
290    
291     sockets[i].bytes_received = 0;
292     sockets[i].bytes_written = 0;
293    
294     sockets[i].flags_read = 0;
295     sockets[i].flags_write = 0;
296    
297     // sockets[i].from_len = sizeof(struct sockaddr_in);
298     // memset( &sockets[i].from, 0, sizeof(sockets[i].from) );
299     // sockets[i].from.sin_family = AF_INET;
300    
301     sockets[i].buffer_count_read = 1;
302     sockets[i].buffers_read[0].len = MAX_SEGMENT_SIZE;
303     if(!sockets[i].buffers_read[0].buf) {
304     sockets[i].buffers_read[0].buf = new char [sockets[i].buffers_read[0].len];
305     }
306    
307     sockets[i].buffer_count_write = 1;
308     sockets[i].buffers_write[0].len = MAX_SEGMENT_SIZE;
309     if(!sockets[i].buffers_write[0].buf) {
310     sockets[i].buffers_write[0].buf = new char [sockets[i].buffers_write[0].len];
311     }
312    
313     sockets[i].mac_window = MAX_SEGMENT_SIZE; // updated for all mac datagrams
314     sockets[i].our_window = MAX_SEGMENT_SIZE; // should use about 8-16 kB, really
315     sockets[i].mac_mss = 0; // not known yet
316    
317     sockets[i].time_wait = 0;
318     sockets[i].msl = 5000L; // The round-trip time can be hard to estimate.
319    
320     sockets[i].seq_in = 0;
321     sockets[i].seq_out = 0x00000001;
322     sockets[i].mac_ack = 0;
323     sockets[i].stream_to_mac_stalled_until = 0;
324    
325     sockets[i].resend_timeout = RESEND_TIMEOUT;
326    
327     sockets[i].child = -1;
328    
329     break;
330     }
331     }
332     if(i == MAX_SOCKETS) {
333     D(bug("Out of free sockets\r\n"));
334     i = -1;
335     }
336     return i;
337     }
338    
339     static int alloc_new_socket( const uint16 src_port, const uint16 dest_port, const uint32 ip_dest )
340     {
341     int t = alloc_socket();
342    
343     if(t >= 0) {
344     sockets[t].s = _socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
345     if(sockets[t].s == INVALID_SOCKET) {
346     free_socket( t );
347     t = -1;
348     } else {
349     sockets[t].src_port = src_port;
350     sockets[t].dest_port = dest_port;
351    
352     sockets[t].from_len = sizeof(sockets[t].from);
353     memset( &sockets[t].from, 0, sockets[t].from_len );
354     sockets[t].from.sin_family = AF_INET;
355     sockets[t].from.sin_port = htons(dest_port);
356     sockets[t].from.sin_addr.s_addr = htonl(ip_dest);
357    
358     struct sockaddr_in to;
359     memset( &to, 0, sizeof(to) );
360     to.sin_family = AF_INET;
361    
362     if( _bind ( sockets[t].s, (const struct sockaddr *)&to, sizeof(to) ) == 0 ) {
363     D(bug("<%d> socket bound\r\n", t));
364     } else {
365     if( _WSAGetLastError() == WSAEINPROGRESS ) {
366     D(bug("<%d> bind: a blocking call is in progress.\r\n", t));
367     } else {
368     D(bug("<%d> bind failed with error code %d\r\n", t, _WSAGetLastError()));
369     }
370     free_socket( t );
371     t = -1;
372     }
373     }
374     }
375     return t;
376     }
377    
378     static int get_socket_index( const uint16 src_port, const uint16 dest_port )
379     {
380     for( int i=0; i<MAX_SOCKETS; i++ ) {
381     if(sockets[i].in_use && sockets[i].src_port == src_port && sockets[i].dest_port == dest_port ) {
382     return i;
383     }
384     }
385     return -1;
386     }
387    
388     static int get_socket_index( const uint16 src_port )
389     {
390     for( int i=0; i<MAX_SOCKETS; i++ ) {
391     if(sockets[i].in_use && sockets[i].src_port == src_port ) {
392     return i;
393     }
394     }
395     return -1;
396     }
397    
398     static int find_socket( const uint16 src_port, const uint16 dest_port )
399     {
400     int i = get_socket_index( src_port, dest_port );
401     if( i < 0 ) {
402     i = get_socket_index( src_port );
403     if( i >= 0 ) {
404     if( sockets[i].s == INVALID_SOCKET ) {
405     D(bug("find_socket reusing slot %d...\r\n", i));
406     sockets[i].in_use = false;
407     } else {
408     D(bug("find_socket forcing close %d...\r\n", i));
409     free_socket( i );
410     }
411     i = -1;
412     }
413     }
414    
415     D(bug("<%d> find_socket(%d,%d): %s\r\n", i, src_port, dest_port, i>=0 ? "found" : "not found"));
416    
417     return i;
418     }
419    
420     static int alloc_listen_socket( const uint16 port, const uint32 ip, const uint32 iface, const bool once )
421     {
422     static int last_allocated_socket = -1;
423    
424     int i = last_allocated_socket;
425    
426     for( int j=0; j<MAX_SOCKETS; j++ ) {
427     if( ++i >= MAX_SOCKETS ) i = 0;
428     if( l_sockets[i].port == 0 ) {
429     D(bug("[%d] Slot allocated for listening port %d\r\n", i, port));
430     l_sockets[i].port = port;
431     l_sockets[i].ip = ip;
432     l_sockets[i].iface = iface;
433     l_sockets[i].once = once;
434     l_sockets[i].parent = -1;
435     last_allocated_socket = i;
436     _WSAResetEvent( l_sockets[i].ev );
437     return i;
438     }
439     }
440     return -1;
441     }
442    
443     static void tcp_start_listen( const int i )
444     {
445     if( l_sockets[i].port ) {
446     uint32 iface = l_sockets[i].iface;
447    
448     D(bug("[%d] binding to interface 0x%08X\r\n", i, iface));
449    
450     l_sockets[i].s = _socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
451     if(l_sockets[i].s != INVALID_SOCKET) {
452     struct sockaddr_in to;
453     memset( &to, 0, sizeof(to) );
454     to.sin_family = AF_INET;
455     to.sin_port = htons( l_sockets[i].port );
456     to.sin_addr.s_addr = htonl( iface );
457    
458     if( _bind ( l_sockets[i].s, (const struct sockaddr *)&to, sizeof(to) ) == 0 )
459     {
460     D(bug("[%d] socket bound to port %d on interface 0x%08X\r\n", i, l_sockets[i].port, iface));
461     if( _listen( l_sockets[i].s, SOMAXCONN ) == SOCKET_ERROR ) {
462     D(bug("[%d] listen() failed with error code %d\r\n", i, _WSAGetLastError()));
463     } else {
464     D(bug("[%d] listening to port %d\r\n", i, l_sockets[i].port));
465     _WSAResetEvent( l_sockets[i].ev );
466     if( SOCKET_ERROR == _WSAEventSelect( l_sockets[i].s, l_sockets[i].ev, FD_ACCEPT ) ) {
467     D(bug("[%d] WSAEventSelect() failed with error code %d\r\n", i, _WSAGetLastError()));
468     }
469     }
470     } else {
471     D(bug("[%d] bind to port %d failed with error code %d\r\n", i, l_sockets[i].port, _WSAGetLastError()));
472     }
473     } else {
474     D(bug("[%d] could not create a socket for port %d, error = %d\r\n", i, l_sockets[i].port, _WSAGetLastError()));
475     }
476     }
477     }
478    
479     static void set_ttl( const int t, const uint8 ttl )
480     {
481     int _ttl = ttl; // defensive programming, I know VCx
482    
483     if(_setsockopt( sockets[t].s, IPPROTO_IP, IP_TTL, (const char *)&_ttl, sizeof(int) ) == SOCKET_ERROR ) {
484     D(bug("<%d> could not set ttl to %d, error=%d\r\n", t, ttl, _WSAGetLastError()));
485     } else {
486     D(bug("<%d> ttl set to %d.\r\n", t, ttl));
487     }
488     }
489    
490     static void tcp_reply( const int flags, const int t )
491     {
492     int tcp_size = sizeof(tcp_t);
493    
494     tcp_t *tcp = (tcp_t *)malloc( tcp_size );
495     if(tcp) {
496     memcpy( tcp->ip.mac.dest, ether_addr, 6 );
497     memcpy( tcp->ip.mac.src, router_mac_addr, 6 );
498     tcp->ip.mac.type = htons(mac_type_ip4);
499    
500     tcp->ip.version = 4;
501     tcp->ip.header_len = 5;
502     tcp->ip.tos = 0;
503     tcp->ip.total_len = htons(tcp_size - sizeof(mac_t));
504     tcp->ip.ident = htons(next_ip_ident_number++);
505     tcp->ip.flags_n_frag_offset = 0;
506     tcp->ip.ttl = 128;
507     tcp->ip.proto = ip_proto_tcp;
508     tcp->ip.src = htonl(sockets[t].ip_dest);
509     tcp->ip.dest = htonl(sockets[t].ip_src);
510     make_ip4_checksum( (ip_t *)tcp );
511    
512     D(bug("<%d> Reply: Seq=%d, Ack=%d\r\n", t, sockets[t].seq_out, sockets[t].seq_in));
513    
514     tcp->src_port = htons(sockets[t].dest_port);
515     tcp->dest_port = htons(sockets[t].src_port);
516     tcp->seq = htonl(sockets[t].seq_out);
517     tcp->ack = htonl(sockets[t].seq_in);
518     tcp->header_len = (uint8)( 20 << 2 );
519     tcp->flags = flags;
520     tcp->window = htons( sockets[t].our_window );
521     tcp->urgent_ptr = 0;
522     make_tcp_checksum( tcp, tcp_size );
523    
524     // dump_bytes( (uint8 *)tcp, tcp_size );
525    
526     enqueue_packet( (uint8 *)tcp, tcp_size );
527     free(tcp);
528     }
529     }
530    
531     static bool has_mac_read_space( const int t )
532     {
533     uint32 pending_bytes = sockets[t].seq_out - sockets[t].mac_ack;
534     uint32 mac_can_accept_bytes = sockets[t].mac_window - pending_bytes;
535    
536     D(bug("<%d> mac_can_accept_bytes = %d\r\n", t, mac_can_accept_bytes));
537    
538     // Modified Nagle, effectively disabling window sliding (which I don't support anyway):
539     return pending_bytes == 0;
540    
541     // Use more of window bandwidth
542     // Enabling this would require that the buffers seq numbers are stored somewhere
543     // return mac_can_accept_bytes >= sockets[t].buffers_read[0].len;
544     }
545    
546     static bool b_recfrom( const int t )
547     {
548     bool result;
549    
550     if( !has_mac_read_space(t) ) {
551     D(bug("<%d> read stalled, mac cannot accept any more data\r\n", t));
552    
553     sockets[t].stream_to_mac_stalled_until = GetTickCount() + sockets[t].resend_timeout;
554     return true;
555     }
556    
557     int ret = _WSARecv(
558     sockets[t].s,
559     sockets[t].buffers_read,
560     sockets[t].buffer_count_read,
561     &sockets[t].bytes_received,
562     &sockets[t].flags_read,
563     &sockets[t].overlapped_read,
564     tcp_read_completion
565     );
566    
567     if(ret == SOCKET_ERROR) {
568     int socket_error = _WSAGetLastError();
569     if(socket_error == WSA_IO_PENDING) {
570     D(bug("<%d> WSARecv() i/o pending\r\n", t));
571     result = true;
572     } else {
573     D(bug("<%d> WSARecv() returned error %d\r\n", t, socket_error));
574     result = false;
575     }
576     } else /*if(ret == 0) */ {
577     D(bug("<%d> WSARecv() ok\r\n", t));
578     // Completion routine call is already scheduled.
579     result = true;
580     }
581     return result;
582     }
583    
584     static bool b_send( const int t )
585     {
586     int ret = _WSASend(
587     sockets[t].s,
588     sockets[t].buffers_write,
589     sockets[t].buffer_count_write,
590     &sockets[t].bytes_written,
591     sockets[t].flags_write,
592     &sockets[t].overlapped_write,
593     tcp_write_completion
594     );
595    
596     bool result;
597     if(ret == SOCKET_ERROR) {
598     int socket_error = _WSAGetLastError();
599     if(socket_error == WSA_IO_PENDING) {
600     D(bug("<%d> WSASend() i/o pending\r\n", t));
601     result = true;
602     } else {
603     D(bug("<%d> WSASend() returned %d\r\n", t, socket_error));
604     result = false;
605     }
606     } else /*if(ret == 0) */ {
607     D(bug("<%d> WSASend() ok\r\n", t));
608     // Completion routine call is already scheduled.
609     result = true;
610     }
611     return result;
612     }
613    
614     static void send_buffer( const int t, const bool resending )
615     {
616     if(resending) {
617     if(sockets[t].last_seq_out == 0) {
618     D(bug("<%d> resend failure\r\n", t ));
619     return;
620     }
621     sockets[t].seq_out = sockets[t].last_seq_out;
622     } else {
623     sockets[t].last_seq_out = sockets[t].seq_out;
624     }
625    
626     D(bug("<%d> %s data to Mac: Seq=%d, Ack=%d\r\n", t, (resending ? "resending" : "sending"), sockets[t].seq_out, sockets[t].seq_in));
627    
628     uint32 bytes_read = sockets[t].bytes_received;
629    
630     if( sockets[t].mac_mss && bytes_read > sockets[t].mac_mss ) {
631     D(bug("<%d> impossible: %d bytes to send, Mac mss is only %d\r\n", t, sockets[t].mac_mss && bytes_read, sockets[t].mac_mss));
632     }
633    
634     int tcp_size = sizeof(tcp_t) + bytes_read;
635    
636     tcp_t *tcp = (tcp_t *)malloc( tcp_size );
637     if(tcp) {
638     // Build MAC
639     // memcpy( tcp->ip.mac.dest, sockets[t].mac_src, 6 );
640     memcpy( tcp->ip.mac.dest, ether_addr, 6 );
641     memcpy( tcp->ip.mac.src, router_mac_addr, 6 );
642     tcp->ip.mac.type = htons(mac_type_ip4);
643    
644     // Build IP
645     tcp->ip.version = 4;
646     tcp->ip.header_len = 5;
647     tcp->ip.tos = 0;
648     tcp->ip.total_len = htons(sizeof(tcp_t) - sizeof(mac_t) + bytes_read); // no options
649     tcp->ip.ident = htons(next_ip_ident_number++);
650     tcp->ip.flags_n_frag_offset = 0;
651     tcp->ip.ttl = 128; // one hop actually!
652     tcp->ip.proto = ip_proto_tcp;
653     tcp->ip.src = htonl(sockets[t].ip_dest);
654     tcp->ip.dest = htonl(sockets[t].ip_src);
655     make_ip4_checksum( (ip_t *)tcp );
656    
657     // Copy payload (used by tcp checksum)
658     memcpy( (char *)tcp + sizeof(tcp_t), sockets[t].buffers_read[0].buf, bytes_read );
659    
660     // Build tcp
661     tcp->src_port = htons(sockets[t].dest_port);
662     tcp->dest_port = htons(sockets[t].src_port);
663    
664     tcp->seq = htonl(sockets[t].seq_out);
665     tcp->ack = htonl(sockets[t].seq_in);
666    
667     tcp->header_len = (uint8)( 20 << 2 );
668     #if PUSH_ALWAYS
669     tcp->flags = ACK|PSH;
670     #else
671     tcp->flags = (bytes_read == MAX_SEGMENT_SIZE) ? ACK : (ACK|PSH);
672     #endif
673     tcp->window = htons( sockets[t].our_window );
674     tcp->urgent_ptr = 0;
675     make_tcp_checksum( tcp, tcp_size );
676    
677     sockets[t].seq_out += bytes_read;
678    
679     // dump_bytes( (uint8 *)tcp, tcp_size );
680    
681     enqueue_packet( (uint8 *)tcp, tcp_size );
682     free(tcp);
683     }
684     }
685    
686     static void CALLBACK tcp_read_completion(
687     DWORD error,
688     DWORD bytes_read,
689     LPWSAOVERLAPPED lpOverlapped,
690     DWORD flags
691     )
692     {
693     EnterCriticalSection( &tcp_section );
694    
695     const int t = (int)lpOverlapped->hEvent;
696    
697     sockets[t].bytes_received = bytes_read;
698    
699     D(bug("<%d> tcp_read_completion(error=%d, bytes_read=%d)\r\n", t, error, bytes_read));
700    
701     D(bug("<%d> tcp_read_completion() start, old state = %s\r\n", t, STATENAME(sockets[t].state)));
702    
703     if(!sockets[t].in_use) {
704     D(bug("<%d> ignoring canceled read\r\n", t));
705     } else {
706     if( error != 0 ) {
707     D(bug("<%d> resetting after read error\r\n", t));
708     tcp_reply( RST, t );
709     free_socket(t);
710     } else {
711     if(bytes_read == 0) {
712     _closesocket( sockets[t].s );
713     sockets[t].s = INVALID_SOCKET;
714     } else if( bytes_read > 0) {
715     send_buffer( t, false );
716     }
717    
718     switch( sockets[t].state ) {
719     case SYN_RCVD:
720     if( bytes_read == 0 ) {
721     D(bug("<%d> Closing: SYN_RCVD -> FINWAIT_1\r\n", t));
722     tcp_reply( ACK|FIN, t );
723     sockets[t].seq_out++;
724     sockets[t].state = FINWAIT_1;
725     }
726     break;
727     case ESTABLISHED:
728     if( bytes_read == 0 ) {
729     D(bug("<%d> Closing: ESTABLISHED -> FINWAIT_1\r\n", t));
730     tcp_reply( ACK|FIN, t );
731     sockets[t].seq_out++;
732     sockets[t].state = FINWAIT_1;
733     }
734     break;
735     case LISTEN:
736     tcp_reply( SYN, t );
737     sockets[t].seq_out++;
738     sockets[t].state = SYN_SENT;
739     sockets[t].time_wait = GetTickCount() + SYN_FLOOD_PROTECTION_TIMEOUT;
740     D(bug("<%d> LISTEN -> SYN_SENT\r\n", t));
741     break;
742     case CLOSE_WAIT:
743     if( bytes_read == 0) {
744     tcp_reply( ACK|FIN, t );
745     sockets[t].seq_out++;
746     sockets[t].state = LAST_ACK;
747     D(bug("<%d> Closing: CLOSE_WAIT -> LAST_ACK\r\n", t));
748     if(sockets[t].remote_closed) {
749     // Just in case that mac gets out of sync.
750     _closesocket(sockets[t].s);
751     sockets[t].s = INVALID_SOCKET;
752     }
753     }
754     break;
755     default:
756     break;
757     }
758    
759     if(!is_router_shutting_down && sockets[t].s != INVALID_SOCKET) {
760     if(sockets[t].state != LISTEN) {
761     b_recfrom(t);
762     }
763     }
764     }
765     }
766    
767     LeaveCriticalSection( &tcp_section );
768     }
769    
770     static void CALLBACK tcp_write_completion(
771     DWORD error,
772     DWORD bytes_written,
773     LPWSAOVERLAPPED lpOverlapped,
774     DWORD flags
775     )
776     {
777     EnterCriticalSection( &tcp_section );
778    
779     const int t = (int)lpOverlapped->hEvent;
780    
781     sockets[t].bytes_written = bytes_written;
782     sockets[t].bytes_remaining_to_send -= bytes_written;
783    
784     D(bug("<%d> tcp_write_completion(error=%d, bytes_written=%d)\r\n", t, error, bytes_written));
785    
786     if(!sockets[t].in_use) {
787     D(bug("<%d> ignoring canceled write\r\n", t));
788     } else {
789     if(is_router_shutting_down || sockets[t].s == INVALID_SOCKET) {
790     D(bug("<%d> is not alive for sending.\r\n", t));
791     } else {
792     if( sockets[t].bytes_remaining_to_send <= 0 ) {
793     D(bug("<%d> all data sent, accepting some more.\r\n", t));
794     sockets[t].seq_in += sockets[t].bytes_to_send;
795     sockets[t].bytes_to_send = sockets[t].bytes_remaining_to_send = 0; // superfluous
796     tcp_reply( ACK, t );
797     sockets[t].accept_more_data_from_mac = true;
798     } else {
799     D(bug("<%d> %d bytes (of %d total) remaining, sending.\r\n", t, sockets[t].bytes_remaining_to_send, sockets[t].bytes_to_send));
800     sockets[t].buffers_write[0].len = sockets[t].bytes_remaining_to_send;
801     char *p = sockets[t].buffers_write[0].buf;
802     memmove( p, &p[bytes_written], sockets[t].bytes_remaining_to_send );
803     if(!b_send(t)) {
804     } else {
805     }
806     }
807     }
808     }
809    
810     LeaveCriticalSection( &tcp_section );
811     }
812    
813     static void tcp_connect_callback( const int t )
814     {
815     D(bug("<%d> tcp_connect_callback() start, old state = %s\r\n", t, STATENAME(sockets[t].state)));
816    
817     switch( sockets[t].state ) {
818     case LISTEN:
819     tcp_reply( SYN|ACK, t );
820     sockets[t].seq_out++;
821     sockets[t].state = SYN_RCVD;
822     D(bug("<%d> Connect: LISTEN -> SYN_RCVD\r\n", t));
823     break;
824     default:
825     break;
826     }
827     D(bug("<%d> tcp_connect_callback() end, new state = %s\r\n", t, STATENAME(sockets[t].state)));
828     }
829    
830     static void tcp_accept_callback( const int lst )
831     {
832     D(bug("[%d] tcp_accept_callback()\r\n", lst));
833    
834     struct sockaddr_in to;
835     memset( &to, 0, sizeof(to) );
836     to.sin_family = AF_INET;
837     int tolen = sizeof(to);
838    
839     SOCKET s = _accept( l_sockets[lst].s, (struct sockaddr *)&to, &tolen );
840     if( s == INVALID_SOCKET ) {
841     D(bug("[%d] connection not accepted, error code %d\r\n", lst, _WSAGetLastError()));
842     } else {
843     _WSAEventSelect( s, 0, 0 );
844    
845     uint16 src_port = l_sockets[lst].port;
846     uint16 dest_port = ntohs(to.sin_port);
847     uint32 ip_dest = ntohl(to.sin_addr.s_addr);
848    
849     D(bug("[%d] connection accepted, local port:%d, remote %s:%d\r\n", lst, src_port, _inet_ntoa(to.sin_addr), dest_port));
850    
851     if( l_sockets[lst].ip != 0 && l_sockets[lst].ip != ip_dest ) {
852     _closesocket( s );
853     D(bug("[%d] authorization failure. connection closed.\r\n", lst ));
854     } else {
855     int t = alloc_new_socket( src_port, dest_port, ip_dest );
856     if( t < 0 ) {
857     D(bug("<%d> out of slot space, connection dropped\r\n", t ));
858     free_socket(t);
859     } else {
860     sockets[t].s = s;
861     sockets[t].state = LISTEN;
862     sockets[t].src_port = src_port;
863     sockets[t].dest_port = dest_port;
864     sockets[t].ip_src = macos_ip_address;
865     sockets[t].ip_dest = ip_dest;
866    
867     sockets[t].seq_out = 0x00000001;
868     sockets[t].seq_in = 0; // not known yet
869     sockets[t].mac_ack = sockets[t].seq_out; // zero out pending bytes
870    
871     tcp_reply( SYN, t );
872     sockets[t].seq_out++;
873     sockets[t].state = SYN_SENT;
874     sockets[t].time_wait = GetTickCount() + SYN_FLOOD_PROTECTION_TIMEOUT;
875     D(bug("<%d> Connect: LISTEN -> SYN_SENT\r\n", t));
876    
877     _WSAResetEvent( sockets[t].ev );
878     if( SOCKET_ERROR == _WSAEventSelect( sockets[t].s, sockets[t].ev, FD_CLOSE ) ) {
879     D(bug("<%d> WSAEventSelect() failed with error code %d\r\n", t, _WSAGetLastError()));
880     }
881    
882     // No data from the remote host is needed until the connection is established.
883     // So don't initiate read yet.
884     }
885     }
886     }
887     }
888    
889     /*
890     MSS is the only option I care about, and since I'm on ethernet
891     I already pretty much know everything needed.
892    
893     AFAIK window scaling is not in effect unless both parties specify it,
894     and I'm not doing it.
895     */
896     static void process_options( const int t, const uint8 *opt, int len, uint32 &mss )
897     {
898     mss = 0;
899    
900     while( len > 0 ) {
901     switch( *opt ) {
902     case 0: // End of Option List
903     D(bug("<%d> End of Option List\r\n", t));
904     len = 0;
905     break;
906     case 1: // No-Operation
907     D(bug("<%d> No-Operation\r\n", t));
908     len--;
909     opt++;
910     break;
911     case 2: // Maximum Segment Size
912     {
913     mss = ntohs( *((uint16 *)&opt[2]) );
914     D(bug("<%d> Maximum Segment Size = %d\r\n", t, mss));
915     len -= 4;
916     opt += 4;
917     }
918     break;
919     case 3: // Window Scale
920     {
921     int wscale = opt[2];
922     D(bug("<%d> Window Scale = %d\r\n", t, (int)wscale));
923     len -= 3;
924     opt += 3;
925     }
926     break;
927     case 4: // Sack-Permitted
928     D(bug("<%d> Sack-Permitted option is set\r\n", t));
929     len -= 2;
930     opt += 2;
931     break;
932     case 5: // Sack
933     {
934     int sack_len = opt[1];
935     int hf = (sack_len-2) / 4;
936     D(bug("<%d> Sack, %d half-blocks\r\n", t, hf));
937     len -= sack_len;
938     opt += sack_len;
939     }
940     break;
941     case 8: // Time Stamps
942     {
943     int valve = ntohl( *((uint32 *)&opt[2]) );
944     int ereply = ntohl( *((uint32 *)&opt[6]) );
945     D(bug("<%d> Time Stamps, TS valve = 0x%X, TS echo reply = 0x%X\r\n", t, valve, ereply));
946     len -= 10;
947     opt += 10;
948     }
949     break;
950     default:
951     D(bug("<%d> Unknown tcp header option 0x%02x, breaking out\r\n", t, (int)*opt));
952     len = 0;
953     break;
954     }
955     }
956     }
957    
958     void write_tcp( tcp_t *tcp, int len )
959     {
960     if(len < sizeof(tcp_t)) {
961     D(bug("<%d> Too small tcp packet(%d) on unknown slot, dropped\r\n", -1, len));
962     return;
963     }
964     uint16 src_port = ntohs(tcp->src_port);
965     uint16 dest_port = ntohs(tcp->dest_port);
966    
967     BOOL ok = true;
968     BOOL handle_data = false;
969     BOOL initiate_read = false;
970    
971     EnterCriticalSection( &tcp_section );
972    
973     int t = find_socket( src_port, dest_port );
974    
975     if(t < 0) {
976     t = alloc_new_socket( src_port, dest_port, ntohl(tcp->ip.dest) );
977     ok = t >= 0;
978     }
979    
980     if(ok) {
981     D(bug("<%d> write_tcp %d bytes from port %d to port %d\r\n", t, len, src_port, dest_port));
982     } else {
983     D(bug("<%d> FAILED write_tcp %d bytes from port %d to port %d\r\n", t, len, src_port, dest_port));
984     }
985    
986     if( ok && ISSET(tcp->flags,RST) ) {
987     D(bug("<%d> RST set, resetting socket\r\n", t));
988     if( sockets[t].s != INVALID_SOCKET ) {
989     D(bug("<%d> doing an extra shutdown (ie4)\r\n", t));
990     _shutdown( sockets[t].s, SD_BOTH );
991     }
992     free_socket( t );
993     ok = false;
994     }
995    
996     if(ok) {
997     D(bug("<%d> State machine start = %s\r\n", t, STATENAME(sockets[t].state)));
998    
999     // always update receive window
1000     sockets[t].mac_window = ntohs(tcp->window);
1001    
1002     int header_len = tcp->header_len >> 2;
1003     int option_bytes = header_len - 20;
1004     char *data = (char *)tcp + sizeof(tcp_t) + option_bytes;
1005     int dlen = len - sizeof(tcp_t) - option_bytes;
1006    
1007     if( !ISSET(tcp->flags,ACK) ) {
1008     D(bug("<%d> ACK not set\r\n", t));
1009     }
1010     if( ISSET(tcp->flags,SYN) ) {
1011     D(bug("<%d> SYN set\r\n", t));
1012    
1013     // Note that some options are valid even if there is no SYN.
1014     // I don't care about those however.
1015    
1016     uint32 new_mss;
1017     process_options( t, (uint8 *)data - option_bytes, option_bytes, new_mss );
1018     if(new_mss) {
1019     sockets[t].mac_mss = (int)new_mss;
1020     if( new_mss < sockets[t].buffers_read[0].len ) {
1021     sockets[t].buffers_read[0].len = new_mss;
1022     }
1023     D(bug("<%d> Max segment size set to %d\r\n", t, new_mss));
1024     }
1025     }
1026     if( ISSET(tcp->flags,FIN) ) {
1027     D(bug("<%d> FIN set\r\n", t));
1028     }
1029    
1030     // The sequence number Mac expects to see next time.
1031     sockets[t].mac_ack = ntohl(tcp->ack);
1032    
1033     D(bug("<%d> From Mac: Seq=%d, Ack=%d, window=%d, router Seq=%d\r\n", t, ntohl(tcp->seq), sockets[t].mac_ack, sockets[t].mac_window, sockets[t].seq_out));
1034    
1035     if( sockets[t].stream_to_mac_stalled_until &&
1036     sockets[t].mac_ack == sockets[t].seq_out &&
1037     (sockets[t].state == ESTABLISHED || sockets[t].state == CLOSE_WAIT) )
1038     {
1039     if( has_mac_read_space(t) ) {
1040     initiate_read = true;
1041     sockets[t].stream_to_mac_stalled_until = 0;
1042     D(bug("<%d> read resumed, mac can accept more data\r\n", t));
1043     }
1044     }
1045    
1046     switch( sockets[t].state ) {
1047     case CLOSED:
1048     sockets[t].src_port = src_port;
1049     sockets[t].dest_port = dest_port;
1050     sockets[t].ip_src = ntohl(tcp->ip.src);
1051     sockets[t].ip_dest = ntohl(tcp->ip.dest);
1052    
1053     if( ISSET(tcp->flags,SYN) ) {
1054    
1055     sockets[t].seq_out = 0x00000001;
1056     sockets[t].seq_in = ntohl(tcp->seq) + 1;
1057    
1058     _WSAResetEvent( sockets[t].ev );
1059     if( SOCKET_ERROR == _WSAEventSelect( sockets[t].s, sockets[t].ev, FD_CONNECT | FD_CLOSE ) ) {
1060     D(bug("<%d> WSAEventSelect() failed with error code %d\r\n", t, _WSAGetLastError()));
1061     }
1062    
1063     D(bug("<%d> connecting local port %d to remote %s:%d\r\n", t, src_port, _inet_ntoa(sockets[t].from.sin_addr), dest_port));
1064    
1065     sockets[t].state = LISTEN;
1066     if( _WSAConnect(
1067     sockets[t].s,
1068     (const struct sockaddr *)&sockets[t].from,
1069     sockets[t].from_len,
1070     NULL, NULL,
1071     NULL, NULL
1072     ) == SOCKET_ERROR )
1073     {
1074     int connect_error = _WSAGetLastError();
1075     if( connect_error == WSAEWOULDBLOCK ) {
1076     D(bug("<%d> WSAConnect() i/o pending.\r\n", t));
1077     } else {
1078     D(bug("<%d> WSAConnect() failed with error %d.\r\n", t, connect_error));
1079     }
1080     } else {
1081     D(bug("<%d> WSAConnect() ok.\r\n", t));
1082     }
1083     } else {
1084     if( ISSET(tcp->flags,FIN) ) {
1085     D(bug("<%d> No SYN but FIN on a closed socket.\r\n", t));
1086     free_socket(t);
1087     } else {
1088     D(bug("<%d> No SYN on a closed socket. resetting.\r\n", t));
1089     free_socket(t);
1090     }
1091     }
1092     break;
1093     case LISTEN:
1094     // handled in connect callback
1095     break;
1096     case SYN_SENT:
1097     if( ISSET(tcp->flags,SYN) && ISSET(tcp->flags,ACK) ) {
1098     sockets[t].seq_in = ntohl(tcp->seq) + 1;
1099     tcp_reply( ACK, t );
1100     sockets[t].state = ESTABLISHED;
1101     initiate_read = true;
1102     sockets[t].accept_more_data_from_mac = true;
1103     sockets[t].time_wait = 0;
1104     } else if( ISSET(tcp->flags,SYN) ) {
1105     sockets[t].seq_in = ntohl(tcp->seq) + 1;
1106     tcp_reply( ACK|SYN, t );
1107     sockets[t].seq_out++;
1108     sockets[t].state = SYN_RCVD;
1109     sockets[t].time_wait = 0;
1110     } else if( ISSET(tcp->flags,ACK) ) {
1111     // What was the bright idea here.
1112     D(bug("<%d> State is SYN_SENT, but got only ACK from Mac??\r\n", t));
1113     sockets[t].state = FINWAIT_2;
1114     sockets[t].time_wait = 0;
1115     }
1116     break;
1117     case SYN_RCVD:
1118     if( ISSET(tcp->flags,ACK) ) {
1119     sockets[t].state = ESTABLISHED;
1120     handle_data = true;
1121     initiate_read = true;
1122     sockets[t].accept_more_data_from_mac = true;
1123     }
1124     break;
1125     case ESTABLISHED:
1126     if( ISSET(tcp->flags,FIN) ) {
1127     sockets[t].seq_in++;
1128     tcp_reply( ACK, t );
1129     _shutdown( sockets[t].s, SD_SEND );
1130     sockets[t].state = CLOSE_WAIT;
1131     }
1132     handle_data = true;
1133     break;
1134     case CLOSE_WAIT:
1135     // handled in tcp_read_completion
1136     break;
1137     case LAST_ACK:
1138     if( ISSET(tcp->flags,ACK) ) {
1139     D(bug("<%d> LAST_ACK received, socket closed\r\n", t));
1140     free_socket( t );
1141     }
1142     break;
1143     case FINWAIT_1:
1144     if( ISSET(tcp->flags,FIN) && ISSET(tcp->flags,ACK) ) {
1145     sockets[t].seq_in++;
1146     tcp_reply( ACK, t );
1147     if(sockets[t].remote_closed) {
1148     _closesocket(sockets[t].s);
1149     sockets[t].s = INVALID_SOCKET;
1150     } else {
1151     _shutdown( sockets[t].s, SD_SEND );
1152     }
1153     sockets[t].state = TIME_WAIT;
1154     sockets[t].time_wait = GetTickCount() + 2 * sockets[t].msl;
1155     } else if( ISSET(tcp->flags,FIN) ) {
1156     sockets[t].seq_in++;
1157     tcp_reply( ACK, t );
1158     if(sockets[t].remote_closed) {
1159     _closesocket(sockets[t].s);
1160     sockets[t].s = INVALID_SOCKET;
1161     } else {
1162     _shutdown( sockets[t].s, SD_SEND );
1163     }
1164     sockets[t].state = CLOSING;
1165     } else if( ISSET(tcp->flags,ACK) ) {
1166     sockets[t].state = FINWAIT_2;
1167     }
1168     break;
1169     case FINWAIT_2:
1170     if( ISSET(tcp->flags,FIN) ) {
1171     sockets[t].seq_in++;
1172     tcp_reply( ACK, t );
1173     if(sockets[t].remote_closed) {
1174     _closesocket(sockets[t].s);
1175     sockets[t].s = INVALID_SOCKET;
1176     } else {
1177     _shutdown( sockets[t].s, SD_SEND );
1178     }
1179     sockets[t].state = TIME_WAIT;
1180     sockets[t].time_wait = GetTickCount() + 2 * sockets[t].msl;
1181     }
1182     break;
1183     case CLOSING:
1184     if( ISSET(tcp->flags,ACK) ) {
1185     sockets[t].state = TIME_WAIT;
1186     sockets[t].time_wait = GetTickCount() + 2 * sockets[t].msl;
1187     }
1188     break;
1189     case TIME_WAIT:
1190     // Catching stray packets: wait MSL * 2 seconds, -> CLOSED
1191     // Timer already set since we might not get here at all.
1192     // I'm using exceptionally low MSL value (5 secs).
1193     D(bug("<%d> time wait, datagram discarded\r\n", t));
1194     break;
1195     }
1196    
1197     // The "t" descriptor may already be freed. However, it's safe
1198     // to peek the state value inside the critical section.
1199     D(bug("<%d> State machine end = %s\r\n", t, STATENAME(sockets[t].state)));
1200    
1201     D(bug("<%d> handle_data=%d, initiate_read=%d\r\n", t, handle_data, initiate_read));
1202    
1203     if( handle_data && dlen && sockets[t].accept_more_data_from_mac ) {
1204     if( sockets[t].seq_in != ntohl(tcp->seq) ) {
1205     D(bug("<%d> dropping duplicate datagram seq=%d, expected=%d\r\n", t, ntohl(tcp->seq), sockets[t].seq_in));
1206     } else {
1207     set_ttl( t, tcp->ip.ttl );
1208    
1209     struct sockaddr_in to;
1210     memset( &to, 0, sizeof(to) );
1211     to.sin_family = AF_INET;
1212     to.sin_port = tcp->dest_port;
1213     to.sin_addr.s_addr = tcp->ip.dest;
1214    
1215     D(bug("<%d> sending %d bytes to remote host\r\n", t, dlen));
1216    
1217     sockets[t].accept_more_data_from_mac = false;
1218    
1219     if( dlen > MAX_SEGMENT_SIZE ) {
1220     D(bug("<%d> IMPOSSIBLE: b_send() dropped %d bytes! \r\n", t, dlen-MAX_SEGMENT_SIZE));
1221     dlen = MAX_SEGMENT_SIZE;
1222     }
1223    
1224     memcpy( sockets[t].buffers_write[0].buf, data, dlen );
1225    
1226     sockets[t].buffers_write[0].len = dlen;
1227     sockets[t].bytes_remaining_to_send = dlen;
1228     sockets[t].bytes_to_send = dlen;
1229    
1230     bool send_now = false;
1231     if( ISSET(tcp->flags,PSH) ) {
1232     send_now = true;
1233     } else {
1234     // todo -- delayed send
1235     send_now = true;
1236     }
1237    
1238     if(send_now) {
1239    
1240     // Patch ftp server or client address if needed.
1241    
1242     int lst = 1;
1243     bool is_pasv;
1244     uint16 ftp_data_port = 0;
1245    
1246     if(ftp_is_ftp_port(sockets[t].src_port)) {
1247     // Local ftp server may be entering to passive mode.
1248     is_pasv = true;
1249     ftp_parse_port_command(
1250     sockets[t].buffers_write[0].buf,
1251     dlen,
1252     ftp_data_port,
1253     is_pasv
1254     );
1255     } else if(ftp_is_ftp_port(sockets[t].dest_port)) {
1256     // Local ftp client may be using port command.
1257     is_pasv = false;
1258     ftp_parse_port_command(
1259     sockets[t].buffers_write[0].buf,
1260     dlen,
1261     ftp_data_port,
1262     is_pasv
1263     );
1264     }
1265    
1266     if(ftp_data_port) {
1267     D(bug("<%d> ftp %s command detected, port %d\r\n", t, (is_pasv ? "SERVER PASV REPLY" : "CLIENT PORT"), ftp_data_port ));
1268    
1269     // Note: for security reasons, only allow incoming connection from sockets[t].ip_dest
1270     lst = alloc_listen_socket( ftp_data_port, sockets[t].ip_dest, 0/*iface*/, true );
1271    
1272     if(lst < 0) {
1273     D(bug("<%d> no more free slots\r\n", t));
1274     } else {
1275     // First start listening (need to know the local name later)
1276     tcp_start_listen( lst );
1277    
1278     // When t is closed, lst must be closed too.
1279     sockets[t].child = lst;
1280     l_sockets[lst].parent = t;
1281    
1282     // Find out the local name
1283     struct sockaddr_in name;
1284     int namelen = sizeof(name);
1285     memset( &name, 0, sizeof(name) );
1286     if( _getsockname( sockets[t].s, (struct sockaddr *)&name, &namelen ) == SOCKET_ERROR ) {
1287     D(bug("_getsockname() failed, error=%d\r\n", _WSAGetLastError() ));
1288     }
1289    
1290     ftp_modify_port_command(
1291     sockets[t].buffers_write[0].buf,
1292     dlen,
1293     MAX_SEGMENT_SIZE,
1294     ntohl(name.sin_addr.s_addr),
1295     ftp_data_port,
1296     is_pasv
1297     );
1298    
1299     sockets[t].buffers_write[0].len = dlen;
1300     sockets[t].bytes_remaining_to_send = dlen;
1301     // Do not change "bytes_to_send" field as it is used for ack calculation
1302     }
1303     } // end of ftp patch
1304    
1305     if(!b_send(t)) {
1306     // on error, close the ftp data listening socket if one was created
1307     if(lst >= 0) {
1308     D(bug("[%d] closing listening port %d after write error\r\n", t, l_sockets[lst].port));
1309     _closesocket( l_sockets[lst].s );
1310     l_sockets[lst].s = INVALID_SOCKET;
1311     l_sockets[lst].port = 0;
1312     l_sockets[lst].ip = 0;
1313     l_sockets[lst].parent = -1;
1314     sockets[t].child = -1;
1315     }
1316     }
1317     }
1318     }
1319     }
1320    
1321     if(initiate_read) {
1322     if(!b_recfrom(t)) {
1323     // post icmp error message
1324     }
1325     }
1326     }
1327    
1328     LeaveCriticalSection( &tcp_section );
1329     }
1330    
1331     /*
1332     - Dispatch remote close and connect events.
1333     - Expire time-waits.
1334     - Handle resend timeouts.
1335     */
1336     static WINAPI unsigned int tcp_connect_close_thread(void *arg)
1337     {
1338     WSAEVENT wait_handles[MAX_SOCKETS];
1339    
1340     for( int i=0; i<MAX_SOCKETS; i++ ) {
1341     wait_handles[i] = sockets[i].ev;
1342     }
1343    
1344     while(!is_router_shutting_down) {
1345     DWORD ret = WaitForMultipleObjects(
1346     MAX_SOCKETS,
1347     wait_handles,
1348     FALSE,
1349     200
1350     );
1351     if(is_router_shutting_down) break;
1352    
1353     EnterCriticalSection( &tcp_section );
1354     if( ret >= WAIT_OBJECT_0 && ret < WAIT_OBJECT_0 + MAX_SOCKETS ) {
1355     const int t = ret - WAIT_OBJECT_0;
1356    
1357     D(bug("<%d> Event %d\r\n", t, ret));
1358    
1359     if(sockets[t].in_use) {
1360     WSANETWORKEVENTS what;
1361    
1362     if( _WSAEnumNetworkEvents( sockets[t].s, sockets[t].ev, &what ) != SOCKET_ERROR ) {
1363     if( what.lNetworkEvents & FD_CONNECT ) {
1364     if( what.iErrorCode[FD_CONNECT_BIT] == 0 ) {
1365     D(bug("<%d> Connect ok\r\n", t));
1366     tcp_connect_callback(t);
1367     } else {
1368     D(bug("<%d> Connect error=%d\r\n", t, what.iErrorCode[FD_CONNECT_BIT]));
1369     // Post icmp error
1370     }
1371     } else if( what.lNetworkEvents & FD_CLOSE ) {
1372     if( what.iErrorCode[FD_CLOSE_BIT] == 0 ) {
1373     D(bug("<%d> graceful close, state = %s\r\n", t, STATENAME(sockets[t].state)));
1374     } else {
1375     D(bug("<%d> abortive close, state = %s, code=%d\r\n", t, STATENAME(sockets[t].state), what.iErrorCode[FD_CLOSE_BIT]));
1376     }
1377     sockets[t].remote_closed = true;
1378     }
1379     } else {
1380     int err = _WSAGetLastError();
1381     if( err == WSAENOTSOCK ) {
1382     D(bug("<%d> WSAEnumNetworkEvents: socket is already closed\r\n", t));
1383     } else {
1384     D(bug("<%d> WSAEnumNetworkEvents failed with error code %d, freeing slot\r\n", t, err));
1385     free_socket( t );
1386     }
1387     }
1388     }
1389     _WSAResetEvent( sockets[t].ev );
1390     } else {
1391     static int interval = 5;
1392     if( !--interval ) {
1393     for( int i=0; i<MAX_SOCKETS; i++ ) {
1394     if(sockets[i].in_use) {
1395     DWORD tmw = sockets[i].time_wait;
1396     DWORD stl = sockets[i].stream_to_mac_stalled_until;
1397     if( tmw ) {
1398     if( GetTickCount() >= tmw ) {
1399     if( sockets[i].state == SYN_SENT ) {
1400     /*
1401     A very basic SYN flood protection. Note that watching
1402     SYN_SENT instead of SYN_RCVD, because the state codes are
1403     from the point of view of the Mac-Router interface, not Router-Remote.
1404     */
1405     D(bug("<%d> SYN_SENT time-out expired\r\n", i));
1406     } else {
1407     D(bug("<%d> TIME_WAIT expired\r\n", i));
1408     }
1409     free_socket( i );
1410     }
1411     } else if( stl ) {
1412     if( sockets[i].state == ESTABLISHED ) {
1413     if( GetTickCount() >= stl ) {
1414     D(bug("<%d> RESEND timeout expired\r\n", i));
1415     sockets[i].stream_to_mac_stalled_until = GetTickCount() + sockets[i].resend_timeout;
1416     send_buffer( i, true );
1417     }
1418     } else {
1419     sockets[i].stream_to_mac_stalled_until = 0;
1420     }
1421     }
1422     }
1423     }
1424     interval = 5;
1425     }
1426     }
1427     LeaveCriticalSection( &tcp_section );
1428     }
1429     return 0;
1430     }
1431    
1432     static WINAPI unsigned int tcp_listen_thread(void *arg)
1433     {
1434     WSAEVENT wait_handles[MAX_SOCKETS];
1435    
1436     for( int i=0; i<MAX_SOCKETS; i++ ) {
1437     wait_handles[i] = l_sockets[i].ev;
1438     tcp_start_listen( i );
1439     }
1440    
1441     while(!is_router_shutting_down) {
1442     DWORD ret = WaitForMultipleObjects(
1443     MAX_SOCKETS,
1444     wait_handles,
1445     FALSE,
1446     200
1447     );
1448    
1449     if(is_router_shutting_down) break;
1450    
1451     EnterCriticalSection( &tcp_section );
1452     if( ret >= WAIT_OBJECT_0 && ret < WAIT_OBJECT_0 + MAX_SOCKETS ) {
1453     const int lst = ret - WAIT_OBJECT_0;
1454    
1455     D(bug("[%d] connection attempt to port %d\r\n", lst, l_sockets[lst].port));
1456    
1457     WSANETWORKEVENTS what;
1458    
1459     if( _WSAEnumNetworkEvents( l_sockets[lst].s, l_sockets[lst].ev, &what ) != SOCKET_ERROR ) {
1460     if( what.lNetworkEvents & FD_ACCEPT ) {
1461     if( what.iErrorCode[FD_ACCEPT_BIT] == 0 ) {
1462     D(bug("[%d] Connect ok\r\n", lst));
1463     tcp_accept_callback(lst);
1464     } else {
1465     D(bug("[%d] Connect error=%d\r\n", lst, what.iErrorCode[FD_ACCEPT_BIT]));
1466     // Post icmp error
1467     }
1468     }
1469     }
1470    
1471     // close on errors too
1472     if(l_sockets[lst].once) {
1473     D(bug("[%d] once mode: closing listening socket on port %d\r\n", lst, l_sockets[lst].port));
1474     if( _closesocket( l_sockets[lst].s ) == SOCKET_ERROR ) {
1475     int err = _WSAGetLastError();
1476     D(bug("[%d] close error %d\r\n", lst, err));
1477     }
1478    
1479     l_sockets[lst].s = INVALID_SOCKET;
1480     l_sockets[lst].port = 0;
1481     l_sockets[lst].ip = 0;
1482    
1483     int t = l_sockets[lst].parent;
1484     if( t >= 0 ) {
1485     sockets[t].child = -1;
1486     }
1487     l_sockets[lst].parent = -1;
1488     }
1489    
1490     _WSAResetEvent( l_sockets[lst].ev );
1491     }
1492     LeaveCriticalSection( &tcp_section );
1493     }
1494     return 0;
1495     }
1496    
1497     /*
1498     tcp_port=<port> [,<interface to bind>]
1499     tcp_port=21,192.168.0.1
1500     */
1501    
1502     static void init_tcp_listen_ports()
1503     {
1504     int32 index = 0;
1505     const char *port_str;
1506     while ((port_str = PrefsFindString("tcp_port", index++)) != NULL) {
1507     uint32 iface = 0;
1508     char *if_str = strchr(port_str,',');
1509     if(if_str) {
1510     *if_str++ = 0;
1511     uint32 if_net = _inet_addr( if_str );
1512     if(if_net == INADDR_NONE) if_net = INADDR_ANY;
1513     iface = ntohl( if_net );
1514     }
1515     uint16 port = (uint16)strtoul( port_str, 0, 0 );
1516     if( port ) {
1517     uint32 ip = 0;
1518     bool once = false;
1519     alloc_listen_socket( port, ip, iface, once );
1520     }
1521     }
1522     }
1523    
1524     static HANDLE tcp_handle = 0;
1525     static HANDLE tcp_l_handle = 0;
1526    
1527     void init_tcp()
1528     {
1529     InitializeCriticalSection( &tcp_section );
1530    
1531     for( int i=0; i<MAX_SOCKETS; i++ ) {
1532     memset( &sockets[i], 0, sizeof(tcp_socket_t) );
1533     sockets[i].s = INVALID_SOCKET;
1534     sockets[i].state = CLOSED;
1535     sockets[i].ev = _WSACreateEvent();
1536     sockets[i].child = -1;
1537     }
1538    
1539     for( int i=0; i<MAX_SOCKETS; i++ ) {
1540     memset( &l_sockets[i], 0, sizeof(tcp_listening_socket_t) );
1541     l_sockets[i].s = INVALID_SOCKET;
1542     l_sockets[i].ev = _WSACreateEvent();
1543     l_sockets[i].parent = -1;
1544     /*
1545     l_sockets[i].port = 0;
1546     l_sockets[i].ip = 0;
1547     l_sockets[i].iface = 0;
1548     l_sockets[i].once = false;
1549     */
1550     }
1551    
1552     init_tcp_listen_ports();
1553    
1554     unsigned int tcp_tid;
1555     tcp_handle = (HANDLE)_beginthreadex( 0, 0, tcp_connect_close_thread, 0, 0, &tcp_tid );
1556    
1557     unsigned int tcp_l_tid;
1558     tcp_l_handle = (HANDLE)_beginthreadex( 0, 0, tcp_listen_thread, 0, 0, &tcp_l_tid );
1559     }
1560    
1561     void final_tcp()
1562     {
1563     D(bug("closing all tcp sockets\r\n"));
1564     for( int i=0; i<MAX_SOCKETS; i++ ) {
1565     if(sockets[i].s != INVALID_SOCKET) {
1566     D(bug(" closing socket %d\r\n", i));
1567     }
1568     free_socket( i );
1569     if(sockets[i].buffers_write[0].buf) {
1570     delete [] sockets[i].buffers_write[0].buf;
1571     sockets[i].buffers_write[0].buf = 0;
1572     }
1573     if(sockets[i].buffers_read[0].buf) {
1574     delete [] sockets[i].buffers_read[0].buf;
1575     sockets[i].buffers_read[0].buf = 0;
1576     }
1577     }
1578    
1579     D(bug("closing all tcp listening socket\r\n"));
1580     for( int i=0; i<MAX_SOCKETS; i++ ) {
1581     if(l_sockets[i].s != INVALID_SOCKET) {
1582     D(bug(" closing listening socket %d\r\n", i));
1583     _closesocket( l_sockets[i].s );
1584     l_sockets[i].s = INVALID_SOCKET;
1585     }
1586     }
1587    
1588     // The router module has already set the shutdown flag.
1589     WaitForSingleObject( tcp_handle, INFINITE );
1590     WaitForSingleObject( tcp_l_handle, INFINITE );
1591    
1592     for( int i=0; i<MAX_SOCKETS; i++ ) {
1593     if(sockets[i].ev != WSA_INVALID_EVENT) {
1594     _WSACloseEvent(sockets[i].ev);
1595     sockets[i].ev = WSA_INVALID_EVENT;
1596     }
1597     }
1598     for( int i=0; i<MAX_SOCKETS; i++ ) {
1599     if(l_sockets[i].ev != WSA_INVALID_EVENT) {
1600     _WSACloseEvent(l_sockets[i].ev);
1601     l_sockets[i].ev = WSA_INVALID_EVENT;
1602     }
1603     }
1604    
1605     DeleteCriticalSection( &tcp_section );
1606     }