ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/Windows/router/tcp.cpp
Revision: 1.2
Committed: 2004-12-26T23:24:33Z (19 years, 10 months ago) by gbeauche
Branch: MAIN
CVS Tags: nigel-build-19, nigel-build-17
Changes since 1.1: +1 -1 lines
Log Message:
cross compile fixes

File Contents

# Content
1 /*
2 * tcp.cpp - ip router
3 *
4 * Basilisk II (C) 1997-2001 Christian Bauer
5 *
6 * Windows platform specific code copyright (C) Lauri Pesonen
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23 /*
24 * Features implemented:
25 * state machine, flow control, sequence numbers, RST/SYN/FIN/ACK/PSH
26 *
27 * Features not implemented:
28 * oob data, urgent pointer, window sliding, some options
29 * "Half-Nagle" implementation is a bit weird (mac-router interface; winsock has it on by default)
30 *
31 *
32 * All possible tcp state machine transitions:
33 *
34 * CLOSED -> LISTEN passive open
35 * CLOSED -> SYN_SENT active open SYN->
36 *
37 * LISTEN -> SYN_SENT send data SYN->
38 * LISTEN -> SYN_RCVD ->SYN SYN+ACK->
39 *
40 * SYN_SENT -> SYN_RCVD ->SYN SYN+ACK->
41 * SYN_SENT -> ESTABLISHED ->SYN+ACK ACK->
42 * SYN_SENT -> CLOSED close/timeout
43 *
44 * SYN_RCVD -> CLOSED timeout RST->
45 * SYN_RCVD -> LISTEN ->RST
46 * SYN_RCVD -> ESTABLISHED ->ACK
47 * SYN_RCVD -> FINWAIT_1 close FIN->
48 *
49 * ESTABLISHED -> FINWAIT_1 close FIN->
50 * ESTABLISHED -> CLOSE_WAIT ->FIN ACK->
51 *
52 * CLOSE_WAIT -> LAST_ACK close FIN->
53 *
54 * LAST_ACK -> CLOSED ->ACK
55 *
56 * FINWAIT_1 -> CLOSING ->FIN ACK->
57 * FINWAIT_1 -> FINWAIT_2 ->ACK
58 * FINWAIT_1 -> TIME_WAIT ->FIN+ACK ACK->
59 *
60 * FINWAIT_2 -> TIME_WAIT ->FIN ACK->
61 *
62 * CLOSING -> TIME_WAIT ->ACK
63 *
64 * TIME_WAIT -> CLOSED timeout (2*msl)
65 *
66 */
67
68 #include "sysdeps.h"
69
70 #define WIN32_LEAN_AND_MEAN
71 #include <windows.h>
72 #include <process.h>
73
74 #include "cpu_emulation.h"
75 #include "ws2tcpip.h"
76 #include "ether_windows.h"
77 #include "ether.h"
78 #include "prefs.h"
79 #include "router.h"
80 #include "router_types.h"
81 #include "dynsockets.h"
82 #include "iphelp.h"
83 #include "tcp.h"
84 #include "dump.h"
85 #include "mib/interfaces.h"
86 #include "ftp.h"
87
88 #if DEBUG
89 #pragma optimize("",off)
90 #endif
91
92 #include "debug.h"
93
// Size of both socket tables in this file (sockets[] and l_sockets[]).
94 // If you need more, use multiple threads.
95 #define MAX_SOCKETS MAXIMUM_WAIT_OBJECTS
96
97 // If true, always sends the PSH tcp flag with data.
98 // Otherwise only when a full buffer was received.
99 #define PUSH_ALWAYS 0
100
101 // In milliseconds. A TCP implementation should implement
102 // this dynamically, adapting the timeout value to match to the
103 // averaged packet round-trip time.
104 #define RESEND_TIMEOUT 750
105
106 // Just time out incoming connections after 5 secs if Mac has no time to reply
107 // No backlogs.
108 #define SYN_FLOOD_PROTECTION_TIMEOUT 5000
109
// Size in bytes of each per-socket read and write buffer.
// NOTE(review): presumably a 1500-byte Ethernet MTU minus 40 bytes of IP+TCP headers - confirm.
110 const int MAX_SEGMENT_SIZE = 1460;
111
112 // Shorthands
// ISSET is true when any bit of x is set in f; ISCLEAR when none is.
113 #define ISSET(f,x) ( ((f) & (x)) != 0 )
114 #define ISCLEAR(f,x) ( ((f) & (x)) == 0 )
115
116 // Local aliases
// Short names for the tcp header flag constants.
117 #define URG tcp_flags_URG
118 #define ACK tcp_flags_ACK
119 #define PSH tcp_flags_PSH
120 #define RST tcp_flags_RST
121 #define SYN tcp_flags_SYN
122 #define FIN tcp_flags_FIN
123
124 // Local aliases
// Short names for the tcp connection state constants.
125 #define CLOSED tcp_state_closed
126 #define LISTEN tcp_state_listen
127 #define SYN_SENT tcp_state_syn_sent
128 #define SYN_RCVD tcp_state_syn_rcvd
129 #define ESTABLISHED tcp_state_established
130 #define CLOSE_WAIT tcp_state_close_wait
131 #define LAST_ACK tcp_state_last_ack
132 #define FINWAIT_1 tcp_state_finwait_1
133 #define FINWAIT_2 tcp_state_finwait_2
134 #define CLOSING tcp_state_closing
135 #define TIME_WAIT tcp_state_time_wait
136
// For debugging only: printable names of the tcp states, in the order
// the table lists them (index 0 is "CLOSED", index 10 is "TIME_WAIT").
static const char *_tcp_state_name[] = {
	"CLOSED",
	"LISTEN",
	"SYN_SENT",
	"SYN_RCVD",
	"ESTABLISHED",
	"CLOSE_WAIT",
	"LAST_ACK",
	"FINWAIT_1",
	"FINWAIT_2",
	"CLOSING",
	"TIME_WAIT"
};

// Bounds-checked table lookup. Log statements print the state of slots
// that may be unused or stale, so an out-of-range value must not index
// past the end of the table. Returns "UNKNOWN" for such values.
static inline const char *tcp_state_name_checked( const int i )
{
	const int count = (int)(sizeof(_tcp_state_name) / sizeof(_tcp_state_name[0]));
	return (i >= 0 && i < count) ? _tcp_state_name[i] : "UNKNOWN";
}

// The argument is evaluated exactly once.
#define STATENAME(i) tcp_state_name_checked( (int)(i) )
152
// Entered by the i/o completion routines and by write_tcp() before they
// touch the socket tables; the socket utility functions assume it is held.
153 static CRITICAL_SECTION tcp_section;
154
// State of one connection between the Mac and a remote host.
// One entry of the sockets[] table; a slot is live while in_use is true.
155 typedef struct {
156 SOCKET s;
157 int state;
158
159 uint32 ip_src; // "source" is the mac, dest is the remote host,
160 uint32 ip_dest; // no matter who opened the connection.
161 uint16 src_port; // all in host byte order.
162 uint16 dest_port;
163
164 struct sockaddr_in from; // remote host address, network byte order.
165 int from_len;
166
167 // note: no true window sliding, only one buffer.
168 WSABUF buffers_read[1]; // data from remote host to Mac
169 DWORD buffer_count_read;
170 DWORD bytes_received;
171 DWORD flags_read;
172 WSAOVERLAPPED overlapped_read; // hEvent carries the slot index for the completion routine
173
174 WSABUF buffers_write[1]; // data from Mac to remote host
175 DWORD buffer_count_write;
176 DWORD bytes_written;
177 DWORD flags_write;
178 WSAOVERLAPPED overlapped_write; // hEvent carries the slot index for the completion routine
179
180 bool remote_closed; // remote will not send any more data
181 bool accept_more_data_from_mac; // are we ready to accept more data from mac
182
183 uint32 seq_in; // will ack this mac sequence number
184 uint32 seq_out; // next sequence number to mac (unless a resend is needed)
185 uint32 mac_ack; // mac has acked this byte count. can be used to determine when to send some more data
186
187 uint32 bytes_to_send; // total send block size
188 uint32 bytes_remaining_to_send; // unsent byte count
189
190 uint16 mac_window; // mac tcp receive window, slides according to the window principle
191 uint16 our_window; // not really used
192 uint16 mac_mss; // maximum segment size that mac reported at SYN handshaking
193
194 // resend info
195 uint32 last_seq_out; // remember last packet seq number if a resend is needed
196 uint32 resend_timeout; // currently set to 0.75 secs but not updated
197 uint32 stream_to_mac_stalled_until; // tick count indicating resend time
198
199 DWORD time_wait; // do a graceful close after MSL*2
200 DWORD msl;
201
202 int child; // index into l_sockets[] of an attached listening socket (ftp), or -1
203
204 WSAEVENT ev; // used to signal remote-initiated close and host-initiated connect.
205
206 bool in_use;
207 } tcp_socket_t;
208
// Connection table, indexed by slot number.
209 static tcp_socket_t sockets[MAX_SOCKETS];
210
// One listening (server) socket. A slot is free while port is 0.
211 typedef struct {
212 SOCKET s;
213 uint16 port; // port to listen on, host byte order; 0 marks a free slot
214 uint32 ip; // if non-zero, only this remote address (host byte order) is accepted
215 uint32 iface; // local interface address to bind to, host byte order
216 bool once;
217 int parent; // reset to -1 on allocation and release; NOTE(review): presumably the owning sockets[] slot - confirm
218 WSAEVENT ev; // registered for FD_ACCEPT once listening has started
219 } tcp_listening_socket_t;
220
// Listening socket table, indexed by slot number.
221 static tcp_listening_socket_t l_sockets[MAX_SOCKETS];
222
// Forward declaration: completion routine handed to WSARecv() by b_recfrom().
223 static void CALLBACK tcp_read_completion(
224 DWORD error,
225 DWORD bytes_read,
226 LPWSAOVERLAPPED lpOverlapped,
227 DWORD flags
228 );
229
// Forward declaration: completion routine handed to WSASend() by b_send().
// The second parameter is named bytes_written in the definition.
230 static void CALLBACK tcp_write_completion(
231 DWORD error,
232 DWORD bytes_read,
233 LPWSAOVERLAPPED lpOverlapped,
234 DWORD flags
235 );
236
237 // socket utilities assume that the critical section has already been entered.
238 static void free_socket( const int t )
239 {
240 _WSAResetEvent( sockets[t].ev );
241 if(sockets[t].s != INVALID_SOCKET) {
242 _closesocket( sockets[t].s );
243 sockets[t].s = INVALID_SOCKET;
244 }
245 sockets[t].state = CLOSED;
246 sockets[t].stream_to_mac_stalled_until = 0;
247 sockets[t].in_use = false;
248 sockets[t].time_wait = 0;
249
250 // if there was an attached listening socket (ftp), close it.
251 int lst = sockets[t].child;
252 if( lst >= 0 ) {
253 if(l_sockets[lst].s != INVALID_SOCKET) {
254 D(bug(" closing listening socket %d\r\n", lst));
255 _closesocket( l_sockets[lst].s );
256 l_sockets[lst].s = INVALID_SOCKET;
257 }
258 l_sockets[lst].port = 0;
259 l_sockets[lst].parent = -1;
260 }
261 sockets[t].child = -1;
262 }
263
264 static int alloc_socket()
265 {
266 static int last_allocated_socket = -1;
267
268 int i = last_allocated_socket;
269 for( int j=0; j<MAX_SOCKETS; j++ ) {
270 if( ++i >= MAX_SOCKETS ) i = 0;
271 if( !sockets[i].in_use ) {
272 D(bug("<%d> Socket allocated\r\n", i));
273
274 last_allocated_socket = i;
275 sockets[i].in_use = true;
276
277 sockets[i].s = INVALID_SOCKET;
278 sockets[i].state = CLOSED;
279 sockets[i].remote_closed = false;
280
281 sockets[i].accept_more_data_from_mac = false;
282
283 sockets[i].ip_src = sockets[i].ip_dest = 0;
284 // sockets[i].src_port = sockets[i].dest_port = 0;
285
286 memset( &sockets[i].overlapped_read, 0, sizeof(sockets[i].overlapped_read) );
287 sockets[i].overlapped_read.hEvent = (HANDLE)i;
288 memset( &sockets[i].overlapped_write, 0, sizeof(sockets[i].overlapped_write) );
289 sockets[i].overlapped_write.hEvent = (HANDLE)i;
290
291 sockets[i].bytes_received = 0;
292 sockets[i].bytes_written = 0;
293
294 sockets[i].flags_read = 0;
295 sockets[i].flags_write = 0;
296
297 // sockets[i].from_len = sizeof(struct sockaddr_in);
298 // memset( &sockets[i].from, 0, sizeof(sockets[i].from) );
299 // sockets[i].from.sin_family = AF_INET;
300
301 sockets[i].buffer_count_read = 1;
302 sockets[i].buffers_read[0].len = MAX_SEGMENT_SIZE;
303 if(!sockets[i].buffers_read[0].buf) {
304 sockets[i].buffers_read[0].buf = new char [sockets[i].buffers_read[0].len];
305 }
306
307 sockets[i].buffer_count_write = 1;
308 sockets[i].buffers_write[0].len = MAX_SEGMENT_SIZE;
309 if(!sockets[i].buffers_write[0].buf) {
310 sockets[i].buffers_write[0].buf = new char [sockets[i].buffers_write[0].len];
311 }
312
313 sockets[i].mac_window = MAX_SEGMENT_SIZE; // updated for all mac datagrams
314 sockets[i].our_window = MAX_SEGMENT_SIZE; // should use about 8-16 kB, really
315 sockets[i].mac_mss = 0; // not known yet
316
317 sockets[i].time_wait = 0;
318 sockets[i].msl = 5000L; // The round-trip time can be hard to estimate.
319
320 sockets[i].seq_in = 0;
321 sockets[i].seq_out = 0x00000001;
322 sockets[i].mac_ack = 0;
323 sockets[i].stream_to_mac_stalled_until = 0;
324
325 sockets[i].resend_timeout = RESEND_TIMEOUT;
326
327 sockets[i].child = -1;
328
329 break;
330 }
331 }
332 if(i == MAX_SOCKETS) {
333 D(bug("Out of free sockets\r\n"));
334 i = -1;
335 }
336 return i;
337 }
338
339 static int alloc_new_socket( const uint16 src_port, const uint16 dest_port, const uint32 ip_dest )
340 {
341 int t = alloc_socket();
342
343 if(t >= 0) {
344 sockets[t].s = _socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
345 if(sockets[t].s == INVALID_SOCKET) {
346 free_socket( t );
347 t = -1;
348 } else {
349 sockets[t].src_port = src_port;
350 sockets[t].dest_port = dest_port;
351
352 sockets[t].from_len = sizeof(sockets[t].from);
353 memset( &sockets[t].from, 0, sockets[t].from_len );
354 sockets[t].from.sin_family = AF_INET;
355 sockets[t].from.sin_port = htons(dest_port);
356 sockets[t].from.sin_addr.s_addr = htonl(ip_dest);
357
358 struct sockaddr_in to;
359 memset( &to, 0, sizeof(to) );
360 to.sin_family = AF_INET;
361
362 if( _bind ( sockets[t].s, (const struct sockaddr *)&to, sizeof(to) ) == 0 ) {
363 D(bug("<%d> socket bound\r\n", t));
364 } else {
365 if( _WSAGetLastError() == WSAEINPROGRESS ) {
366 D(bug("<%d> bind: a blocking call is in progress.\r\n", t));
367 } else {
368 D(bug("<%d> bind failed with error code %d\r\n", t, _WSAGetLastError()));
369 }
370 free_socket( t );
371 t = -1;
372 }
373 }
374 }
375 return t;
376 }
377
378 static int get_socket_index( const uint16 src_port, const uint16 dest_port )
379 {
380 for( int i=0; i<MAX_SOCKETS; i++ ) {
381 if(sockets[i].in_use && sockets[i].src_port == src_port && sockets[i].dest_port == dest_port ) {
382 return i;
383 }
384 }
385 return -1;
386 }
387
388 static int get_socket_index( const uint16 src_port )
389 {
390 for( int i=0; i<MAX_SOCKETS; i++ ) {
391 if(sockets[i].in_use && sockets[i].src_port == src_port ) {
392 return i;
393 }
394 }
395 return -1;
396 }
397
398 static int find_socket( const uint16 src_port, const uint16 dest_port )
399 {
400 int i = get_socket_index( src_port, dest_port );
401 if( i < 0 ) {
402 i = get_socket_index( src_port );
403 if( i >= 0 ) {
404 if( sockets[i].s == INVALID_SOCKET ) {
405 D(bug("find_socket reusing slot %d...\r\n", i));
406 sockets[i].in_use = false;
407 } else {
408 D(bug("find_socket forcing close %d...\r\n", i));
409 free_socket( i );
410 }
411 i = -1;
412 }
413 }
414
415 D(bug("<%d> find_socket(%d,%d): %s\r\n", i, src_port, dest_port, i>=0 ? "found" : "not found"));
416
417 return i;
418 }
419
420 static int alloc_listen_socket( const uint16 port, const uint32 ip, const uint32 iface, const bool once )
421 {
422 static int last_allocated_socket = -1;
423
424 int i = last_allocated_socket;
425
426 for( int j=0; j<MAX_SOCKETS; j++ ) {
427 if( ++i >= MAX_SOCKETS ) i = 0;
428 if( l_sockets[i].port == 0 ) {
429 D(bug("[%d] Slot allocated for listening port %d\r\n", i, port));
430 l_sockets[i].port = port;
431 l_sockets[i].ip = ip;
432 l_sockets[i].iface = iface;
433 l_sockets[i].once = once;
434 l_sockets[i].parent = -1;
435 last_allocated_socket = i;
436 _WSAResetEvent( l_sockets[i].ev );
437 return i;
438 }
439 }
440 return -1;
441 }
442
443 static void tcp_start_listen( const int i )
444 {
445 if( l_sockets[i].port ) {
446 uint32 iface = l_sockets[i].iface;
447
448 D(bug("[%d] binding to interface 0x%08X\r\n", i, iface));
449
450 l_sockets[i].s = _socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
451 if(l_sockets[i].s != INVALID_SOCKET) {
452 struct sockaddr_in to;
453 memset( &to, 0, sizeof(to) );
454 to.sin_family = AF_INET;
455 to.sin_port = htons( l_sockets[i].port );
456 to.sin_addr.s_addr = htonl( iface );
457
458 if( _bind ( l_sockets[i].s, (const struct sockaddr *)&to, sizeof(to) ) == 0 )
459 {
460 D(bug("[%d] socket bound to port %d on interface 0x%08X\r\n", i, l_sockets[i].port, iface));
461 if( _listen( l_sockets[i].s, SOMAXCONN ) == SOCKET_ERROR ) {
462 D(bug("[%d] listen() failed with error code %d\r\n", i, _WSAGetLastError()));
463 } else {
464 D(bug("[%d] listening to port %d\r\n", i, l_sockets[i].port));
465 _WSAResetEvent( l_sockets[i].ev );
466 if( SOCKET_ERROR == _WSAEventSelect( l_sockets[i].s, l_sockets[i].ev, FD_ACCEPT ) ) {
467 D(bug("[%d] WSAEventSelect() failed with error code %d\r\n", i, _WSAGetLastError()));
468 }
469 }
470 } else {
471 D(bug("[%d] bind to port %d failed with error code %d\r\n", i, l_sockets[i].port, _WSAGetLastError()));
472 }
473 } else {
474 D(bug("[%d] could not create a socket for port %d, error = %d\r\n", i, l_sockets[i].port, _WSAGetLastError()));
475 }
476 }
477 }
478
479 static void set_ttl( const int t, const uint8 ttl )
480 {
481 int _ttl = ttl; // defensive programming, I know VCx
482
483 if(_setsockopt( sockets[t].s, IPPROTO_IP, IP_TTL, (const char *)&_ttl, sizeof(int) ) == SOCKET_ERROR ) {
484 D(bug("<%d> could not set ttl to %d, error=%d\r\n", t, ttl, _WSAGetLastError()));
485 } else {
486 D(bug("<%d> ttl set to %d.\r\n", t, ttl));
487 }
488 }
489
490 static void tcp_reply( const int flags, const int t )
491 {
492 int tcp_size = sizeof(tcp_t);
493
494 tcp_t *tcp = (tcp_t *)malloc( tcp_size );
495 if(tcp) {
496 memcpy( tcp->ip.mac.dest, ether_addr, 6 );
497 memcpy( tcp->ip.mac.src, router_mac_addr, 6 );
498 tcp->ip.mac.type = htons(mac_type_ip4);
499
500 tcp->ip.version = 4;
501 tcp->ip.header_len = 5;
502 tcp->ip.tos = 0;
503 tcp->ip.total_len = htons(tcp_size - sizeof(mac_t));
504 tcp->ip.ident = htons(next_ip_ident_number++);
505 tcp->ip.flags_n_frag_offset = 0;
506 tcp->ip.ttl = 128;
507 tcp->ip.proto = ip_proto_tcp;
508 tcp->ip.src = htonl(sockets[t].ip_dest);
509 tcp->ip.dest = htonl(sockets[t].ip_src);
510 make_ip4_checksum( (ip_t *)tcp );
511
512 D(bug("<%d> Reply: Seq=%d, Ack=%d\r\n", t, sockets[t].seq_out, sockets[t].seq_in));
513
514 tcp->src_port = htons(sockets[t].dest_port);
515 tcp->dest_port = htons(sockets[t].src_port);
516 tcp->seq = htonl(sockets[t].seq_out);
517 tcp->ack = htonl(sockets[t].seq_in);
518 tcp->header_len = (uint8)( 20 << 2 );
519 tcp->flags = flags;
520 tcp->window = htons( sockets[t].our_window );
521 tcp->urgent_ptr = 0;
522 make_tcp_checksum( tcp, tcp_size );
523
524 // dump_bytes( (uint8 *)tcp, tcp_size );
525
526 enqueue_packet( (uint8 *)tcp, tcp_size );
527 free(tcp);
528 }
529 }
530
531 static bool has_mac_read_space( const int t )
532 {
533 uint32 pending_bytes = sockets[t].seq_out - sockets[t].mac_ack;
534 uint32 mac_can_accept_bytes = sockets[t].mac_window - pending_bytes;
535
536 D(bug("<%d> mac_can_accept_bytes = %d\r\n", t, mac_can_accept_bytes));
537
538 // Modified Nagle, effectively disabling window sliding (which I don't support anyway):
539 return pending_bytes == 0;
540
541 // Use more of window bandwidth
542 // Enabling this would require that the buffers seq numbers are stored somewhere
543 // return mac_can_accept_bytes >= sockets[t].buffers_read[0].len;
544 }
545
546 static bool b_recfrom( const int t )
547 {
548 bool result;
549
550 if( !has_mac_read_space(t) ) {
551 D(bug("<%d> read stalled, mac cannot accept any more data\r\n", t));
552
553 sockets[t].stream_to_mac_stalled_until = GetTickCount() + sockets[t].resend_timeout;
554 return true;
555 }
556
557 int ret = _WSARecv(
558 sockets[t].s,
559 sockets[t].buffers_read,
560 sockets[t].buffer_count_read,
561 &sockets[t].bytes_received,
562 &sockets[t].flags_read,
563 &sockets[t].overlapped_read,
564 tcp_read_completion
565 );
566
567 if(ret == SOCKET_ERROR) {
568 int socket_error = _WSAGetLastError();
569 if(socket_error == WSA_IO_PENDING) {
570 D(bug("<%d> WSARecv() i/o pending\r\n", t));
571 result = true;
572 } else {
573 D(bug("<%d> WSARecv() returned error %d\r\n", t, socket_error));
574 result = false;
575 }
576 } else /*if(ret == 0) */ {
577 D(bug("<%d> WSARecv() ok\r\n", t));
578 // Completion routine call is already scheduled.
579 result = true;
580 }
581 return result;
582 }
583
584 static bool b_send( const int t )
585 {
586 int ret = _WSASend(
587 sockets[t].s,
588 sockets[t].buffers_write,
589 sockets[t].buffer_count_write,
590 &sockets[t].bytes_written,
591 sockets[t].flags_write,
592 &sockets[t].overlapped_write,
593 tcp_write_completion
594 );
595
596 bool result;
597 if(ret == SOCKET_ERROR) {
598 int socket_error = _WSAGetLastError();
599 if(socket_error == WSA_IO_PENDING) {
600 D(bug("<%d> WSASend() i/o pending\r\n", t));
601 result = true;
602 } else {
603 D(bug("<%d> WSASend() returned %d\r\n", t, socket_error));
604 result = false;
605 }
606 } else /*if(ret == 0) */ {
607 D(bug("<%d> WSASend() ok\r\n", t));
608 // Completion routine call is already scheduled.
609 result = true;
610 }
611 return result;
612 }
613
614 static void send_buffer( const int t, const bool resending )
615 {
616 if(resending) {
617 if(sockets[t].last_seq_out == 0) {
618 D(bug("<%d> resend failure\r\n", t ));
619 return;
620 }
621 sockets[t].seq_out = sockets[t].last_seq_out;
622 } else {
623 sockets[t].last_seq_out = sockets[t].seq_out;
624 }
625
626 D(bug("<%d> %s data to Mac: Seq=%d, Ack=%d\r\n", t, (resending ? "resending" : "sending"), sockets[t].seq_out, sockets[t].seq_in));
627
628 uint32 bytes_read = sockets[t].bytes_received;
629
630 if( sockets[t].mac_mss && bytes_read > sockets[t].mac_mss ) {
631 D(bug("<%d> impossible: %d bytes to send, Mac mss is only %d\r\n", t, sockets[t].mac_mss && bytes_read, sockets[t].mac_mss));
632 }
633
634 int tcp_size = sizeof(tcp_t) + bytes_read;
635
636 tcp_t *tcp = (tcp_t *)malloc( tcp_size );
637 if(tcp) {
638 // Build MAC
639 // memcpy( tcp->ip.mac.dest, sockets[t].mac_src, 6 );
640 memcpy( tcp->ip.mac.dest, ether_addr, 6 );
641 memcpy( tcp->ip.mac.src, router_mac_addr, 6 );
642 tcp->ip.mac.type = htons(mac_type_ip4);
643
644 // Build IP
645 tcp->ip.version = 4;
646 tcp->ip.header_len = 5;
647 tcp->ip.tos = 0;
648 tcp->ip.total_len = htons(sizeof(tcp_t) - sizeof(mac_t) + bytes_read); // no options
649 tcp->ip.ident = htons(next_ip_ident_number++);
650 tcp->ip.flags_n_frag_offset = 0;
651 tcp->ip.ttl = 128; // one hop actually!
652 tcp->ip.proto = ip_proto_tcp;
653 tcp->ip.src = htonl(sockets[t].ip_dest);
654 tcp->ip.dest = htonl(sockets[t].ip_src);
655 make_ip4_checksum( (ip_t *)tcp );
656
657 // Copy payload (used by tcp checksum)
658 memcpy( (char *)tcp + sizeof(tcp_t), sockets[t].buffers_read[0].buf, bytes_read );
659
660 // Build tcp
661 tcp->src_port = htons(sockets[t].dest_port);
662 tcp->dest_port = htons(sockets[t].src_port);
663
664 tcp->seq = htonl(sockets[t].seq_out);
665 tcp->ack = htonl(sockets[t].seq_in);
666
667 tcp->header_len = (uint8)( 20 << 2 );
668 #if PUSH_ALWAYS
669 tcp->flags = ACK|PSH;
670 #else
671 tcp->flags = (bytes_read == MAX_SEGMENT_SIZE) ? ACK : (ACK|PSH);
672 #endif
673 tcp->window = htons( sockets[t].our_window );
674 tcp->urgent_ptr = 0;
675 make_tcp_checksum( tcp, tcp_size );
676
677 sockets[t].seq_out += bytes_read;
678
679 // dump_bytes( (uint8 *)tcp, tcp_size );
680
681 enqueue_packet( (uint8 *)tcp, tcp_size );
682 free(tcp);
683 }
684 }
685
686 static void CALLBACK tcp_read_completion(
687 DWORD error,
688 DWORD bytes_read,
689 LPWSAOVERLAPPED lpOverlapped,
690 DWORD flags
691 )
692 {
693 EnterCriticalSection( &tcp_section );
694
695 const int t = (int)lpOverlapped->hEvent;
696
697 sockets[t].bytes_received = bytes_read;
698
699 D(bug("<%d> tcp_read_completion(error=%d, bytes_read=%d)\r\n", t, error, bytes_read));
700
701 D(bug("<%d> tcp_read_completion() start, old state = %s\r\n", t, STATENAME(sockets[t].state)));
702
703 if(!sockets[t].in_use) {
704 D(bug("<%d> ignoring canceled read\r\n", t));
705 } else {
706 if( error != 0 ) {
707 D(bug("<%d> resetting after read error\r\n", t));
708 tcp_reply( RST, t );
709 free_socket(t);
710 } else {
711 if(bytes_read == 0) {
712 _closesocket( sockets[t].s );
713 sockets[t].s = INVALID_SOCKET;
714 } else if( bytes_read > 0) {
715 send_buffer( t, false );
716 }
717
718 switch( sockets[t].state ) {
719 case SYN_RCVD:
720 if( bytes_read == 0 ) {
721 D(bug("<%d> Closing: SYN_RCVD -> FINWAIT_1\r\n", t));
722 tcp_reply( ACK|FIN, t );
723 sockets[t].seq_out++;
724 sockets[t].state = FINWAIT_1;
725 }
726 break;
727 case ESTABLISHED:
728 if( bytes_read == 0 ) {
729 D(bug("<%d> Closing: ESTABLISHED -> FINWAIT_1\r\n", t));
730 tcp_reply( ACK|FIN, t );
731 sockets[t].seq_out++;
732 sockets[t].state = FINWAIT_1;
733 }
734 break;
735 case LISTEN:
736 tcp_reply( SYN, t );
737 sockets[t].seq_out++;
738 sockets[t].state = SYN_SENT;
739 sockets[t].time_wait = GetTickCount() + SYN_FLOOD_PROTECTION_TIMEOUT;
740 D(bug("<%d> LISTEN -> SYN_SENT\r\n", t));
741 break;
742 case CLOSE_WAIT:
743 if( bytes_read == 0) {
744 tcp_reply( ACK|FIN, t );
745 sockets[t].seq_out++;
746 sockets[t].state = LAST_ACK;
747 D(bug("<%d> Closing: CLOSE_WAIT -> LAST_ACK\r\n", t));
748 if(sockets[t].remote_closed) {
749 // Just in case that mac gets out of sync.
750 _closesocket(sockets[t].s);
751 sockets[t].s = INVALID_SOCKET;
752 }
753 }
754 break;
755 default:
756 break;
757 }
758
759 if(!is_router_shutting_down && sockets[t].s != INVALID_SOCKET) {
760 if(sockets[t].state != LISTEN) {
761 b_recfrom(t);
762 }
763 }
764 }
765 }
766
767 LeaveCriticalSection( &tcp_section );
768 }
769
770 static void CALLBACK tcp_write_completion(
771 DWORD error,
772 DWORD bytes_written,
773 LPWSAOVERLAPPED lpOverlapped,
774 DWORD flags
775 )
776 {
777 EnterCriticalSection( &tcp_section );
778
779 const int t = (int)lpOverlapped->hEvent;
780
781 sockets[t].bytes_written = bytes_written;
782 sockets[t].bytes_remaining_to_send -= bytes_written;
783
784 D(bug("<%d> tcp_write_completion(error=%d, bytes_written=%d)\r\n", t, error, bytes_written));
785
786 if(!sockets[t].in_use) {
787 D(bug("<%d> ignoring canceled write\r\n", t));
788 } else {
789 if(is_router_shutting_down || sockets[t].s == INVALID_SOCKET) {
790 D(bug("<%d> is not alive for sending.\r\n", t));
791 } else {
792 if( sockets[t].bytes_remaining_to_send <= 0 ) {
793 D(bug("<%d> all data sent, accepting some more.\r\n", t));
794 sockets[t].seq_in += sockets[t].bytes_to_send;
795 sockets[t].bytes_to_send = sockets[t].bytes_remaining_to_send = 0; // superfluous
796 tcp_reply( ACK, t );
797 sockets[t].accept_more_data_from_mac = true;
798 } else {
799 D(bug("<%d> %d bytes (of %d total) remaining, sending.\r\n", t, sockets[t].bytes_remaining_to_send, sockets[t].bytes_to_send));
800 sockets[t].buffers_write[0].len = sockets[t].bytes_remaining_to_send;
801 char *p = sockets[t].buffers_write[0].buf;
802 memmove( p, &p[bytes_written], sockets[t].bytes_remaining_to_send );
803 if(!b_send(t)) {
804 } else {
805 }
806 }
807 }
808 }
809
810 LeaveCriticalSection( &tcp_section );
811 }
812
813 static void tcp_connect_callback( const int t )
814 {
815 D(bug("<%d> tcp_connect_callback() start, old state = %s\r\n", t, STATENAME(sockets[t].state)));
816
817 switch( sockets[t].state ) {
818 case LISTEN:
819 tcp_reply( SYN|ACK, t );
820 sockets[t].seq_out++;
821 sockets[t].state = SYN_RCVD;
822 D(bug("<%d> Connect: LISTEN -> SYN_RCVD\r\n", t));
823 break;
824 default:
825 break;
826 }
827 D(bug("<%d> tcp_connect_callback() end, new state = %s\r\n", t, STATENAME(sockets[t].state)));
828 }
829
830 static void tcp_accept_callback( const int lst )
831 {
832 D(bug("[%d] tcp_accept_callback()\r\n", lst));
833
834 struct sockaddr_in to;
835 memset( &to, 0, sizeof(to) );
836 to.sin_family = AF_INET;
837 int tolen = sizeof(to);
838
839 SOCKET s = _accept( l_sockets[lst].s, (struct sockaddr *)&to, &tolen );
840 if( s == INVALID_SOCKET ) {
841 D(bug("[%d] connection not accepted, error code %d\r\n", lst, _WSAGetLastError()));
842 } else {
843 _WSAEventSelect( s, 0, 0 );
844
845 uint16 src_port = l_sockets[lst].port;
846 uint16 dest_port = ntohs(to.sin_port);
847 uint32 ip_dest = ntohl(to.sin_addr.s_addr);
848
849 D(bug("[%d] connection accepted, local port:%d, remote %s:%d\r\n", lst, src_port, _inet_ntoa(to.sin_addr), dest_port));
850
851 if( l_sockets[lst].ip != 0 && l_sockets[lst].ip != ip_dest ) {
852 _closesocket( s );
853 D(bug("[%d] authorization failure. connection closed.\r\n", lst ));
854 } else {
855 int t = alloc_new_socket( src_port, dest_port, ip_dest );
856 if( t < 0 ) {
857 D(bug("<%d> out of slot space, connection dropped\r\n", t ));
858 free_socket(t);
859 } else {
860 sockets[t].s = s;
861 sockets[t].state = LISTEN;
862 sockets[t].src_port = src_port;
863 sockets[t].dest_port = dest_port;
864 sockets[t].ip_src = macos_ip_address;
865 sockets[t].ip_dest = ip_dest;
866
867 sockets[t].seq_out = 0x00000001;
868 sockets[t].seq_in = 0; // not known yet
869 sockets[t].mac_ack = sockets[t].seq_out; // zero out pending bytes
870
871 tcp_reply( SYN, t );
872 sockets[t].seq_out++;
873 sockets[t].state = SYN_SENT;
874 sockets[t].time_wait = GetTickCount() + SYN_FLOOD_PROTECTION_TIMEOUT;
875 D(bug("<%d> Connect: LISTEN -> SYN_SENT\r\n", t));
876
877 _WSAResetEvent( sockets[t].ev );
878 if( SOCKET_ERROR == _WSAEventSelect( sockets[t].s, sockets[t].ev, FD_CLOSE ) ) {
879 D(bug("<%d> WSAEventSelect() failed with error code %d\r\n", t, _WSAGetLastError()));
880 }
881
882 // No data from the remote host is needed until the connection is established.
883 // So don't initiate read yet.
884 }
885 }
886 }
887 }
888
889 /*
890 MSS is the only option I care about, and since I'm on ethernet
891 I already pretty much know everything needed.
892
893 AFAIK window scaling is not in effect unless both parties specify it,
894 and I'm not doing it.
895 */
896 static void process_options( const int t, const uint8 *opt, int len, uint32 &mss )
897 {
898 mss = 0;
899
900 while( len > 0 ) {
901 switch( *opt ) {
902 case 0: // End of Option List
903 D(bug("<%d> End of Option List\r\n", t));
904 len = 0;
905 break;
906 case 1: // No-Operation
907 D(bug("<%d> No-Operation\r\n", t));
908 len--;
909 opt++;
910 break;
911 case 2: // Maximum Segment Size
912 {
913 mss = ntohs( *((uint16 *)&opt[2]) );
914 D(bug("<%d> Maximum Segment Size = %d\r\n", t, mss));
915 len -= 4;
916 opt += 4;
917 }
918 break;
919 case 3: // Window Scale
920 {
921 int wscale = opt[2];
922 D(bug("<%d> Window Scale = %d\r\n", t, (int)wscale));
923 len -= 3;
924 opt += 3;
925 }
926 break;
927 case 4: // Sack-Permitted
928 D(bug("<%d> Sack-Permitted option is set\r\n", t));
929 len -= 2;
930 opt += 2;
931 break;
932 case 5: // Sack
933 {
934 int sack_len = opt[1];
935 int hf = (sack_len-2) / 4;
936 D(bug("<%d> Sack, %d half-blocks\r\n", t, hf));
937 len -= sack_len;
938 opt += sack_len;
939 }
940 break;
941 case 8: // Time Stamps
942 {
943 int valve = ntohl( *((uint32 *)&opt[2]) );
944 int ereply = ntohl( *((uint32 *)&opt[6]) );
945 D(bug("<%d> Time Stamps, TS valve = 0x%X, TS echo reply = 0x%X\r\n", t, valve, ereply));
946 len -= 10;
947 opt += 10;
948 }
949 break;
950 default:
951 D(bug("<%d> Unknown tcp header option 0x%02x, breaking out\r\n", t, (int)*opt));
952 len = 0;
953 break;
954 }
955 }
956 }
957
958 void write_tcp( tcp_t *tcp, int len )
959 {
960 if(len < sizeof(tcp_t)) {
961 D(bug("<%d> Too small tcp packet(%d) on unknown slot, dropped\r\n", -1, len));
962 return;
963 }
964 uint16 src_port = ntohs(tcp->src_port);
965 uint16 dest_port = ntohs(tcp->dest_port);
966
967 BOOL ok = true;
968 BOOL handle_data = false;
969 BOOL initiate_read = false;
970
971 EnterCriticalSection( &tcp_section );
972
973 int t = find_socket( src_port, dest_port );
974
975 if(t < 0) {
976 t = alloc_new_socket( src_port, dest_port, ntohl(tcp->ip.dest) );
977 ok = t >= 0;
978 }
979
980 if(ok) {
981 D(bug("<%d> write_tcp %d bytes from port %d to port %d\r\n", t, len, src_port, dest_port));
982 } else {
983 D(bug("<%d> FAILED write_tcp %d bytes from port %d to port %d\r\n", t, len, src_port, dest_port));
984 }
985
986 if( ok && ISSET(tcp->flags,RST) ) {
987 D(bug("<%d> RST set, resetting socket\r\n", t));
988 if( sockets[t].s != INVALID_SOCKET ) {
989 D(bug("<%d> doing an extra shutdown (ie4)\r\n", t));
990 _shutdown( sockets[t].s, SD_BOTH );
991 }
992 free_socket( t );
993 ok = false;
994 }
995
996 if(ok) {
997 D(bug("<%d> State machine start = %s\r\n", t, STATENAME(sockets[t].state)));
998
999 // always update receive window
1000 sockets[t].mac_window = ntohs(tcp->window);
1001
1002 int header_len = tcp->header_len >> 2;
1003 int option_bytes = header_len - 20;
1004 char *data = (char *)tcp + sizeof(tcp_t) + option_bytes;
1005 int dlen = len - sizeof(tcp_t) - option_bytes;
1006
1007 if( !ISSET(tcp->flags,ACK) ) {
1008 D(bug("<%d> ACK not set\r\n", t));
1009 }
1010 if( ISSET(tcp->flags,SYN) ) {
1011 D(bug("<%d> SYN set\r\n", t));
1012
1013 // Note that some options are valid even if there is no SYN.
1014 // I don't care about those however.
1015
1016 uint32 new_mss;
1017 process_options( t, (uint8 *)data - option_bytes, option_bytes, new_mss );
1018 if(new_mss) {
1019 sockets[t].mac_mss = (int)new_mss;
1020 if( new_mss < sockets[t].buffers_read[0].len ) {
1021 sockets[t].buffers_read[0].len = new_mss;
1022 }
1023 D(bug("<%d> Max segment size set to %d\r\n", t, new_mss));
1024 }
1025 }
1026 if( ISSET(tcp->flags,FIN) ) {
1027 D(bug("<%d> FIN set\r\n", t));
1028 }
1029
1030 // The sequence number Mac expects to see next time.
1031 sockets[t].mac_ack = ntohl(tcp->ack);
1032
1033 D(bug("<%d> From Mac: Seq=%d, Ack=%d, window=%d, router Seq=%d\r\n", t, ntohl(tcp->seq), sockets[t].mac_ack, sockets[t].mac_window, sockets[t].seq_out));
1034
1035 if( sockets[t].stream_to_mac_stalled_until &&
1036 sockets[t].mac_ack == sockets[t].seq_out &&
1037 (sockets[t].state == ESTABLISHED || sockets[t].state == CLOSE_WAIT) )
1038 {
1039 if( has_mac_read_space(t) ) {
1040 initiate_read = true;
1041 sockets[t].stream_to_mac_stalled_until = 0;
1042 D(bug("<%d> read resumed, mac can accept more data\r\n", t));
1043 }
1044 }
1045
1046 switch( sockets[t].state ) {
1047 case CLOSED:
1048 sockets[t].src_port = src_port;
1049 sockets[t].dest_port = dest_port;
1050 sockets[t].ip_src = ntohl(tcp->ip.src);
1051 sockets[t].ip_dest = ntohl(tcp->ip.dest);
1052
1053 if( ISSET(tcp->flags,SYN) ) {
1054
1055 sockets[t].seq_out = 0x00000001;
1056 sockets[t].seq_in = ntohl(tcp->seq) + 1;
1057
1058 _WSAResetEvent( sockets[t].ev );
1059 if( SOCKET_ERROR == _WSAEventSelect( sockets[t].s, sockets[t].ev, FD_CONNECT | FD_CLOSE ) ) {
1060 D(bug("<%d> WSAEventSelect() failed with error code %d\r\n", t, _WSAGetLastError()));
1061 }
1062
1063 D(bug("<%d> connecting local port %d to remote %s:%d\r\n", t, src_port, _inet_ntoa(sockets[t].from.sin_addr), dest_port));
1064
1065 sockets[t].state = LISTEN;
1066 if( _WSAConnect(
1067 sockets[t].s,
1068 (const struct sockaddr *)&sockets[t].from,
1069 sockets[t].from_len,
1070 NULL, NULL,
1071 NULL, NULL
1072 ) == SOCKET_ERROR )
1073 {
1074 int connect_error = _WSAGetLastError();
1075 if( connect_error == WSAEWOULDBLOCK ) {
1076 D(bug("<%d> WSAConnect() i/o pending.\r\n", t));
1077 } else {
1078 D(bug("<%d> WSAConnect() failed with error %d.\r\n", t, connect_error));
1079 }
1080 } else {
1081 D(bug("<%d> WSAConnect() ok.\r\n", t));
1082 }
1083 } else {
1084 if( ISSET(tcp->flags,FIN) ) {
1085 D(bug("<%d> No SYN but FIN on a closed socket.\r\n", t));
1086 free_socket(t);
1087 } else {
1088 D(bug("<%d> No SYN on a closed socket. resetting.\r\n", t));
1089 free_socket(t);
1090 }
1091 }
1092 break;
1093 case LISTEN:
1094 // handled in connect callback
1095 break;
1096 case SYN_SENT:
1097 if( ISSET(tcp->flags,SYN) && ISSET(tcp->flags,ACK) ) {
1098 sockets[t].seq_in = ntohl(tcp->seq) + 1;
1099 tcp_reply( ACK, t );
1100 sockets[t].state = ESTABLISHED;
1101 initiate_read = true;
1102 sockets[t].accept_more_data_from_mac = true;
1103 sockets[t].time_wait = 0;
1104 } else if( ISSET(tcp->flags,SYN) ) {
1105 sockets[t].seq_in = ntohl(tcp->seq) + 1;
1106 tcp_reply( ACK|SYN, t );
1107 sockets[t].seq_out++;
1108 sockets[t].state = SYN_RCVD;
1109 sockets[t].time_wait = 0;
1110 } else if( ISSET(tcp->flags,ACK) ) {
1111 // What was the bright idea here.
1112 D(bug("<%d> State is SYN_SENT, but got only ACK from Mac??\r\n", t));
1113 sockets[t].state = FINWAIT_2;
1114 sockets[t].time_wait = 0;
1115 }
1116 break;
1117 case SYN_RCVD:
1118 if( ISSET(tcp->flags,ACK) ) {
1119 sockets[t].state = ESTABLISHED;
1120 handle_data = true;
1121 initiate_read = true;
1122 sockets[t].accept_more_data_from_mac = true;
1123 }
1124 break;
1125 case ESTABLISHED:
1126 if( ISSET(tcp->flags,FIN) ) {
1127 sockets[t].seq_in++;
1128 tcp_reply( ACK, t );
1129 _shutdown( sockets[t].s, SD_SEND );
1130 sockets[t].state = CLOSE_WAIT;
1131 }
1132 handle_data = true;
1133 break;
1134 case CLOSE_WAIT:
1135 // handled in tcp_read_completion
1136 break;
1137 case LAST_ACK:
1138 if( ISSET(tcp->flags,ACK) ) {
1139 D(bug("<%d> LAST_ACK received, socket closed\r\n", t));
1140 free_socket( t );
1141 }
1142 break;
1143 case FINWAIT_1:
1144 if( ISSET(tcp->flags,FIN) && ISSET(tcp->flags,ACK) ) {
1145 sockets[t].seq_in++;
1146 tcp_reply( ACK, t );
1147 if(sockets[t].remote_closed) {
1148 _closesocket(sockets[t].s);
1149 sockets[t].s = INVALID_SOCKET;
1150 } else {
1151 _shutdown( sockets[t].s, SD_SEND );
1152 }
1153 sockets[t].state = TIME_WAIT;
1154 sockets[t].time_wait = GetTickCount() + 2 * sockets[t].msl;
1155 } else if( ISSET(tcp->flags,FIN) ) {
1156 sockets[t].seq_in++;
1157 tcp_reply( ACK, t );
1158 if(sockets[t].remote_closed) {
1159 _closesocket(sockets[t].s);
1160 sockets[t].s = INVALID_SOCKET;
1161 } else {
1162 _shutdown( sockets[t].s, SD_SEND );
1163 }
1164 sockets[t].state = CLOSING;
1165 } else if( ISSET(tcp->flags,ACK) ) {
1166 sockets[t].state = FINWAIT_2;
1167 }
1168 break;
1169 case FINWAIT_2:
1170 if( ISSET(tcp->flags,FIN) ) {
1171 sockets[t].seq_in++;
1172 tcp_reply( ACK, t );
1173 if(sockets[t].remote_closed) {
1174 _closesocket(sockets[t].s);
1175 sockets[t].s = INVALID_SOCKET;
1176 } else {
1177 _shutdown( sockets[t].s, SD_SEND );
1178 }
1179 sockets[t].state = TIME_WAIT;
1180 sockets[t].time_wait = GetTickCount() + 2 * sockets[t].msl;
1181 }
1182 break;
1183 case CLOSING:
1184 if( ISSET(tcp->flags,ACK) ) {
1185 sockets[t].state = TIME_WAIT;
1186 sockets[t].time_wait = GetTickCount() + 2 * sockets[t].msl;
1187 }
1188 break;
1189 case TIME_WAIT:
1190 // Catching stray packets: wait MSL * 2 seconds, -> CLOSED
1191 // Timer already set since we might not get here at all.
1192 // I'm using exceptionally low MSL value (5 secs).
1193 D(bug("<%d> time wait, datagram discarded\r\n", t));
1194 break;
1195 }
1196
1197 // The "t" descriptor may already be freed. However, it's safe
1198 // to peek the state value inside the critical section.
1199 D(bug("<%d> State machine end = %s\r\n", t, STATENAME(sockets[t].state)));
1200
1201 D(bug("<%d> handle_data=%d, initiate_read=%d\r\n", t, handle_data, initiate_read));
1202
1203 if( handle_data && dlen && sockets[t].accept_more_data_from_mac ) {
1204 if( sockets[t].seq_in != ntohl(tcp->seq) ) {
1205 D(bug("<%d> dropping duplicate datagram seq=%d, expected=%d\r\n", t, ntohl(tcp->seq), sockets[t].seq_in));
1206 } else {
1207 set_ttl( t, tcp->ip.ttl );
1208
1209 struct sockaddr_in to;
1210 memset( &to, 0, sizeof(to) );
1211 to.sin_family = AF_INET;
1212 to.sin_port = tcp->dest_port;
1213 to.sin_addr.s_addr = tcp->ip.dest;
1214
1215 D(bug("<%d> sending %d bytes to remote host\r\n", t, dlen));
1216
1217 sockets[t].accept_more_data_from_mac = false;
1218
1219 if( dlen > MAX_SEGMENT_SIZE ) {
1220 D(bug("<%d> IMPOSSIBLE: b_send() dropped %d bytes! \r\n", t, dlen-MAX_SEGMENT_SIZE));
1221 dlen = MAX_SEGMENT_SIZE;
1222 }
1223
1224 memcpy( sockets[t].buffers_write[0].buf, data, dlen );
1225
1226 sockets[t].buffers_write[0].len = dlen;
1227 sockets[t].bytes_remaining_to_send = dlen;
1228 sockets[t].bytes_to_send = dlen;
1229
1230 bool send_now = false;
1231 if( ISSET(tcp->flags,PSH) ) {
1232 send_now = true;
1233 } else {
1234 // todo -- delayed send
1235 send_now = true;
1236 }
1237
1238 if(send_now) {
1239
1240 // Patch ftp server or client address if needed.
1241
1242 int lst = 1;
1243 bool is_pasv;
1244 uint16 ftp_data_port = 0;
1245
1246 if(ftp_is_ftp_port(sockets[t].src_port)) {
1247 // Local ftp server may be entering to passive mode.
1248 is_pasv = true;
1249 ftp_parse_port_command(
1250 sockets[t].buffers_write[0].buf,
1251 dlen,
1252 ftp_data_port,
1253 is_pasv
1254 );
1255 } else if(ftp_is_ftp_port(sockets[t].dest_port)) {
1256 // Local ftp client may be using port command.
1257 is_pasv = false;
1258 ftp_parse_port_command(
1259 sockets[t].buffers_write[0].buf,
1260 dlen,
1261 ftp_data_port,
1262 is_pasv
1263 );
1264 }
1265
1266 if(ftp_data_port) {
1267 D(bug("<%d> ftp %s command detected, port %d\r\n", t, (is_pasv ? "SERVER PASV REPLY" : "CLIENT PORT"), ftp_data_port ));
1268
1269 // Note: for security reasons, only allow incoming connection from sockets[t].ip_dest
1270 lst = alloc_listen_socket( ftp_data_port, sockets[t].ip_dest, 0/*iface*/, true );
1271
1272 if(lst < 0) {
1273 D(bug("<%d> no more free slots\r\n", t));
1274 } else {
1275 // First start listening (need to know the local name later)
1276 tcp_start_listen( lst );
1277
1278 // When t is closed, lst must be closed too.
1279 sockets[t].child = lst;
1280 l_sockets[lst].parent = t;
1281
1282 // Find out the local name
1283 struct sockaddr_in name;
1284 int namelen = sizeof(name);
1285 memset( &name, 0, sizeof(name) );
1286 if( _getsockname( sockets[t].s, (struct sockaddr *)&name, &namelen ) == SOCKET_ERROR ) {
1287 D(bug("_getsockname() failed, error=%d\r\n", _WSAGetLastError() ));
1288 }
1289
1290 ftp_modify_port_command(
1291 sockets[t].buffers_write[0].buf,
1292 dlen,
1293 MAX_SEGMENT_SIZE,
1294 ntohl(name.sin_addr.s_addr),
1295 ftp_data_port,
1296 is_pasv
1297 );
1298
1299 sockets[t].buffers_write[0].len = dlen;
1300 sockets[t].bytes_remaining_to_send = dlen;
1301 // Do not change "bytes_to_send" field as it is used for ack calculation
1302 }
1303 } // end of ftp patch
1304
1305 if(!b_send(t)) {
1306 // on error, close the ftp data listening socket if one was created
1307 if(lst >= 0) {
1308 D(bug("[%d] closing listening port %d after write error\r\n", t, l_sockets[lst].port));
1309 _closesocket( l_sockets[lst].s );
1310 l_sockets[lst].s = INVALID_SOCKET;
1311 l_sockets[lst].port = 0;
1312 l_sockets[lst].ip = 0;
1313 l_sockets[lst].parent = -1;
1314 sockets[t].child = -1;
1315 }
1316 }
1317 }
1318 }
1319 }
1320
1321 if(initiate_read) {
1322 if(!b_recfrom(t)) {
1323 // post icmp error message
1324 }
1325 }
1326 }
1327
1328 LeaveCriticalSection( &tcp_section );
1329 }
1330
1331 /*
1332 - Dispatch remote close and connect events.
1333 - Expire time-waits.
1334 - Handle resend timeouts.
1335 */
1336 static WINAPI unsigned int tcp_connect_close_thread(void *arg)
1337 {
1338 WSAEVENT wait_handles[MAX_SOCKETS];
1339
1340 for( int i=0; i<MAX_SOCKETS; i++ ) {
1341 wait_handles[i] = sockets[i].ev;
1342 }
1343
1344 while(!is_router_shutting_down) {
1345 DWORD ret = WaitForMultipleObjects(
1346 MAX_SOCKETS,
1347 wait_handles,
1348 FALSE,
1349 200
1350 );
1351 if(is_router_shutting_down) break;
1352
1353 EnterCriticalSection( &tcp_section );
1354 if( ret >= WAIT_OBJECT_0 && ret < WAIT_OBJECT_0 + MAX_SOCKETS ) {
1355 const int t = ret - WAIT_OBJECT_0;
1356
1357 D(bug("<%d> Event %d\r\n", t, ret));
1358
1359 if(sockets[t].in_use) {
1360 WSANETWORKEVENTS what;
1361
1362 if( _WSAEnumNetworkEvents( sockets[t].s, sockets[t].ev, &what ) != SOCKET_ERROR ) {
1363 if( what.lNetworkEvents & FD_CONNECT ) {
1364 if( what.iErrorCode[FD_CONNECT_BIT] == 0 ) {
1365 D(bug("<%d> Connect ok\r\n", t));
1366 tcp_connect_callback(t);
1367 } else {
1368 D(bug("<%d> Connect error=%d\r\n", t, what.iErrorCode[FD_CONNECT_BIT]));
1369 // Post icmp error
1370 }
1371 } else if( what.lNetworkEvents & FD_CLOSE ) {
1372 if( what.iErrorCode[FD_CLOSE_BIT] == 0 ) {
1373 D(bug("<%d> graceful close, state = %s\r\n", t, STATENAME(sockets[t].state)));
1374 } else {
1375 D(bug("<%d> abortive close, state = %s, code=%d\r\n", t, STATENAME(sockets[t].state), what.iErrorCode[FD_CLOSE_BIT]));
1376 }
1377 sockets[t].remote_closed = true;
1378 }
1379 } else {
1380 int err = _WSAGetLastError();
1381 if( err == WSAENOTSOCK ) {
1382 D(bug("<%d> WSAEnumNetworkEvents: socket is already closed\r\n", t));
1383 } else {
1384 D(bug("<%d> WSAEnumNetworkEvents failed with error code %d, freeing slot\r\n", t, err));
1385 free_socket( t );
1386 }
1387 }
1388 }
1389 _WSAResetEvent( sockets[t].ev );
1390 } else {
1391 static int interval = 5;
1392 if( !--interval ) {
1393 for( int i=0; i<MAX_SOCKETS; i++ ) {
1394 if(sockets[i].in_use) {
1395 DWORD tmw = sockets[i].time_wait;
1396 DWORD stl = sockets[i].stream_to_mac_stalled_until;
1397 if( tmw ) {
1398 if( GetTickCount() >= tmw ) {
1399 if( sockets[i].state == SYN_SENT ) {
1400 /*
1401 A very basic SYN flood protection. Note that watching
1402 SYN_SENT instead of SYN_RCVD, because the state codes are
1403 from the point of view of the Mac-Router interface, not Router-Remote.
1404 */
1405 D(bug("<%d> SYN_SENT time-out expired\r\n", i));
1406 } else {
1407 D(bug("<%d> TIME_WAIT expired\r\n", i));
1408 }
1409 free_socket( i );
1410 }
1411 } else if( stl ) {
1412 if( sockets[i].state == ESTABLISHED ) {
1413 if( GetTickCount() >= stl ) {
1414 D(bug("<%d> RESEND timeout expired\r\n", i));
1415 sockets[i].stream_to_mac_stalled_until = GetTickCount() + sockets[i].resend_timeout;
1416 send_buffer( i, true );
1417 }
1418 } else {
1419 sockets[i].stream_to_mac_stalled_until = 0;
1420 }
1421 }
1422 }
1423 }
1424 interval = 5;
1425 }
1426 }
1427 LeaveCriticalSection( &tcp_section );
1428 }
1429 return 0;
1430 }
1431
1432 static WINAPI unsigned int tcp_listen_thread(void *arg)
1433 {
1434 WSAEVENT wait_handles[MAX_SOCKETS];
1435
1436 for( int i=0; i<MAX_SOCKETS; i++ ) {
1437 wait_handles[i] = l_sockets[i].ev;
1438 tcp_start_listen( i );
1439 }
1440
1441 while(!is_router_shutting_down) {
1442 DWORD ret = WaitForMultipleObjects(
1443 MAX_SOCKETS,
1444 wait_handles,
1445 FALSE,
1446 200
1447 );
1448
1449 if(is_router_shutting_down) break;
1450
1451 EnterCriticalSection( &tcp_section );
1452 if( ret >= WAIT_OBJECT_0 && ret < WAIT_OBJECT_0 + MAX_SOCKETS ) {
1453 const int lst = ret - WAIT_OBJECT_0;
1454
1455 D(bug("[%d] connection attempt to port %d\r\n", lst, l_sockets[lst].port));
1456
1457 WSANETWORKEVENTS what;
1458
1459 if( _WSAEnumNetworkEvents( l_sockets[lst].s, l_sockets[lst].ev, &what ) != SOCKET_ERROR ) {
1460 if( what.lNetworkEvents & FD_ACCEPT ) {
1461 if( what.iErrorCode[FD_ACCEPT_BIT] == 0 ) {
1462 D(bug("[%d] Connect ok\r\n", lst));
1463 tcp_accept_callback(lst);
1464 } else {
1465 D(bug("[%d] Connect error=%d\r\n", lst, what.iErrorCode[FD_ACCEPT_BIT]));
1466 // Post icmp error
1467 }
1468 }
1469 }
1470
1471 // close on errors too
1472 if(l_sockets[lst].once) {
1473 D(bug("[%d] once mode: closing listening socket on port %d\r\n", lst, l_sockets[lst].port));
1474 if( _closesocket( l_sockets[lst].s ) == SOCKET_ERROR ) {
1475 int err = _WSAGetLastError();
1476 D(bug("[%d] close error %d\r\n", lst, err));
1477 }
1478
1479 l_sockets[lst].s = INVALID_SOCKET;
1480 l_sockets[lst].port = 0;
1481 l_sockets[lst].ip = 0;
1482
1483 int t = l_sockets[lst].parent;
1484 if( t >= 0 ) {
1485 sockets[t].child = -1;
1486 }
1487 l_sockets[lst].parent = -1;
1488 }
1489
1490 _WSAResetEvent( l_sockets[lst].ev );
1491 }
1492 LeaveCriticalSection( &tcp_section );
1493 }
1494 return 0;
1495 }
1496
1497 /*
1498 tcp_port=<port> [,<interface to bind>]
1499 tcp_port=21,192.168.0.1
1500 */
1501
1502 static void init_tcp_listen_ports()
1503 {
1504 int32 index = 0;
1505 const char *port_str;
1506 while ((port_str = PrefsFindString("tcp_port", index++)) != NULL) {
1507 uint32 iface = 0;
1508 char *if_str = strchr(port_str,',');
1509 if(if_str) {
1510 *if_str++ = 0;
1511 uint32 if_net = _inet_addr( if_str );
1512 if(if_net == INADDR_NONE) if_net = INADDR_ANY;
1513 iface = ntohl( if_net );
1514 }
1515 uint16 port = (uint16)strtoul( port_str, 0, 0 );
1516 if( port ) {
1517 uint32 ip = 0;
1518 bool once = false;
1519 alloc_listen_socket( port, ip, iface, once );
1520 }
1521 }
1522 }
1523
1524 static HANDLE tcp_handle = 0;
1525 static HANDLE tcp_l_handle = 0;
1526
1527 void init_tcp()
1528 {
1529 InitializeCriticalSection( &tcp_section );
1530
1531 for( int i=0; i<MAX_SOCKETS; i++ ) {
1532 memset( &sockets[i], 0, sizeof(tcp_socket_t) );
1533 sockets[i].s = INVALID_SOCKET;
1534 sockets[i].state = CLOSED;
1535 sockets[i].ev = _WSACreateEvent();
1536 sockets[i].child = -1;
1537 }
1538
1539 for( int i=0; i<MAX_SOCKETS; i++ ) {
1540 memset( &l_sockets[i], 0, sizeof(tcp_listening_socket_t) );
1541 l_sockets[i].s = INVALID_SOCKET;
1542 l_sockets[i].ev = _WSACreateEvent();
1543 l_sockets[i].parent = -1;
1544 /*
1545 l_sockets[i].port = 0;
1546 l_sockets[i].ip = 0;
1547 l_sockets[i].iface = 0;
1548 l_sockets[i].once = false;
1549 */
1550 }
1551
1552 init_tcp_listen_ports();
1553
1554 unsigned int tcp_tid;
1555 tcp_handle = (HANDLE)_beginthreadex( 0, 0, tcp_connect_close_thread, 0, 0, &tcp_tid );
1556
1557 unsigned int tcp_l_tid;
1558 tcp_l_handle = (HANDLE)_beginthreadex( 0, 0, tcp_listen_thread, 0, 0, &tcp_l_tid );
1559 }
1560
1561 void final_tcp()
1562 {
1563 D(bug("closing all tcp sockets\r\n"));
1564 for( int i=0; i<MAX_SOCKETS; i++ ) {
1565 if(sockets[i].s != INVALID_SOCKET) {
1566 D(bug(" closing socket %d\r\n", i));
1567 }
1568 free_socket( i );
1569 if(sockets[i].buffers_write[0].buf) {
1570 delete [] sockets[i].buffers_write[0].buf;
1571 sockets[i].buffers_write[0].buf = 0;
1572 }
1573 if(sockets[i].buffers_read[0].buf) {
1574 delete [] sockets[i].buffers_read[0].buf;
1575 sockets[i].buffers_read[0].buf = 0;
1576 }
1577 }
1578
1579 D(bug("closing all tcp listening socket\r\n"));
1580 for( int i=0; i<MAX_SOCKETS; i++ ) {
1581 if(l_sockets[i].s != INVALID_SOCKET) {
1582 D(bug(" closing listening socket %d\r\n", i));
1583 _closesocket( l_sockets[i].s );
1584 l_sockets[i].s = INVALID_SOCKET;
1585 }
1586 }
1587
1588 // The router module has already set the shutdown flag.
1589 WaitForSingleObject( tcp_handle, INFINITE );
1590 WaitForSingleObject( tcp_l_handle, INFINITE );
1591
1592 for( int i=0; i<MAX_SOCKETS; i++ ) {
1593 if(sockets[i].ev != WSA_INVALID_EVENT) {
1594 _WSACloseEvent(sockets[i].ev);
1595 sockets[i].ev = WSA_INVALID_EVENT;
1596 }
1597 }
1598 for( int i=0; i<MAX_SOCKETS; i++ ) {
1599 if(l_sockets[i].ev != WSA_INVALID_EVENT) {
1600 _WSACloseEvent(l_sockets[i].ev);
1601 l_sockets[i].ev = WSA_INVALID_EVENT;
1602 }
1603 }
1604
1605 DeleteCriticalSection( &tcp_section );
1606 }