diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2022-08-04 17:58:50 +0300 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2022-08-05 13:02:11 +0300 |
commit | 007e23d6390af11582e55453269b7a51c723d2dd (patch) | |
tree | 8e8cff3ca23f5e56d9766a5ee6c6abb366611b07 /winsup/cygwin/fhandler/socket_inet.cc | |
parent | 1e428bee1c5ef7c76ba4e46e6693b913edc9bbf3 (diff) |
Cygwin: Reorganize cygwin source dir
Create subdirs and move files accordingly:
- DevDocs: doc files
- fhandler: fhandler sources, split fhandler.cc into base.cc and null.cc
- local_includes: local include files
- scripts: scripts called during build
- sec: security sources
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'winsup/cygwin/fhandler/socket_inet.cc')
-rw-r--r-- | winsup/cygwin/fhandler/socket_inet.cc | 2404 |
1 files changed, 2404 insertions, 0 deletions
diff --git a/winsup/cygwin/fhandler/socket_inet.cc b/winsup/cygwin/fhandler/socket_inet.cc new file mode 100644 index 000000000..63cc498f1 --- /dev/null +++ b/winsup/cygwin/fhandler/socket_inet.cc @@ -0,0 +1,2404 @@ +/* fhandler_socket_inet.cc. + + See fhandler.h for a description of the fhandler classes. + + This file is part of Cygwin. + + This software is a copyrighted work licensed under the terms of the + Cygwin license. Please consult the file "CYGWIN_LICENSE" for + details. */ + +#define __INSIDE_CYGWIN_NET__ +#define USE_SYS_TYPES_FD_SET + +#include "winsup.h" +/* 2014-04-24: Current Mingw headers define sockaddr_in6 using u_long (8 byte) + because a redefinition for LP64 systems is missing. This leads to a wrong + definition and size of sockaddr_in6 when building with winsock headers. + This definition is also required to use the right u_long type in subsequent + function calls. */ +#undef u_long +#define u_long __ms_u_long +#include <w32api/ws2tcpip.h> +#include <w32api/mswsock.h> +#include <w32api/mstcpip.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <unistd.h> +#include <asm/byteorder.h> +#include <sys/socket.h> +#include <sys/param.h> +#include <sys/statvfs.h> +#include <cygwin/acl.h> +#include "cygerrno.h" +#include "path.h" +#include "fhandler.h" +#include "dtable.h" +#include "cygheap.h" +#include "shared_info.h" +#include "wininfo.h" +#include "tls_pbuf.h" + +#define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT) +#define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE) + +#define LOCK_EVENTS \ + if (wsock_mtx && \ + WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \ + { + +#define UNLOCK_EVENTS \ + ReleaseMutex (wsock_mtx); \ + } + +/* Maximum number of concurrently opened sockets from all Cygwin processes + per session. Note that shared sockets (through dup/fork/exec) are + counted as one socket. */ +#define NUM_SOCKS 2048U + +#define LOCK_EVENTS \ + if (wsock_mtx && \ + WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \ + { + +#define UNLOCK_EVENTS \ + ReleaseMutex (wsock_mtx); \ + } + +static wsa_event wsa_events[NUM_SOCKS] __attribute__((section (".cygwin_dll_common"), shared)); + +static LONG socket_serial_number __attribute__((section (".cygwin_dll_common"), shared)); + +static HANDLE wsa_slot_mtx; + +static PWCHAR +sock_shared_name (PWCHAR buf, LONG num) +{ + __small_swprintf (buf, L"socket.%d", num); + return buf; +} + +static wsa_event * +search_wsa_event_slot (LONG new_serial_number) +{ + WCHAR name[32], searchname[32]; + UNICODE_STRING uname; + OBJECT_ATTRIBUTES attr; + NTSTATUS status; + + if (!wsa_slot_mtx) + { + RtlInitUnicodeString (&uname, sock_shared_name (name, 0)); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT | OBJ_OPENIF, + get_session_parent_dir (), + everyone_sd (CYG_MUTANT_ACCESS)); + status = NtCreateMutant (&wsa_slot_mtx, CYG_MUTANT_ACCESS, &attr, FALSE); + if (!NT_SUCCESS (status)) + api_fatal ("Couldn't create/open shared socket mutex %S, %y", + &uname, status); + } + switch (WaitForSingleObject (wsa_slot_mtx, INFINITE)) + { + case WAIT_OBJECT_0: + case WAIT_ABANDONED: + break; + default: + api_fatal ("WFSO failed for shared socket mutex, %E"); + break; + } + unsigned int slot = new_serial_number % NUM_SOCKS; + while (wsa_events[slot].serial_number) + { + HANDLE searchmtx; + RtlInitUnicodeString (&uname, sock_shared_name (searchname, + wsa_events[slot].serial_number)); + InitializeObjectAttributes (&attr, &uname, 0, get_session_parent_dir (), + NULL); + status = NtOpenMutant (&searchmtx, READ_CONTROL, &attr); + if (!NT_SUCCESS (status)) + break; + /* Mutex still exists, attached socket is active, try next slot. */ + NtClose (searchmtx); + slot = (slot + 1) % NUM_SOCKS; + if (slot == (new_serial_number % NUM_SOCKS)) + { + /* Did the whole array once. Too bad. */ + debug_printf ("No free socket slot"); + ReleaseMutex (wsa_slot_mtx); + return NULL; + } + } + memset (&wsa_events[slot], 0, sizeof (wsa_event)); + wsa_events[slot].serial_number = new_serial_number; + ReleaseMutex (wsa_slot_mtx); + return wsa_events + slot; +} + +/* cygwin internal: map sockaddr into internet domain address */ +static int +get_inet_addr_inet (const struct sockaddr *in, int inlen, + struct sockaddr_storage *out, int *outlen) +{ + switch (in->sa_family) + { + case AF_INET: + memcpy (out, in, inlen); + *outlen = inlen; + /* If the peer address given in connect or sendto is the ANY address, + Winsock fails with WSAEADDRNOTAVAIL, while Linux converts that into + a connection/send attempt to LOOPBACK. We're doing the same here. */ + if (((struct sockaddr_in *) out)->sin_addr.s_addr == htonl (INADDR_ANY)) + ((struct sockaddr_in *) out)->sin_addr.s_addr = htonl (INADDR_LOOPBACK); + return 0; + case AF_INET6: + memcpy (out, in, inlen); + *outlen = inlen; + /* See comment in AF_INET case. */ + if (IN6_IS_ADDR_UNSPECIFIED (&((struct sockaddr_in6 *) out)->sin6_addr)) + ((struct sockaddr_in6 *) out)->sin6_addr = in6addr_loopback; + return 0; + default: + set_errno (EAFNOSUPPORT); + return SOCKET_ERROR; + } +} + +/* There's no DLL which exports the symbol WSARecvMsg. One has to call + WSAIoctl as below to fetch the function pointer. Why on earth did the + MS developers decide not to export a normal symbol for these extension + functions? */ +inline int +get_ext_funcptr (SOCKET sock, void *funcptr) +{ + DWORD bret; + const GUID guid = WSAID_WSARECVMSG; + return WSAIoctl (sock, SIO_GET_EXTENSION_FUNCTION_POINTER, + (void *) &guid, sizeof (GUID), funcptr, sizeof (void *), + &bret, NULL, NULL); +} + +fhandler_socket_wsock::fhandler_socket_wsock () : + fhandler_socket (), + wsock_events (NULL), + wsock_mtx (NULL), + wsock_evt (NULL), + status (), + prot_info_ptr (NULL) +{ + need_fork_fixup (true); +} + +fhandler_socket_wsock::~fhandler_socket_wsock () +{ + if (prot_info_ptr) + cfree (prot_info_ptr); +} + +bool +fhandler_socket_wsock::init_events () +{ + LONG new_serial_number; + WCHAR name[32]; + UNICODE_STRING uname; + OBJECT_ATTRIBUTES attr; + NTSTATUS status; + + do + { + new_serial_number = + InterlockedIncrement (&socket_serial_number); + if (!new_serial_number) /* 0 is reserved for global mutex */ + InterlockedIncrement (&socket_serial_number); + set_ino (new_serial_number); + RtlInitUnicodeString (&uname, sock_shared_name (name, new_serial_number)); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT | OBJ_OPENIF, + get_session_parent_dir (), + everyone_sd (CYG_MUTANT_ACCESS)); + status = NtCreateMutant (&wsock_mtx, CYG_MUTANT_ACCESS, &attr, FALSE); + if (!NT_SUCCESS (status)) + { + debug_printf ("NtCreateMutant(%S), %y", &uname, status); + set_errno (ENOBUFS); + return false; + } + if (status == STATUS_OBJECT_NAME_EXISTS) + NtClose (wsock_mtx); + } + while (status == STATUS_OBJECT_NAME_EXISTS); + if ((wsock_evt = CreateEvent (&sec_all, TRUE, FALSE, NULL)) + == WSA_INVALID_EVENT) + { + debug_printf ("CreateEvent, %E"); + set_errno (ENOBUFS); + NtClose (wsock_mtx); + return false; + } + if (WSAEventSelect (get_socket (), wsock_evt, EVENT_MASK) == SOCKET_ERROR) + { + debug_printf ("WSAEventSelect, %E"); + set_winsock_errno (); + NtClose (wsock_evt); + NtClose (wsock_mtx); + return false; + } + if (!(wsock_events = search_wsa_event_slot (new_serial_number))) + { + set_errno (ENOBUFS); + NtClose (wsock_evt); + NtClose (wsock_mtx); + return false; + } + if (get_socket_type () == SOCK_DGRAM) + wsock_events->events = FD_WRITE; + return true; +} + +int +fhandler_socket_wsock::evaluate_events (const long event_mask, long &events, + const bool erase) +{ + int ret = 0; + long events_now = 0; + + WSANETWORKEVENTS evts = { 0 }; + if (!(WSAEnumNetworkEvents (get_socket (), wsock_evt, &evts))) + { + if (evts.lNetworkEvents) + { + LOCK_EVENTS; + wsock_events->events |= evts.lNetworkEvents; + events_now = (wsock_events->events & event_mask); + if (evts.lNetworkEvents & FD_CONNECT) + { + wsock_events->connect_errorcode = evts.iErrorCode[FD_CONNECT_BIT]; + + /* Setting the connect_state and calling the AF_LOCAL handshake + here allows to handle this stuff from a single point. This + is independent of FD_CONNECT being requested. Consider a + server calling connect(2) and then immediately poll(2) with + only polling for POLLIN (example: postfix), or select(2) just + asking for descriptors ready to read. + + Something weird occurs in Winsock: If you fork off and call + recv/send on the duplicated, already connected socket, another + FD_CONNECT event is generated in the child process. This + would trigger a call to af_local_connect which obviously fail. + Avoid this by calling set_connect_state only if connect_state + is connect_pending. */ + if (connect_state () == connect_pending) + { + if (wsock_events->connect_errorcode) + connect_state (connect_failed); + else if (af_local_connect ()) + { + wsock_events->connect_errorcode = WSAGetLastError (); + connect_state (connect_failed); + } + else + connect_state (connected); + } + } + UNLOCK_EVENTS; + if ((evts.lNetworkEvents & FD_OOB) && wsock_events->owner) + kill (wsock_events->owner, SIGURG); + } + } + + LOCK_EVENTS; + if ((events = events_now) != 0 + || (events = (wsock_events->events & event_mask)) != 0) + { + if (events & FD_CONNECT) + { + int wsa_err = wsock_events->connect_errorcode; + if (wsa_err) + { + /* CV 2014-04-23: This is really weird. If you call connect + asynchronously on a socket and then select, an error like + "Connection refused" is set in the event and in the SO_ERROR + socket option. If you call connect, then dup, then select, + the error is set in the event, but not in the SO_ERROR socket + option, despite the dup'ed socket handle referring to the same + socket. We're trying to workaround this problem here by + taking the connect errorcode from the event and write it back + into the SO_ERROR socket option. + + CV 2014-06-16: Call WSASetLastError *after* setsockopt since, + apparently, setsockopt sets the last WSA error code to 0 on + success. */ + ::setsockopt (get_socket (), SOL_SOCKET, SO_ERROR, + (const char *) &wsa_err, sizeof wsa_err); + WSASetLastError (wsa_err); + ret = SOCKET_ERROR; + } + /* Since FD_CONNECT is only given once, we have to keep FD_CONNECT + for connection failed sockets to have consistent behaviour in + programs calling poll/select multiple times. Example test to + non-listening port: curl -v 127.0.0.1:47 */ + if (connect_state () != connect_failed) + wsock_events->events &= ~FD_CONNECT; + wsock_events->events |= FD_WRITE; + wsock_events->connect_errorcode = 0; + } + if (events & FD_CLOSE) + { + if (evts.iErrorCode[FD_CLOSE_BIT]) + { + WSASetLastError (evts.iErrorCode[FD_CLOSE_BIT]); + ret = SOCKET_ERROR; + } + /* This test makes accept/connect behave as on Linux when accept/ + connect is called on a socket for which shutdown has been called. + The second half of this code is in the shutdown method. Note that + we only do this when called from accept/connect, not from select. + In this case erase == false, just as with read (MSG_PEEK). */ + if (erase) + { + if ((event_mask & FD_ACCEPT) && saw_shutdown_read ()) + { + WSASetLastError (WSAEINVAL); + ret = SOCKET_ERROR; + } + if (event_mask & FD_CONNECT) + { + WSASetLastError (WSAECONNRESET); + ret = SOCKET_ERROR; + } + } + } + if (erase) + wsock_events->events &= ~(events & ~(FD_WRITE | FD_CLOSE)); + } + UNLOCK_EVENTS; + + return ret; +} + +int +fhandler_socket_wsock::wait_for_events (const long event_mask, + const DWORD flags) +{ + if (async_io ()) + return 0; + + int ret; + long events = 0; + DWORD wfmo_timeout = 50; + DWORD timeout; + + WSAEVENT ev[3] = { wsock_evt, NULL, NULL }; + wait_signal_arrived here (ev[1]); + DWORD ev_cnt = 2; + if ((ev[2] = pthread::get_cancel_event ()) != NULL) + ++ev_cnt; + + if (is_nonblocking () || (flags & MSG_DONTWAIT)) + timeout = 0; + else if (event_mask & FD_READ) + timeout = rcvtimeo (); + else if (event_mask & FD_WRITE) + timeout = sndtimeo (); + else + timeout = INFINITE; + + while (!(ret = evaluate_events (event_mask, events, !(flags & MSG_PEEK))) + && !events) + { + if (timeout == 0) + { + WSASetLastError (WSAEWOULDBLOCK); + return SOCKET_ERROR; + } + + if (timeout < wfmo_timeout) + wfmo_timeout = timeout; + switch (WSAWaitForMultipleEvents (ev_cnt, ev, FALSE, wfmo_timeout, FALSE)) + { + case WSA_WAIT_TIMEOUT: + case WSA_WAIT_EVENT_0: + if (timeout != INFINITE) + timeout -= wfmo_timeout; + break; + + case WSA_WAIT_EVENT_0 + 1: + if (_my_tls.call_signal_handler ()) + break; + WSASetLastError (WSAEINTR); + return SOCKET_ERROR; + + case WSA_WAIT_EVENT_0 + 2: + pthread::static_cancel_self (); + break; + + default: + /* wsock_evt can be NULL. We're generating the same errno values + as for sockets on which shutdown has been called. */ + if (WSAGetLastError () != WSA_INVALID_HANDLE) + WSASetLastError (WSAEFAULT); + else + WSASetLastError ((event_mask & FD_CONNECT) ? WSAECONNRESET + : WSAEINVAL); + return SOCKET_ERROR; + } + } + return ret; +} + +void +fhandler_socket_wsock::release_events () +{ + if (WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) + { + HANDLE evt = wsock_evt; + HANDLE mtx = wsock_mtx; + + wsock_evt = wsock_mtx = NULL; + ReleaseMutex (mtx); + NtClose (evt); + NtClose (mtx); + } +} + +void +fhandler_socket_wsock::set_close_on_exec (bool val) +{ + set_no_inheritance (wsock_mtx, val); + set_no_inheritance (wsock_evt, val); + if (need_fixup_before ()) + { + close_on_exec (val); + debug_printf ("set close_on_exec for %s to %d", get_name (), val); + } + else + fhandler_base::set_close_on_exec (val); +} + +/* Called if a freshly created socket is not inheritable. In that case we + have to use fixup_before_fork_exec. See comment in set_socket_handle for + a description of the problem. */ +void +fhandler_socket_wsock::init_fixup_before () +{ + prot_info_ptr = (LPWSAPROTOCOL_INFOW) + cmalloc_abort (HEAP_BUF, sizeof (WSAPROTOCOL_INFOW)); + cygheap->fdtab.inc_need_fixup_before (); +} + +int +fhandler_socket_wsock::fixup_before_fork_exec (DWORD win_pid) +{ + SOCKET ret = WSADuplicateSocketW (get_socket (), win_pid, prot_info_ptr); + if (ret) + set_winsock_errno (); + else + debug_printf ("WSADuplicateSocket succeeded (%x)", prot_info_ptr->dwProviderReserved); + return (int) ret; +} + +void +fhandler_socket_wsock::fixup_after_fork (HANDLE parent) +{ + fork_fixup (parent, wsock_mtx, "wsock_mtx"); + fork_fixup (parent, wsock_evt, "wsock_evt"); + + if (!need_fixup_before ()) + { + fhandler_base::fixup_after_fork (parent); + return; + } + + SOCKET new_sock = WSASocketW (FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, + FROM_PROTOCOL_INFO, prot_info_ptr, 0, + WSA_FLAG_OVERLAPPED); + if (new_sock == INVALID_SOCKET) + { + set_winsock_errno (); + set_handle ((HANDLE) INVALID_SOCKET); + } + else + { + /* Even though the original socket was not inheritable, the duplicated + socket is potentially inheritable again. */ + SetHandleInformation ((HANDLE) new_sock, HANDLE_FLAG_INHERIT, 0); + set_handle ((HANDLE) new_sock); + debug_printf ("WSASocket succeeded (%p)", new_sock); + } +} + +void +fhandler_socket_wsock::fixup_after_exec () +{ + if (need_fixup_before () && !close_on_exec ()) + fixup_after_fork (NULL); /* No parent handle required. */ +} + +int +fhandler_socket_wsock::dup (fhandler_base *child, int flags) +{ + debug_printf ("here"); + fhandler_socket_wsock *fhs = (fhandler_socket_wsock *) child; + + if (!DuplicateHandle (GetCurrentProcess (), wsock_mtx, + GetCurrentProcess (), &fhs->wsock_mtx, + 0, TRUE, DUPLICATE_SAME_ACCESS)) + { + __seterrno (); + return -1; + } + if (!DuplicateHandle (GetCurrentProcess (), wsock_evt, + GetCurrentProcess (), &fhs->wsock_evt, + 0, TRUE, DUPLICATE_SAME_ACCESS)) + { + __seterrno (); + NtClose (fhs->wsock_mtx); + return -1; + } + if (!need_fixup_before ()) + { + int ret = fhandler_base::dup (child, flags); + if (ret) + { + NtClose (fhs->wsock_evt); + NtClose (fhs->wsock_mtx); + } + return ret; + } + + cygheap->user.deimpersonate (); + fhs->init_fixup_before (); + fhs->set_handle (get_handle ()); + int ret = fhs->fixup_before_fork_exec (GetCurrentProcessId ()); + cygheap->user.reimpersonate (); + if (!ret) + { + fhs->fixup_after_fork (GetCurrentProcess ()); + if (fhs->get_handle() != (HANDLE) INVALID_SOCKET) + return 0; + } + cygheap->fdtab.dec_need_fixup_before (); + NtClose (fhs->wsock_evt); + NtClose (fhs->wsock_mtx); + return -1; +} + +int +fhandler_socket_wsock::set_socket_handle (SOCKET sock, int af, int type, + int flags) +{ + DWORD hdl_flags; + bool lsp_fixup = false; + int file_flags = O_RDWR | O_BINARY; + + /* Usually sockets are inheritable IFS objects. Unfortunately some virus + scanners or other network-oriented software replace normal sockets + with their own kind, which is running through a filter driver called + "layered service provider" (LSP) which, fortunately, are deprecated. + + LSP sockets are not kernel objects. They are typically not marked as + inheritable, nor are they IFS handles. They are in fact not inheritable + to child processes, and it does not help to mark them inheritable via + SetHandleInformation. Subsequent socket calls in the child process fail + with error 10038, WSAENOTSOCK. + + There's a neat way to workaround these annoying LSP sockets. WSAIoctl + allows to fetch the underlying base socket, which is a normal, inheritable + IFS handle. So we fetch the base socket, duplicate it, and close the + original socket. Now we have a standard IFS socket which (hopefully) + works as expected. + + If that doesn't work for some reason, mark the sockets for duplication + via WSADuplicateSocket/WSASocket. This requires to start the child + process in SUSPENDED state so we only do this if really necessary. */ + if (!GetHandleInformation ((HANDLE) sock, &hdl_flags) + || !(hdl_flags & HANDLE_FLAG_INHERIT)) + { + int ret; + SOCKET base_sock; + DWORD bret; + + lsp_fixup = true; + debug_printf ("LSP handle: %p", sock); + ret = WSAIoctl (sock, SIO_BASE_HANDLE, NULL, 0, (void *) &base_sock, + sizeof (base_sock), &bret, NULL, NULL); + if (ret) + debug_printf ("WSAIoctl: %u", WSAGetLastError ()); + else if (base_sock != sock) + { + if (GetHandleInformation ((HANDLE) base_sock, &hdl_flags) + && (flags & HANDLE_FLAG_INHERIT)) + { + if (!DuplicateHandle (GetCurrentProcess (), (HANDLE) base_sock, + GetCurrentProcess (), (PHANDLE) &base_sock, + 0, TRUE, DUPLICATE_SAME_ACCESS)) + debug_printf ("DuplicateHandle failed, %E"); + else + { + ::closesocket (sock); + sock = base_sock; + lsp_fixup = false; + } + } + } + } + set_handle ((HANDLE) sock); + set_addr_family (af); + set_socket_type (type); + if (!init_events ()) + return -1; + if (flags & SOCK_NONBLOCK) + file_flags |= O_NONBLOCK; + if (flags & SOCK_CLOEXEC) + { + set_close_on_exec (true); + file_flags |= O_CLOEXEC; + } + set_flags (file_flags); + if (lsp_fixup) + init_fixup_before (); + set_unique_id (); + if (get_socket_type () == SOCK_DGRAM) + { + /* Workaround the problem that a missing listener on a UDP socket + in a call to sendto will result in select/WSAEnumNetworkEvents + reporting that the socket has pending data and a subsequent call + to recvfrom will return -1 with error set to WSAECONNRESET. + + This problem is a regression introduced in Windows 2000. + Instead of fixing the problem, a new socket IOCTL code has + been added, see http://support.microsoft.com/kb/263823 */ + BOOL cr = FALSE; + DWORD blen; + if (WSAIoctl (sock, SIO_UDP_CONNRESET, &cr, sizeof cr, NULL, 0, + &blen, NULL, NULL) == SOCKET_ERROR) + debug_printf ("Reset SIO_UDP_CONNRESET: WinSock error %u", + WSAGetLastError ()); + } + rmem () = 212992; + wmem () = 212992; + return 0; +} + +fhandler_socket_inet::fhandler_socket_inet () : + fhandler_socket_wsock (), + oobinline (false), + tcp_quickack (false), + tcp_fastopen (false), + tcp_keepidle (7200), /* WinSock default */ + tcp_keepcnt (10), /* WinSock default */ + tcp_keepintvl (1) /* WinSock default */ +{ +} + +fhandler_socket_inet::~fhandler_socket_inet () +{ +} + +int +fhandler_socket_inet::socket (int af, int type, int protocol, int flags) +{ + SOCKET sock; + int ret; + + /* This test should be covered by ::socket, but make sure we don't + accidentally try anything else. */ + if (type != SOCK_STREAM && type != SOCK_DGRAM && type != SOCK_RAW) + { + set_errno (EINVAL); + return -1; + } + sock = ::socket (af, type, protocol); + if (sock == INVALID_SOCKET) + { + set_winsock_errno (); + return -1; + } + ret = set_socket_handle (sock, af, type, flags); + if (ret < 0) + ::closesocket (sock); + return ret; +} + +int +fhandler_socket_inet::socketpair (int af, int type, int protocol, int flags, + fhandler_socket *fh_out) +{ + set_errno (EAFNOSUPPORT); + return -1; +} + +int +fhandler_socket_inet::bind (const struct sockaddr *name, int namelen) +{ + int res = -1; + + if (!saw_reuseaddr ()) + { + /* If the application didn't explicitely request SO_REUSEADDR, + enforce POSIX standard socket binding behaviour by setting the + SO_EXCLUSIVEADDRUSE socket option. See cygwin_setsockopt() + for a more detailed description. */ + int on = 1; + int ret = ::setsockopt (get_socket (), SOL_SOCKET, + SO_EXCLUSIVEADDRUSE, + (const char *) &on, sizeof on); + debug_printf ("%d = setsockopt(SO_EXCLUSIVEADDRUSE), %E", ret); + } + if (::bind (get_socket (), name, namelen)) + set_winsock_errno (); + else + res = 0; + + return res; +} + +int +fhandler_socket_inet::connect (const struct sockaddr *name, int namelen) +{ + struct sockaddr_storage sst; + bool reset = (name->sa_family == AF_UNSPEC + && get_socket_type () == SOCK_DGRAM); + + if (reset) + { + if (connect_state () == unconnected) + return 0; + /* To reset a connected DGRAM socket, call Winsock's connect + function with the address member of the sockaddr structure + filled with zeroes. */ + memset (&sst, 0, sizeof sst); + sst.ss_family = get_addr_family (); + } + else if (get_inet_addr_inet (name, namelen, &sst, &namelen) == SOCKET_ERROR) + return SOCKET_ERROR; + + /* Initialize connect state to "connect_pending". In the SOCK_STREAM + case, the state is ultimately set to "connected" or "connect_failed" in + wait_for_events when the FD_CONNECT event occurs. Note that the + underlying OS sockets are always non-blocking in this case and a + successfully initiated non-blocking Winsock connect always returns + WSAEWOULDBLOCK. Thus it's safe to rely on event handling. For DGRAM + sockets, however, connect can return immediately. + + Check for either unconnected or connect_failed since in both cases it's + allowed to retry connecting the socket. It's also ok (albeit ugly) to + call connect to check if a previous non-blocking connect finished. + + Set connect_state before calling connect, otherwise a race condition with + an already running select or poll might occur. */ + if (connect_state () == unconnected || connect_state () == connect_failed) + connect_state (connect_pending); + + int res = ::connect (get_socket (), (struct sockaddr *) &sst, namelen); + if (!res) + { + if (reset) + connect_state (unconnected); + else + connect_state (connected); + } + else if (!is_nonblocking () + && res == SOCKET_ERROR + && WSAGetLastError () == WSAEWOULDBLOCK) + res = wait_for_events (FD_CONNECT | FD_CLOSE, 0); + + if (res) + { + DWORD err = WSAGetLastError (); + + /* Some applications use the ugly technique to check if a non-blocking + connect succeeded by calling connect again, until it returns EISCONN. + This circumvents the event handling and connect_state is never set. + Thus we check for this situation here. */ + if (err == WSAEISCONN) + connect_state (connected); + /* Winsock returns WSAEWOULDBLOCK if the non-blocking socket cannot be + conected immediately. Convert to POSIX/Linux compliant EINPROGRESS. */ + else if (is_nonblocking () && err == WSAEWOULDBLOCK) + WSASetLastError (WSAEINPROGRESS); + /* Winsock returns WSAEINVAL if the socket is already a listener. + Convert to POSIX/Linux compliant EISCONN. */ + else if (err == WSAEINVAL && connect_state () == listener) + WSASetLastError (WSAEISCONN); + /* Any other error except WSAEALREADY means the connect failed. */ + else if (connect_state () == connect_pending && err != WSAEALREADY) + connect_state (connect_failed); + set_winsock_errno (); + } + + return res; +} + +int +fhandler_socket_inet::listen (int backlog) +{ + int res = ::listen (get_socket (), backlog); + if (res && WSAGetLastError () == WSAEINVAL) + { + /* It's perfectly valid to call listen on an unbound INET socket. + In this case the socket is automatically bound to an unused + port number, listening on all interfaces. On WinSock, listen + fails with WSAEINVAL when it's called on an unbound socket. + So we have to bind manually here to have POSIX semantics. */ + if (get_addr_family () == AF_INET) + { + struct sockaddr_in sin; + sin.sin_family = AF_INET; + sin.sin_port = 0; + sin.sin_addr.s_addr = INADDR_ANY; + if (!::bind (get_socket (), (struct sockaddr *) &sin, sizeof sin)) + res = ::listen (get_socket (), backlog); + } + else if (get_addr_family () == AF_INET6) + { + struct sockaddr_in6 sin6; + memset (&sin6, 0, sizeof sin6); + sin6.sin6_family = AF_INET6; + if (!::bind (get_socket (), (struct sockaddr *) &sin6, sizeof sin6)) + res = ::listen (get_socket (), backlog); + } + } + if (!res) + connect_state (listener); /* gets set to connected on accepted socket. */ + else + set_winsock_errno (); + return res; +} + +int +fhandler_socket_inet::accept4 (struct sockaddr *peer, int *len, int flags) +{ + int ret = -1; + /* Allows NULL peer and len parameters. */ + struct sockaddr_storage lpeer; + int llen = sizeof (struct sockaddr_storage); + + /* Windows event handling does not check for the validity of the desired + flags so we have to do it here. */ + if (connect_state () != listener) + { + WSASetLastError (WSAEINVAL); + set_winsock_errno (); + return -1; + } + + SOCKET res = INVALID_SOCKET; + while (!(res = wait_for_events (FD_ACCEPT | FD_CLOSE, 0)) + && (res = ::accept (get_socket (), (struct sockaddr *) &lpeer, &llen)) + == INVALID_SOCKET + && WSAGetLastError () == WSAEWOULDBLOCK) + ; + if (res == INVALID_SOCKET) + set_winsock_errno (); + else + { + cygheap_fdnew fd; + + if (fd >= 0) + { + fhandler_socket_inet *sock = (fhandler_socket_inet *) + build_fh_dev (dev ()); + if (sock && sock->set_socket_handle (res, get_addr_family (), + get_socket_type (), + get_socket_flags ()) == 0) + { + sock->async_io (false); /* set_socket_handle disables async. */ + /* No locking necessary at this point. */ + sock->wsock_events->events = wsock_events->events | FD_WRITE; + sock->wsock_events->owner = wsock_events->owner; + sock->connect_state (connected); + fd = sock; + if (fd <= 2) + set_std_handle (fd); + ret = fd; + if (peer) + { + memcpy (peer, &lpeer, MIN (*len, llen)); + *len = llen; + } + } + else + delete sock; + } + if (ret == -1) + ::closesocket (res); + } + return ret; +} + +int +fhandler_socket_inet::getsockname (struct sockaddr *name, int *namelen) +{ + int res = -1; + + /* WinSock just returns WSAEFAULT if the buffer is too small. Use a + big enough local buffer and truncate later as necessary, per POSIX. */ + struct sockaddr_storage sock; + int len = sizeof sock; + res = ::getsockname (get_socket (), (struct sockaddr *) &sock, &len); + if (!res) + { + memcpy (name, &sock, MIN (*namelen, len)); + *namelen = len; + } + else + { + if (WSAGetLastError () == WSAEINVAL) + { + /* WinSock returns WSAEINVAL if the socket is locally + unbound. Per SUSv3 this is not an error condition. + We're faking a valid return value here by creating the + same content in the sockaddr structure as on Linux. */ + memset (&sock, 0, sizeof sock); + sock.ss_family = get_addr_family (); + switch (get_addr_family ()) + { + case AF_INET: + res = 0; + len = (int) sizeof (struct sockaddr_in); + break; + case AF_INET6: + res = 0; + len = (int) sizeof (struct sockaddr_in6); + break; + default: + WSASetLastError (WSAEOPNOTSUPP); + break; + } + if (!res) + { + memcpy (name, &sock, MIN (*namelen, len)); + *namelen = len; + } + } + if (res) + set_winsock_errno (); + } + return res; +} + +int +fhandler_socket_inet::getpeername (struct sockaddr *name, int *namelen) +{ + /* Always use a local big enough buffer and truncate later as necessary + per POSIX. WinSock unfortunately only returns WSAEFAULT if the buffer + is too small. */ + struct sockaddr_storage sock; + int len = sizeof sock; + int res = ::getpeername (get_socket (), (struct sockaddr *) &sock, &len); + if (res) + set_winsock_errno (); + else + { + memcpy (name, &sock, MIN (*namelen, len)); + *namelen = len; + } + return res; +} + +int +fhandler_socket_wsock::shutdown (int how) +{ + int res = ::shutdown (get_socket (), how); + + /* Linux allows to call shutdown for any socket, even if it's not connected. + This also disables to call accept on this socket, if shutdown has been + called with the SHUT_RD or SHUT_RDWR parameter. In contrast, WinSock + only allows to call shutdown on a connected socket. The accept function + is in no way affected. So, what we do here is to fake success, and to + change the event settings so that an FD_CLOSE event is triggered for the + calling Cygwin function. The evaluate_events method handles the call + from accept specially to generate a Linux-compatible behaviour. */ + if (res && WSAGetLastError () != WSAENOTCONN) + set_winsock_errno (); + else + { + res = 0; + switch (how) + { + case SHUT_RD: + saw_shutdown_read (true); + wsock_events->events |= FD_CLOSE; + SetEvent (wsock_evt); + break; + case SHUT_WR: + saw_shutdown_write (true); + break; + case SHUT_RDWR: + saw_shutdown_read (true); + saw_shutdown_write (true); + wsock_events->events |= FD_CLOSE; + SetEvent (wsock_evt); + break; + } + } + return res; +} + +int +fhandler_socket_wsock::close () +{ + int res = 0; + + release_events (); + while ((res = ::closesocket (get_socket ())) != 0) + { + if (WSAGetLastError () != WSAEWOULDBLOCK) + { + set_winsock_errno (); + res = -1; + break; + } + if (cygwait (10) == WAIT_SIGNALED) + { + set_errno (EINTR); + res = -1; + break; + } + WSASetLastError (0); + } + return res; +} + +ssize_t +fhandler_socket_inet::recv_internal (LPWSAMSG wsamsg, bool use_recvmsg) +{ + ssize_t res = 0; + DWORD ret = 0, wret; + int evt_mask = (wsamsg->dwFlags & MSG_OOB) ? FD_OOB : FD_READ; + LPWSABUF &wsabuf = wsamsg->lpBuffers; + ULONG &wsacnt = wsamsg->dwBufferCount; + static NO_COPY LPFN_WSARECVMSG WSARecvMsg; + bool read_oob = false; + + /* CV 2014-10-26: Do not check for the connect_state at this point. In + certain scenarios there's no way to check the connect state reliably. + Example (hexchat): Parent process creates socket, forks, child process + calls connect, parent process calls read. Even if the event handling + allows to check for FD_CONNECT in the parent, there is always yet another + scenario we can easily break. */ + + DWORD wait_flags = wsamsg->dwFlags; + bool waitall = !!(wait_flags & MSG_WAITALL); + wsamsg->dwFlags &= (MSG_OOB | MSG_PEEK | MSG_DONTROUTE); + if (use_recvmsg) + { + if (!WSARecvMsg + && get_ext_funcptr (get_socket (), &WSARecvMsg) == SOCKET_ERROR) + { + if (wsamsg->Control.len > 0) + { + set_winsock_errno (); + return SOCKET_ERROR; + } + use_recvmsg = false; + } + else /* Only MSG_PEEK is supported by WSARecvMsg. */ + wsamsg->dwFlags &= MSG_PEEK; + } + if (waitall) + { + if (get_socket_type () != SOCK_STREAM) + { + WSASetLastError (WSAEOPNOTSUPP); + set_winsock_errno (); + return SOCKET_ERROR; + } + if (is_nonblocking () || (wsamsg->dwFlags & (MSG_OOB | MSG_PEEK))) + waitall = false; + } + + /* recv() returns EINVAL if MSG_OOB flag is set in inline mode. */ + if (oobinline && (wsamsg->dwFlags & MSG_OOB)) + { + set_errno (EINVAL); + return SOCKET_ERROR; + } + + /* Check whether OOB data is ready or not */ + if (get_socket_type () == SOCK_STREAM) + if ((wsamsg->dwFlags & MSG_OOB) || oobinline) + { + u_long atmark = 0; + /* SIOCATMARK = _IOR('s',7,u_long) */ + int err = ::ioctlsocket (get_socket (), _IOR('s',7,u_long), &atmark); + if (err) + { + set_winsock_errno (); + return SOCKET_ERROR; + } + /* If there is no OOB data, recv() with MSG_OOB returns EINVAL. + Note: The return value of SIOCATMARK in non-inline mode of + winsock is FALSE if OOB data exists, TRUE otherwise. */ + if (atmark && (wsamsg->dwFlags & MSG_OOB)) + { + /* No OOB data */ + set_errno (EINVAL); + return SOCKET_ERROR; + } + /* Inline mode for out-of-band (OOB) data of winsock is + completely broken. That is, SIOCATMARK always returns + TRUE in inline mode. Due to this problem, application + cannot determine OOB data at all. Therefore the behavior + of a socket with SO_OOBINLINE set is simulated using + a socket with SO_OOBINLINE not set. In this fake inline + mode, the order of the OOB and non-OOB data is not + preserved. OOB data is read before non-OOB data sent + prior to the OOB data. However, this most likely is + not a problem in most cases. */ + /* If there is OOB data, read OOB data using MSG_OOB in + fake inline mode. */ + if (!atmark && oobinline) + { + read_oob = true; + evt_mask = FD_OOB; + } + } + + /* Note: Don't call WSARecvFrom(MSG_PEEK) without actually having data + waiting in the buffers, otherwise the event handling gets messed up + for some reason. */ + while (!(res = wait_for_events (evt_mask | FD_CLOSE, wait_flags)) + || saw_shutdown_read ()) + { + DWORD dwFlags = wsamsg->dwFlags | (read_oob ? MSG_OOB : 0); + if (use_recvmsg) + res = WSARecvMsg (get_socket (), wsamsg, &wret, NULL, NULL); + /* This is working around a really weird problem in WinSock. + + Assume you create a socket, fork the process (thus duplicating + the socket), connect the socket in the child, then call recv + on the original socket handle in the parent process. + In this scenario, calls to WinSock's recvfrom and WSARecvFrom + in the parent will fail with WSAEINVAL, regardless whether both + address parameters, name and namelen, are NULL or point to valid + storage. However, calls to recv and WSARecv succeed as expected. + Per MSDN, WSAEINVAL in the context of recv means "The socket has not + been bound". It is as if the recvfrom functions test if the socket + is bound locally, but in the parent process, WinSock doesn't know + about that and fails, while the same test is omitted in the recv + functions. + + This also covers another weird case: WinSock returns WSAEFAULT if + namelen is a valid pointer while name is NULL. Both parameters are + ignored for TCP sockets, so this only occurs when using UDP socket. */ + else if (!wsamsg->name || get_socket_type () == SOCK_STREAM) + res = WSARecv (get_socket (), wsabuf, wsacnt, &wret, &dwFlags, + NULL, NULL); + else + res = WSARecvFrom (get_socket (), wsabuf, wsacnt, &wret, + &dwFlags, wsamsg->name, &wsamsg->namelen, + NULL, NULL); + if (!res) + { + ret += wret; + if (!waitall) + break; + while (wret && wsacnt) + { + if (wsabuf->len > wret) + { + wsabuf->len -= wret; + wsabuf->buf += wret; + wret = 0; + } + else + { + wret -= wsabuf->len; + ++wsabuf; + --wsacnt; + } + } + if (!wsacnt) + break; + } + else if (WSAGetLastError () != WSAEWOULDBLOCK) + break; + } + + if (res) + { + /* According to SUSv3, errno isn't set in that case and no error + condition is returned. */ + if (WSAGetLastError () == WSAEMSGSIZE) + ret += wret; + else if (!ret) + { + /* ESHUTDOWN isn't defined for recv in SUSv3. Simply EOF is returned + in this case. */ + if (WSAGetLastError () == WSAESHUTDOWN) + ret = 0; + else + { + set_winsock_errno (); + return SOCKET_ERROR; + } + } + } + + return ret; +} + +ssize_t +fhandler_socket_wsock::recvfrom (void *in_ptr, size_t len, int flags, + struct sockaddr *from, int *fromlen) +{ + char *ptr = (char *) in_ptr; + + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, + wsabuf, bufcnt, + { 0, NULL }, + (DWORD) flags }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } + ssize_t ret = recv_internal (&wsamsg, false); + if (fromlen) + *fromlen = wsamsg.namelen; + return ret; +} + +ssize_t +fhandler_socket_wsock::recvmsg (struct msghdr *msg, int flags) +{ + /* Disappointing but true: Even if WSARecvMsg is supported, it's only + supported for datagram and raw sockets. */ + bool use_recvmsg = true; + if (get_socket_type () == SOCK_STREAM || get_addr_family () == AF_LOCAL) + { + use_recvmsg = false; + msg->msg_controllen = 0; + } + + WSABUF wsabuf[msg->msg_iovlen]; + WSABUF *wsaptr = wsabuf + msg->msg_iovlen; + const struct iovec *iovptr = msg->msg_iov + msg->msg_iovlen; + while (--wsaptr >= wsabuf) + { + wsaptr->len = (--iovptr)->iov_len; + wsaptr->buf = (char *) iovptr->iov_base; + } + WSAMSG wsamsg = { (struct sockaddr *) msg->msg_name, msg->msg_namelen, + wsabuf, (DWORD) msg->msg_iovlen, + { (DWORD) msg->msg_controllen, (char *) msg->msg_control }, + (DWORD) flags }; + ssize_t ret = recv_internal (&wsamsg, use_recvmsg); + if (ret >= 0) + { + msg->msg_namelen = wsamsg.namelen; + msg->msg_controllen = wsamsg.Control.len; + msg->msg_flags = wsamsg.dwFlags; + /* if a UDP_GRO packet is present, convert gso_size from Windows DWORD + to Linux-compatible uint16_t. We don't have to change the + msg_control block layout for that, assuming applications do as they + have been told and only use CMSG_FIRSTHDR/CMSG_NXTHDR/CMSG_DATA to + access control messages. The cmsghdr alignment saves our ass here! */ + if (msg->msg_controllen && get_socket_type () == SOCK_DGRAM + && (get_addr_family () == AF_INET || get_addr_family () == AF_INET6)) + { + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR (msg); + cmsg; + cmsg = CMSG_NXTHDR (msg, cmsg)) + { + if (cmsg->cmsg_level == SOL_UDP + && cmsg->cmsg_type == UDP_GRO) + { + PDWORD gso_size_win = (PDWORD) CMSG_DATA(cmsg); + uint16_t *gso_size_cyg = (uint16_t *) CMSG_DATA(cmsg); + uint16_t gso_size = (uint16_t) *gso_size_win; + *gso_size_cyg = gso_size; + break; + } + } + } + } + return ret; +} + +void +fhandler_socket_wsock::read (void *in_ptr, size_t& len) +{ + char *ptr = (char *) in_ptr; + + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } + len = recv_internal (&wsamsg, false); +} + +ssize_t +fhandler_socket_wsock::readv (const struct iovec *const iov, const int iovcnt, + ssize_t tot) +{ + WSABUF wsabuf[iovcnt]; + WSABUF *wsaptr = wsabuf + iovcnt; + const struct iovec *iovptr = iov + iovcnt; + while (--wsaptr >= wsabuf) + { + wsaptr->len = (--iovptr)->iov_len; + wsaptr->buf = (char *) iovptr->iov_base; + } + WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; + return recv_internal (&wsamsg, false); +} + +ssize_t +fhandler_socket_wsock::send_internal (struct _WSAMSG *wsamsg, int flags) +{ + ssize_t res = 0; + DWORD ret = 0, sum = 0; + WSABUF out_buf[wsamsg->dwBufferCount]; + bool use_sendmsg = false; + DWORD wait_flags = flags & MSG_DONTWAIT; + bool nosignal = !!(flags & MSG_NOSIGNAL); + + /* MSG_EOR not supported by any protocol */ + if (flags & MSG_EOR) + { + set_errno (EOPNOTSUPP); + return SOCKET_ERROR; + } + + flags &= (MSG_OOB | MSG_DONTROUTE); + if (wsamsg->Control.len > 0) + use_sendmsg = true; + /* Workaround for MSDN KB 823764: Split a message into chunks <= SO_SNDBUF. + in_idx is the index of the current lpBuffers from the input wsamsg buffer. + in_off is used to keep track of the next byte to write from a wsamsg + buffer which only gets partially written. */ + for (DWORD in_idx = 0, in_off = 0; + in_idx < wsamsg->dwBufferCount; + in_off >= wsamsg->lpBuffers[in_idx].len && (++in_idx, (in_off = 0))) + { + /* Split a message into the least number of pieces to minimize the + number of WsaSendTo calls. Don't split datagram messages (bad idea). + out_idx is the index of the next buffer in the out_buf WSABUF, + also the number of buffers given to WSASendTo. + out_len is the number of bytes in the buffers given to WSASendTo. + Don't split datagram messages (very bad idea). */ + DWORD out_idx = 0; + DWORD out_len = 0; + if (get_socket_type () == SOCK_STREAM) + { + do + { + out_buf[out_idx].buf = wsamsg->lpBuffers[in_idx].buf + in_off; + out_buf[out_idx].len = wsamsg->lpBuffers[in_idx].len - in_off; + out_len += out_buf[out_idx].len; + out_idx++; + } + while (out_len < (unsigned) wmem () + && (in_off = 0, ++in_idx < wsamsg->dwBufferCount)); + /* Tweak len of the last out_buf buffer so the entire number of bytes + is (less than or) equal to wmem (). Fix out_len as well since it's + used in a subsequent test expression. */ + if (out_len > (unsigned) wmem ()) + { + out_buf[out_idx - 1].len -= out_len - (unsigned) wmem (); + out_len = (unsigned) wmem (); + } + /* Add the bytes written from the current last buffer to in_off, + so in_off points to the next byte to be written from that buffer, + or beyond which lets the outper loop skip to the next buffer. */ + in_off += out_buf[out_idx - 1].len; + } + + do + { + if (use_sendmsg) + res = WSASendMsg (get_socket (), wsamsg, flags, &ret, NULL, NULL); + else if (get_socket_type () == SOCK_STREAM) + res = WSASendTo (get_socket (), out_buf, out_idx, &ret, flags, + wsamsg->name, wsamsg->namelen, NULL, NULL); + else + res = WSASendTo (get_socket (), wsamsg->lpBuffers, + wsamsg->dwBufferCount, &ret, flags, + wsamsg->name, wsamsg->namelen, NULL, NULL); + if (res && (WSAGetLastError () == WSAEWOULDBLOCK)) + { + LOCK_EVENTS; + wsock_events->events &= ~FD_WRITE; + UNLOCK_EVENTS; + } + } + while (res && (WSAGetLastError () == WSAEWOULDBLOCK) + && !(res = wait_for_events (FD_WRITE | FD_CLOSE, wait_flags))); + + if (!res) + { + sum += ret; + /* For streams, return to application if the number of bytes written + is less than the number of bytes we intended to write in a single + call to WSASendTo. Otherwise we would have to add code to + backtrack in the input buffers, which is questionable. There was + probably a good reason we couldn't write more. */ + if (get_socket_type () != SOCK_STREAM || ret < out_len) + break; + } + else if (is_nonblocking () || WSAGetLastError() != WSAEWOULDBLOCK) + break; + } + + if (sum) + res = sum; + else if (res == SOCKET_ERROR) + { + set_winsock_errno (); + + /* Special handling for EPIPE and SIGPIPE. + + EPIPE is generated if the local end has been shut down on a connection + oriented socket. In this case the process will also receive a SIGPIPE + unless MSG_NOSIGNAL is set. */ + if ((get_errno () == ECONNABORTED || get_errno () == ESHUTDOWN) + && get_socket_type () == SOCK_STREAM) + { + set_errno (EPIPE); + if (!nosignal) + raise (SIGPIPE); + } + } + + return res; +} + +ssize_t +fhandler_socket_inet::sendto (const void *in_ptr, size_t len, int flags, + const struct sockaddr *to, int tolen) +{ + char *ptr = (char *) in_ptr; + struct sockaddr_storage sst; + + if (to && get_inet_addr_inet (to, tolen, &sst, &tolen) == SOCKET_ERROR) + return SOCKET_ERROR; + + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, + wsabuf, bufcnt, + { 0, NULL }, + 0 }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } + return send_internal (&wsamsg, flags); +} + +ssize_t +fhandler_socket_inet::sendmsg (const struct msghdr *in_msg, int flags) +{ + struct sockaddr_storage sst; + int len = 0; + DWORD old_gso_size = MAXDWORD; + ssize_t ret; + + /* Copy incoming msghdr into a local copy. We only access this from + here on. Thus, make sure not to manipulate user space data. */ + struct msghdr local_msg = *in_msg; + struct msghdr *msg = &local_msg; + + if (msg->msg_name + && get_inet_addr_inet ((struct sockaddr *) msg->msg_name, + msg->msg_namelen, &sst, &len) == SOCKET_ERROR) + return SOCKET_ERROR; + + /* Check for our optmem_max value */ + if (msg->msg_controllen > NT_MAX_PATH) + { + set_errno (ENOBUFS); + return SOCKET_ERROR; + } + + /* WSASendMsg is supported only for datagram and raw sockets. */ + if (get_socket_type () != SOCK_DGRAM && get_socket_type () != SOCK_RAW) + msg->msg_controllen = 0; + + /* If we actually have control data, copy it to local storage. Control + messages only handled by us have to be dropped from the msg_control + block, and we don't want to change user space data. */ + tmp_pathbuf tp; + if (msg->msg_controllen) + { + void *local_cmsg = tp.c_get (); + memcpy (local_cmsg, msg->msg_control, msg->msg_controllen); + msg->msg_control = local_cmsg; + } + + /* Check for control message we handle inside Cygwin. Right now this + only affects UDP sockets, so check here early. */ + if (msg->msg_controllen && get_socket_type () == SOCK_DGRAM) + { + struct cmsghdr *cmsg; + bool dropped = false; + + for (cmsg = CMSG_FIRSTHDR (msg); + cmsg; + cmsg = dropped ? cmsg : CMSG_NXTHDR (msg, cmsg)) + { + dropped = false; + /* cmsg within bounds? */ + if (cmsg->cmsg_len < sizeof (struct cmsghdr) + || cmsg->cmsg_len > (size_t) msg->msg_controllen + - ((uintptr_t) cmsg + - (uintptr_t) msg->msg_control)) + { + set_errno (EINVAL); + return SOCKET_ERROR; + } + /* UDP_SEGMENT? Override gso_size for this single sendmsg. */ + if (cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_SEGMENT) + { + /* 16 bit unsigned, as on Linux */ + DWORD gso_size = *(uint16_t *) CMSG_DATA(cmsg); + int size = sizeof old_gso_size; + /* Save the old gso_size and set the requested one. */ + if (::getsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT, + (char *) &old_gso_size, &size) == SOCKET_ERROR + || ::setsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT, + (char *) &gso_size, sizeof gso_size) + == SOCKET_ERROR) + { + set_winsock_errno (); + return SOCKET_ERROR; + } + /* Drop message from msgbuf, Windows doesn't know it. */ + size_t cmsg_size = CMSG_ALIGN (cmsg->cmsg_len); + struct cmsghdr *cmsg_next = CMSG_NXTHDR (msg, cmsg); + if (cmsg_next) + memmove (cmsg, cmsg_next, (char *) msg->msg_control + + msg->msg_controllen + - (char *) cmsg_next); + msg->msg_controllen -= cmsg_size; + dropped = true; + /* Avoid infinite loop */ + if (msg->msg_controllen <= 0) + { + cmsg = NULL; + msg->msg_controllen = 0; + } + } + } + } + + /* Copy over msg_iov into an equivalent WSABUF array. */ + WSABUF wsabuf[msg->msg_iovlen]; + WSABUF *wsaptr = wsabuf; + const struct iovec *iovptr = msg->msg_iov; + for (int i = 0; i < msg->msg_iovlen; ++i) + { + wsaptr->len = iovptr->iov_len; + (wsaptr++)->buf = (char *) (iovptr++)->iov_base; + } + + /* Eventually copy over to a WSAMSG and call send_internal with that. */ + WSAMSG wsamsg = { msg->msg_name ? (struct sockaddr *) &sst : NULL, len, + wsabuf, (DWORD) msg->msg_iovlen, + { (DWORD) msg->msg_controllen, + msg->msg_controllen ? (char *) msg->msg_control : NULL }, + 0 }; + ret = send_internal (&wsamsg, flags); + if (old_gso_size != MAXDWORD) + ::setsockopt (get_socket (), IPPROTO_UDP, UDP_SEGMENT, + (char *) &old_gso_size, sizeof old_gso_size); + return ret; +} + +ssize_t +fhandler_socket_wsock::write (const void *in_ptr, size_t len) +{ + char *ptr = (char *) in_ptr; + + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } + return send_internal (&wsamsg, 0); +} + +ssize_t +fhandler_socket_wsock::writev (const struct iovec *const iov, const int iovcnt, + ssize_t tot) +{ + WSABUF wsabuf[iovcnt]; + WSABUF *wsaptr = wsabuf; + const struct iovec *iovptr = iov; + for (int i = 0; i < iovcnt; ++i) + { + wsaptr->len = iovptr->iov_len; + (wsaptr++)->buf = (char *) (iovptr++)->iov_base; + } + WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; + return send_internal (&wsamsg, 0); +} + +#define TCP_MAXRT 5 /* Older systems don't support TCP_MAXRTMS + TCP_MAXRT takes secs, not msecs. */ + +#ifndef SIO_TCP_SET_ACK_FREQUENCY +#define SIO_TCP_SET_ACK_FREQUENCY _WSAIOW(IOC_VENDOR,23) +#endif + +#define MAX_TCP_KEEPIDLE 32767 +#define MAX_TCP_KEEPCNT 255 +#define MAX_TCP_KEEPINTVL 32767 + +#define FIXED_WSOCK_TCP_KEEPCNT 10 + +int +fhandler_socket_inet::set_keepalive (int keepidle, int keepcnt, int keepintvl) +{ + struct tcp_keepalive tka; + int so_keepalive = 0; + int len = sizeof so_keepalive; + int ret; + DWORD dummy; + + /* Per MSDN, + https://docs.microsoft.com/en-us/windows/win32/winsock/sio-keepalive-vals + the subsequent keep-alive settings in struct tcp_keepalive are only used + if the onoff member is != 0. Request the current state of SO_KEEPALIVE, + then set the keep-alive options with onoff set to 1. On success, if + SO_KEEPALIVE was 0, restore to the original SO_KEEPALIVE setting. Per + the above MSDN doc, the SIO_KEEPALIVE_VALS settings are persistent + across switching SO_KEEPALIVE. */ + ret = ::getsockopt (get_socket (), SOL_SOCKET, SO_KEEPALIVE, + (char *) &so_keepalive, &len); + if (ret == SOCKET_ERROR) + debug_printf ("getsockopt (SO_KEEPALIVE) failed, %u\n", WSAGetLastError ()); + tka.onoff = 1; + tka.keepalivetime = keepidle * MSPERSEC; + /* WinSock TCP_KEEPCNT is fixed. But we still want that the keep-alive + times out after TCP_KEEPIDLE + TCP_KEEPCNT * TCP_KEEPINTVL secs. + To that end, we set keepaliveinterval so that + + keepaliveinterval * FIXED_WSOCK_TCP_KEEPCNT == TCP_KEEPINTVL * TCP_KEEPCNT + + FIXME? Does that make sense? + + Sidenote: Given the max values, the entire operation fits into an int. */ + tka.keepaliveinterval = MSPERSEC / FIXED_WSOCK_TCP_KEEPCNT * keepcnt + * keepintvl; + if (WSAIoctl (get_socket (), SIO_KEEPALIVE_VALS, (LPVOID) &tka, sizeof tka, + NULL, 0, &dummy, NULL, NULL) == SOCKET_ERROR) + { + set_winsock_errno (); + return -1; + } + if (!so_keepalive) + { + ret = ::setsockopt (get_socket (), SOL_SOCKET, SO_KEEPALIVE, + (const char *) &so_keepalive, sizeof so_keepalive); + if (ret == SOCKET_ERROR) + debug_printf ("setsockopt (SO_KEEPALIVE) failed, %u\n", + WSAGetLastError ()); + } + return 0; +} + +int +fhandler_socket_inet::setsockopt (int level, int optname, const void *optval, + socklen_t optlen) +{ + bool ignore = false; + int ret = -1; + unsigned int winsock_val; + + /* Preprocessing setsockopt. Set ignore to true if setsockopt call should + get skipped entirely. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_PEERCRED: + set_errno (ENOPROTOOPT); + return -1; + + case SO_REUSEADDR: + /* Per POSIX we must not be able to reuse a complete duplicate of a + local TCP address (same IP, same port), even if SO_REUSEADDR has + been set. This behaviour is maintained in WinSock for backward + compatibility, while the WinSock standard behaviour of stream + socket binding is equivalent to the POSIX behaviour as if + SO_REUSEADDR has been set. The SO_EXCLUSIVEADDRUSE option has + been added to allow an application to request POSIX standard + behaviour in the non-SO_REUSEADDR case. + + To emulate POSIX socket binding behaviour, note that SO_REUSEADDR + has been set but don't call setsockopt. Instead + fhandler_socket::bind sets SO_EXCLUSIVEADDRUSE if the application + did not set SO_REUSEADDR. */ + if (optlen < (socklen_t) sizeof (int)) + { + set_errno (EINVAL); + return ret; + } + if (get_socket_type () == SOCK_STREAM) + ignore = true; + break; + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + if (optlen < (socklen_t) sizeof (struct timeval)) + { + set_errno (EINVAL); + return ret; + } + if (timeval_to_ms ((struct timeval *) optval, + (optname == SO_RCVTIMEO) ? rcvtimeo () + : sndtimeo ())) + ret = 0; + else + set_errno (EDOM); + return ret; + + case SO_OOBINLINE: + /* Inline mode for out-of-band (OOB) data of winsock is + completely broken. That is, SIOCATMARK always returns + TRUE in inline mode. Due to this problem, application + cannot determine OOB data at all. Therefore the behavior + of a socket with SO_OOBINLINE set is simulated using + a socket with SO_OOBINLINE not set. In this fake inline + mode, the order of the OOB and non-OOB data is not + preserved. OOB data is read before non-OOB data sent + prior to the OOB data. However, this most likely is + not a problem in most cases. */ + /* Here, instead of actually setting inline mode, simply + set the variable oobinline. */ + oobinline = *(int *) optval ? true : false; + ignore = true; + break; + + default: + break; + } + break; + + case IPPROTO_IP: + switch (optname) + { + case IP_TOS: + /* Winsock doesn't support setting the IP_TOS field with setsockopt + and TOS was never implemented for TCP anyway. setsockopt returns + WinSock error 10022, WSAEINVAL when trying to set the IP_TOS + field. We just return 0 instead. */ + ignore = true; + break; + + default: + break; + } + break; + + case IPPROTO_IPV6: + switch (optname) + { + case IPV6_TCLASS: + /* Unsupported */ + ignore = true; + break; + + default: + break; + } + break; + + case IPPROTO_TCP: + /* Check for stream socket early on, so we don't have to do this for + every option. Also, WinSock returns EINVAL. */ + if (type != SOCK_STREAM) + { + set_errno (EOPNOTSUPP); + return -1; + } + + switch (optname) + { + case TCP_MAXSEG: + /* Winsock doesn't support setting TCP_MAXSEG, only requesting it + via getsockopt. Make this a no-op. */ + ignore = true; + break; + + case TCP_QUICKACK: + /* Various sources on the net claim that TCP_QUICKACK is supported + by Windows, even using the same optname value of 12. However, + the ws2ipdef.h header calls this option TCP_CONGESTION_ALGORITHM + and there's no official statement, nor official documentation + confirming or denying this option is equivalent to Linux' + TCP_QUICKACK. Also, weirdly, this option takes values from 0..7. + + There is another undocumented option to WSAIoctl called + SIO_TCP_SET_ACK_FREQUENCY which is already used by some + projects, so we're going to use it here, too, for now. + + There's an open issue in the dotnet github, + https://github.com/dotnet/runtime/issues/798 + Hopefully this clarifies the situation in the not too distant + future... */ + { + DWORD dummy; + /* https://stackoverflow.com/questions/55034112/c-disable-delayed-ack-on-windows + claims that valid values for SIO_TCP_SET_ACK_FREQUENCY are + 1..255. In contrast to that, my own testing shows that + valid values are 0 and 1 exclusively. */ + int freq = !!*(int *) optval; + if (WSAIoctl (get_socket (), SIO_TCP_SET_ACK_FREQUENCY, &freq, + sizeof freq, NULL, 0, &dummy, NULL, NULL) + == SOCKET_ERROR) + { + set_winsock_errno (); + return -1; + } + ignore = true; + tcp_quickack = freq ? true : false; + } + break; + + case TCP_MAXRT: + /* Don't let this option slip through from user space. */ + set_errno (EOPNOTSUPP); + return -1; + + case TCP_USER_TIMEOUT: + if (!wincap.has_tcp_maxrtms ()) + { + /* convert msecs to secs. Values < 1000 ms are converted to + 0 secs, just as in WinSock. */ + winsock_val = *(unsigned int *) optval / MSPERSEC; + optname = TCP_MAXRT; + optval = (const void *) &winsock_val; + } + break; + + case TCP_FASTOPEN: + /* Fake FastOpen on older systems. */ + if (!wincap.has_tcp_fastopen ()) + { + ignore = true; + tcp_fastopen = *(int *) optval ? true : false; + } + break; + + case TCP_KEEPIDLE: + /* Handle TCP_KEEPIDLE on older systems. */ + if (!wincap.has_linux_tcp_keepalive_sockopts ()) + { + if (*(int *) optval < 1 || *(int *) optval > MAX_TCP_KEEPIDLE) + { + set_errno (EINVAL); + return -1; + } + if (set_keepalive (*(int *) optval, tcp_keepcnt, tcp_keepintvl)) + return -1; + ignore = true; + tcp_keepidle = *(int *) optval; + } + break; + + case TCP_KEEPCNT: + /* Fake TCP_KEEPCNT on older systems. */ + if (!wincap.has_linux_tcp_keepalive_sockopts ()) + { + if (*(int *) optval < 1 || *(int *) optval > MAX_TCP_KEEPCNT) + { + set_errno (EINVAL); + return -1; + } + if (set_keepalive (tcp_keepidle, *(int *) optval, tcp_keepintvl)) + return -1; + ignore = true; + tcp_keepcnt = *(int *) optval; + } + break; + + case TCP_KEEPINTVL: + /* Handle TCP_KEEPINTVL on older systems. */ + if (!wincap.has_linux_tcp_keepalive_sockopts ()) + { + if (*(int *) optval < 1 || *(int *) optval > MAX_TCP_KEEPINTVL) + { + set_errno (EINVAL); + return -1; + } + if (set_keepalive (tcp_keepidle, tcp_keepcnt, *(int *) optval)) + return -1; + ignore = true; + tcp_keepintvl = *(int *) optval; + } + break; + + default: + break; + } + break; + + case IPPROTO_UDP: + /* Check for dgram socket early on, so we don't have to do this for + every option. Also, WinSock returns EINVAL. */ + if (type != SOCK_DGRAM) + { + set_errno (EOPNOTSUPP); + return -1; + } + if (optlen < (socklen_t) sizeof (int)) + { + set_errno (EINVAL); + return ret; + } + switch (optname) + { + case UDP_SEGMENT: + if (*(int *) optval < 0 || *(int *) optval > USHRT_MAX) + { + set_errno (EINVAL); + return -1; + } + break; + + case UDP_GRO: + /* In contrast to Windows' UDP_RECV_MAX_COALESCED_SIZE option, + Linux' UDP_GRO option is just a bool. The max. packet size + is dynamically evaluated from the MRU. There's no easy, + reliable way to get the MRU. We assume that this is what Windows + will do internally anyway and, given UDP_RECV_MAX_COALESCED_SIZE + defines a *maximum* size for aggregated packages, we just choose + the maximum sensible value. FIXME? IP_MTU_DISCOVER / IP_MTU */ + winsock_val = *(int *) optval ? USHRT_MAX : 0; + optval = &winsock_val; + break; + + default: + break; + } + break; + + default: + break; + } + + /* Call Winsock setsockopt (or not) */ + if (ignore) + ret = 0; + else + { + ret = ::setsockopt (get_socket (), level, optname, (const char *) optval, + optlen); + if (ret == SOCKET_ERROR) + { + set_winsock_errno (); + return ret; + } + } + + if (optlen == (socklen_t) sizeof (int)) + debug_printf ("setsockopt optval=%x", *(int *) optval); + + /* Postprocessing setsockopt, setting fhandler_socket members, etc. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_REUSEADDR: + saw_reuseaddr (*(int *) optval); + break; + + case SO_RCVBUF: + rmem (*(int *) optval); + break; + + case SO_SNDBUF: + wmem (*(int *) optval); + break; + + default: + break; + } + break; + + default: + break; + } + + return ret; +} + +int +fhandler_socket_inet::getsockopt (int level, int optname, const void *optval, + socklen_t *optlen) +{ + bool onebyte = false; + int ret = -1; + + /* Preprocessing getsockopt. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_PEERCRED: + set_errno (ENOPROTOOPT); + return -1; + + case SO_REUSEADDR: + { + unsigned int *reuseaddr = (unsigned int *) optval; + + if (*optlen < (socklen_t) sizeof *reuseaddr) + { + set_errno (EINVAL); + return -1; + } + *reuseaddr = saw_reuseaddr(); + *optlen = (socklen_t) sizeof *reuseaddr; + return 0; + } + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + { + struct timeval *time_out = (struct timeval *) optval; + + if (*optlen < (socklen_t) sizeof *time_out) + { + set_errno (EINVAL); + return -1; + } + DWORD ms = (optname == SO_RCVTIMEO) ? rcvtimeo () : sndtimeo (); + if (ms == 0 || ms == INFINITE) + { + time_out->tv_sec = 0; + time_out->tv_usec = 0; + } + else + { + time_out->tv_sec = ms / MSPERSEC; + time_out->tv_usec = ((ms % MSPERSEC) * USPERSEC) / MSPERSEC; + } + *optlen = (socklen_t) sizeof *time_out; + return 0; + } + + case SO_TYPE: + { + unsigned int *type = (unsigned int *) optval; + *type = get_socket_type (); + *optlen = (socklen_t) sizeof *type; + return 0; + } + + case SO_OOBINLINE: + *(int *) optval = oobinline ? 1 : 0; + return 0; + + default: + break; + } + break; + + case IPPROTO_IP: + break; + + case IPPROTO_TCP: + /* Check for stream socket early on, so we don't have to do this for + every option. Also, WinSock returns EINVAL. */ + if (type != SOCK_STREAM) + { + set_errno (EOPNOTSUPP); + return -1; + } + + switch (optname) + { + case TCP_QUICKACK: + *(int *) optval = tcp_quickack ? 1 : 0; + *optlen = sizeof (int); + return 0; + + case TCP_MAXRT: + /* Don't let this option slip through from user space. */ + set_errno (EOPNOTSUPP); + return -1; + + case TCP_USER_TIMEOUT: + /* Older systems don't support TCP_MAXRTMS, just call TCP_MAXRT. */ + if (!wincap.has_tcp_maxrtms ()) + optname = TCP_MAXRT; + break; + + case TCP_FASTOPEN: + /* Fake FastOpen on older systems */ + if (!wincap.has_tcp_fastopen ()) + { + *(int *) optval = tcp_fastopen ? 1 : 0; + *optlen = sizeof (int); + return 0; + } + break; + + case TCP_KEEPIDLE: + /* Use stored value on older systems */ + if (!wincap.has_linux_tcp_keepalive_sockopts ()) + { + *(int *) optval = tcp_keepidle; + *optlen = sizeof (int); + return 0; + } + break; + + case TCP_KEEPCNT: + /* Use stored value on older systems */ + if (!wincap.has_linux_tcp_keepalive_sockopts ()) + { + *(int *) optval = tcp_keepcnt; + *optlen = sizeof (int); + return 0; + } + break; + + case TCP_KEEPINTVL: + /* Use stored value on older systems */ + if (!wincap.has_linux_tcp_keepalive_sockopts ()) + { + *(int *) optval = tcp_keepintvl; + *optlen = sizeof (int); + return 0; + } + break; + + default: + break; + } + break; + + case IPPROTO_UDP: + /* Check for dgram socket early on, so we don't have to do this for + every option. Also, WinSock returns EINVAL. */ + if (type != SOCK_DGRAM) + { + set_errno (EOPNOTSUPP); + return -1; + } + break; + + default: + break; + } + + /* Call Winsock getsockopt */ + ret = ::getsockopt (get_socket (), level, optname, (char *) optval, + (int *) optlen); + if (ret == SOCKET_ERROR) + { + set_winsock_errno (); + return ret; + } + + /* Postprocessing getsockopt, setting fhandler_socket members, etc. Set + onebyte true for options returning BOOLEAN instead of a boolean DWORD. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_ERROR: + { + int *e = (int *) optval; + debug_printf ("WinSock SO_ERROR = %d", *e); + *e = find_winsock_errno (*e); + } + break; + + case SO_KEEPALIVE: + case SO_DONTROUTE: + onebyte = true; + break; + + default: + break; + } + break; + + case IPPROTO_TCP: + switch (optname) + { + case TCP_NODELAY: + onebyte = true; + break; + + case TCP_MAXRT: /* After above conversion from TCP_USER_TIMEOUT */ + /* convert secs to msecs */ + *(unsigned int *) optval *= MSPERSEC; + break; + + case TCP_FASTOPEN: + onebyte = true; + break; + + default: + break; + } + break; + + case IPPROTO_UDP: + switch (optname) + { + case UDP_GRO: + /* Convert to bool option */ + *(unsigned int *) optval = *(unsigned int *) optval ? 1 : 0; + break; + + default: + break; + } + break; + + default: + break; + } + + if (onebyte) + { + /* Regression in 6.0 kernel and later: instead of a 4 byte BOOL value, a + 1 byte BOOLEAN value is returned, in contrast to older systems and + the documentation. Since an int type is expected by the calling + application, we convert the result here. */ + BOOLEAN *in = (BOOLEAN *) optval; + int *out = (int *) optval; + *out = *in; + *optlen = 4; + } + + return ret; +} + +int +fhandler_socket_wsock::ioctl (unsigned int cmd, void *p) +{ + int res; + + switch (cmd) + { + /* Here we handle only ioctl commands which are understood by Winsock. + However, we have a problem, which is, the different size of u_long + in Windows and 64 bit Cygwin. This affects the definitions of + FIOASYNC, etc, because they are defined in terms of sizeof(u_long). + So we have to use case labels which are independent of the sizeof + u_long. Since we're redefining u_long at the start of this file to + matching Winsock's idea of u_long, we can use the real definitions in + calls to Windows. In theory we also have to make sure to convert the + different ideas of u_long between the application and Winsock, but + fortunately, the parameters defined as u_long pointers are on Linux + and BSD systems defined as int pointer, so the applications will + use a type of the expected size. Hopefully. */ + case FIOASYNC: + case _IOW('f', 125, u_long): + res = WSAAsyncSelect (get_socket (), winmsg, WM_ASYNCIO, + *(int *) p ? ASYNC_MASK : 0); + syscall_printf ("Async I/O on socket %s", + *(int *) p ? "started" : "cancelled"); + async_io (*(int *) p != 0); + /* If async_io is switched off, revert the event handling. */ + if (*(int *) p == 0) + WSAEventSelect (get_socket (), wsock_evt, EVENT_MASK); + break; + case FIONREAD: + case _IOR('f', 127, u_long): + /* Make sure to use the Winsock definition of FIONREAD. */ + res = ::ioctlsocket (get_socket (), _IOR('f', 127, u_long), (u_long *) p); + if (res == SOCKET_ERROR) + set_winsock_errno (); + break; + case FIONBIO: + case SIOCATMARK: + /* Sockets are always non-blocking internally. So we just note the + state here. */ + /* Convert the different idea of u_long in the definition of cmd. */ + if (((cmd >> 16) & IOCPARM_MASK) == sizeof (unsigned long)) + cmd = (cmd & ~(IOCPARM_MASK << 16)) | (sizeof (u_long) << 16); + if (cmd == FIONBIO) + { + syscall_printf ("socket is now %sblocking", + *(int *) p ? "non" : ""); + set_nonblocking (*(int *) p); + res = 0; + } + else + res = ::ioctlsocket (get_socket (), cmd, (u_long *) p); + /* In winsock, the return value of SIOCATMARK is FALSE if + OOB data exists, TRUE otherwise. This is almost opposite + to expectation. */ + /* SIOCATMARK = _IOR('s',7,u_long) */ + if (cmd == _IOR('s',7,u_long) && !res) + *(u_long *)p = !*(u_long *)p; + break; + default: + res = fhandler_socket::ioctl (cmd, p); + break; + } + syscall_printf ("%d = ioctl_socket(%x, %p)", res, cmd, p); + return res; +} + +int +fhandler_socket_wsock::fcntl (int cmd, intptr_t arg) +{ + int res = 0; + + switch (cmd) + { + case F_SETOWN: + { + pid_t pid = (pid_t) arg; + LOCK_EVENTS; + wsock_events->owner = pid; + UNLOCK_EVENTS; + debug_printf ("owner set to %d", pid); + } + break; + case F_GETOWN: + res = wsock_events->owner; + break; + default: + res = fhandler_socket::fcntl (cmd, arg); + break; + } + return res; +} |