From a998dd705576606f85e3598e5a38ea8d3805f798 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Mon, 24 Mar 2008 14:48:58 +0000 Subject: * fhandler.cc (fhandler_base::dup): Drop setting flags in the parent. Implement advisory file locking. * cygheap.h (struct init_cygheap): Add inode_list member. * cygwin.din (lockf): Export. * dcrt0.cc (child_info_spawn::handle_spawn): Call fixup_lockf_after_exec. * dtable.h (class dtable): Add fhandler_disk_file as friend class. * fhandler.cc (fhandler_base::close): Call del_my_locks if node is set. (fhandler_base::fhandler_base): Initialize node to NULL. (fhandler_base::fixup_after_fork): Ditto. * fhandler.h (class fhandler_base): Add member node. * fhandler_disk_file.cc (fhandler_disk_file::lock): Delete. * flock.cc: Implement all advisory file locking here. (fhandler_disk_file::lock): Implement here. (flock): Call fcntl with F_FLOCK bit set. Remove test main function. (lockf): New function. * fork.cc (frok::child): Call fixup_lockf_after_fork. * ntdll.h (DIRECTORY_ALL_ACCESS): Define. (struct _OBJECT_BASIC_INFORMATION): Define. (enum _EVENT_TYPE): Define. (NtCreateDirectoryObject): Declare. (NtCreateEvent): Declare. (NtCreateMutant): Declare. (NtOpenEvent): Declare. (NtOpenMutant): Declare. * include/cygwin/version.h: Bump API minor number. --- winsup/cygwin/flock.cc | 1368 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 1348 insertions(+), 20 deletions(-) (limited to 'winsup/cygwin/flock.cc') diff --git a/winsup/cygwin/flock.cc b/winsup/cygwin/flock.cc index b15962d94..1367060ea 100644 --- a/winsup/cygwin/flock.cc +++ b/winsup/cygwin/flock.cc @@ -1,13 +1,6 @@ -/* One of many ways to emulate flock() on top of real (good) POSIX locks. - * - * This flock() emulation is based upon source taken from the Red Hat - * implementation used in their imap-2002d SRPM. - * - * $RH: flock.c,v 1.2 2000/08/23 17:07:00 nalin Exp $ - */ -/* flock.c +/* flock.cc. NT specific implementation of advisory file locking. - Copyright 2003 Red Hat, Inc. + Copyright 2003, 2008 Red Hat, Inc. This file is part of Cygwin. @@ -15,11 +8,1314 @@ Cygwin license. Please consult the file "CYGWIN_LICENSE" for details. */ +/* The basic mechanism as well as the datastructures used in the below + implementation are taken from the FreeBSD repository on 2008-03-18. + The essential code of the lf_XXX functions has been taken from the + module src/sys/kern/kern_lockf.c. It has been adapted to use NT + global namespace subdirs and event objects for synchronization + purposes. + + So, the following copyright applies to most of the code in the lf_XXX + functions. + + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Scooter Morris at Genentech Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94 +*/ + +/* + * The flock() function is based upon source taken from the Red Hat + * implementation used in their imap-2002d SRPM. + * + * $RH: flock.c,v 1.2 2000/08/23 17:07:00 nalin Exp $ + */ + +/* The lockf function has been taken from FreeBSD with the following + * copyright. + * + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Klaus Klein. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * $NetBSD: lockf.c,v 1.1 1997/12/20 20:23:18 kleink Exp $ +*/ + #include "winsup.h" -#include "cygerrno.h" +#include #include #include #include +#include +#include "cygerrno.h" +#include "perprocess.h" +#include "security.h" +#include "cygwin/version.h" +#include "path.h" +#include "fhandler.h" +#include "dtable.h" +#include "cygheap.h" +#include "shared_info.h" +#include "pinfo.h" +#include "sigproc.h" +#include "cygtls.h" +#include "tls_pbuf.h" +#include "ntdll.h" +#include +#include + +/* Right now we implement flock(2) locks using the POSIX semantics + in terms of inheritance and removal of locks. + + TODO: How to implement real BSD flock semantics? + + From the Linux man page: + + Locks created by flock() are associated with an open file table + entry. This means that duplicate file descriptors (created by, + for example, fork(2) or dup(2)) refer to the same lock, and + this lock may be modified or released using any of these + descriptors. Furthermore, the lock is released either by an + explicit LOCK_UN operation on any of these duplicate + descriptors, or when all such descriptors have been closed. + + If a process uses open(2) (or similar) to obtain more than one + descrip- tor for the same file, these descriptors are treated + independently by flock(). An attempt to lock the file using + one of these file descriptors may be denied by a lock that the + calling process has already placed via another descriptor. */ + +#define F_WAIT 0x10 /* Wait until lock is granted */ +#define F_FLOCK 0x20 /* Use flock(2) semantics for lock */ +#define F_POSIX 0x40 /* Use POSIX semantics for lock */ + +#ifndef OFF_MAX +#define OFF_MAX LLONG_MAX +#endif + +static NO_COPY muto lockf_guard; + +#define INODE_LIST_LOCK() (lockf_guard.init ("lockf_guard")->acquire ()) +#define INODE_LIST_UNLOCK() (lockf_guard.release ()) + +#define LOCK_OBJ_NAME_LEN 56 + +/* This function takes the own process security descriptor DACL and adds + SYNCHRONIZE permissions for everyone. This allows to wait any process + to wait for this process to set the event object to signalled in case + the lock gets removed or replaced. */ +static void +allow_others_to_sync () +{ + static NO_COPY bool done; + + if (done) + return; + + NTSTATUS status; + PACL dacl; + LPVOID ace; + ULONG len; + + /* Get this process DACL. We use a temporary path buffer in TLS space + to avoid having to alloc 64K from the stack. */ + tmp_pathbuf tp; + PSECURITY_DESCRIPTOR sd = (PSECURITY_DESCRIPTOR) tp.w_get (); + status = NtQuerySecurityObject (hMainProc, DACL_SECURITY_INFORMATION, sd, + NT_MAX_PATH * sizeof (WCHAR), &len); + if (!NT_SUCCESS (status)) + { + debug_printf ("NtQuerySecurityObject: %p", status); + return; + } + /* Create a valid dacl pointer and set it's size to be as big as + there's room in the temporary buffer. Note that the descriptor + is in self-relative format. */ + dacl = (PACL) ((char *) sd + (uintptr_t) sd->Dacl); + dacl->AclSize = NT_MAX_PATH * sizeof (WCHAR) - ((char *) dacl - (char *) sd); + /* Allow everyone to SYNCHRONIZE with this process. */ + if (!AddAccessAllowedAce (dacl, ACL_REVISION, SYNCHRONIZE, + well_known_world_sid)) + { + debug_printf ("AddAccessAllowedAce: %lu", GetLastError ()); + return; + } + /* Set the size of the DACL correctly. */ + if (!FindFirstFreeAce (dacl, &ace)) + { + debug_printf ("FindFirstFreeAce: %lu", GetLastError ()); + return; + } + dacl->AclSize = (char *) ace - (char *) dacl; + /* Write the DACL back. */ + status = NtSetSecurityObject (hMainProc, DACL_SECURITY_INFORMATION, sd); + if (!NT_SUCCESS (status)) + { + debug_printf ("NtSetSecurityObject: %p", status); + return; + } + done = true; +} + +/* Helper function to create an event security descriptor which only allows + SYNCHRONIZE access to everyone. Only the creating process has all access + rights. */ +static PSECURITY_DESCRIPTOR +everyone_sync_sd () +{ + static PSECURITY_DESCRIPTOR psd; + + if (!psd) + { + const size_t acl_len = sizeof (ACL) + + sizeof (ACCESS_ALLOWED_ACE) + MAX_SID_LEN; + psd = (PSECURITY_DESCRIPTOR) + malloc (sizeof (SECURITY_DESCRIPTOR) + acl_len); + InitializeSecurityDescriptor (psd, SECURITY_DESCRIPTOR_REVISION); + PACL dacl = (PACL) (psd + 1); + InitializeAcl (dacl, acl_len, ACL_REVISION); + if (!AddAccessAllowedAce (dacl, ACL_REVISION, SYNCHRONIZE, + well_known_world_sid)) + { + debug_printf ("AddAccessAllowedAce: %lu", GetLastError ()); + return NULL; + } + LPVOID ace; + if (!FindFirstFreeAce (dacl, &ace)) + { + debug_printf ("FindFirstFreeAce: %lu", GetLastError ()); + return NULL; + } + dacl->AclSize = (char *) ace - (char *) dacl; + SetSecurityDescriptorDacl (psd, TRUE, dacl, FALSE); + } + return psd; +} + +/* This function returns a handle to the top-level directory in the global + NT namespace used to implement advisory locking. */ +static HANDLE +get_lock_parent_dir () +{ + static HANDLE dir; + UNICODE_STRING uname; + OBJECT_ATTRIBUTES attr; + NTSTATUS status; + + INODE_LIST_LOCK(); + if (!dir) + { + RtlInitUnicodeString (&uname, L"\\BaseNamedObjects\\cygwin-fcntl-lk"); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT, NULL, + sec_all.lpSecurityDescriptor); + status = NtOpenDirectoryObject (&dir, DIRECTORY_ALL_ACCESS, &attr); + if (!NT_SUCCESS (status)) + { + status = NtCreateDirectoryObject (&dir, DIRECTORY_ALL_ACCESS, &attr); + if (!NT_SUCCESS (status)) + api_fatal ("NtCreateDirectoryObject: %p", status); + } + } + INODE_LIST_UNLOCK (); + return dir; +} + +/* Per lock class. */ +class lockf_t +{ + public: + short lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */ + short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ + _off64_t lf_start; /* Byte # of the start of the lock */ + _off64_t lf_end; /* Byte # of the end of the lock (-1=EOF) */ + /* We need the Cygwin PID for F_GETLK, the Win PID for synchronization. */ + pid_t lf_id; /* (P)Id of the resource holding the lock */ + DWORD lf_wid; /* Win PID of the resource holding the lock */ + class lockf_t **lf_head; /* Back pointer to the head of the lockf_t list */ + class inode_t *lf_inode; /* Back pointer to the inode_t */ + class lockf_t *lf_next; /* Pointer to the next lock on this inode_t */ + HANDLE lf_obj; /* Handle to the lock event object. */ + + lockf_t () + : lf_flags (0), lf_type (0), lf_start (0), lf_end (0), lf_id (0), + lf_wid (0), lf_head (NULL), lf_inode (NULL), lf_next (NULL), lf_obj (NULL) + {} + lockf_t (class inode_t *node, class lockf_t **head, short flags, short type, + _off64_t start, _off64_t end, pid_t id, DWORD wid) + : lf_flags (flags), lf_type (type), lf_start (start), lf_end (end), + lf_id (id), lf_wid (wid), lf_head (head), lf_inode (node), + lf_next (NULL), lf_obj (NULL) + {} + ~lockf_t (); + + void *operator new (size_t size) + { return ccalloc (HEAP_FHANDLER, 1, sizeof (lockf_t)); } + void operator delete (void *p) + { cfree (p); } + + void create_lock_obj (); + HANDLE open_lock_obj () const; + ULONG get_lock_obj_handle_count () const; + void del_lock_obj (); +}; + +/* Per inode_t class */ +class inode_t +{ + friend class lockf_t; + + public: + LIST_ENTRY (inode_t) i_next; + lockf_t *i_lockf; /* List of locks of this process. */ + lockf_t *i_all_lf; /* Temp list of all locks for this file. */ + + dev_t i_dev; + ino_t i_ino; + private: + HANDLE i_dir; /* Not inherited! */ + HANDLE i_mtx; /* Not inherited! */ + + void del_locks (lockf_t **head); + + public: + inode_t (dev_t dev, ino_t ino); + ~inode_t (); + + void *operator new (size_t size) + { return ccalloc (HEAP_FHANDLER, 1, sizeof (inode_t)); } + void operator delete (void *p) + { cfree (p); } + + static inode_t *get (dev_t dev, ino_t ino); + + void LOCK () { WaitForSingleObject (i_mtx, INFINITE); } + void UNLOCK () { ReleaseMutex (i_mtx); } + + void get_all_locks_list (); + void del_all_locks_list () { del_locks (&i_all_lf); } + + void del_my_locks () { LOCK (); del_locks (&i_lockf); UNLOCK (); +} + +}; + +/* Used to delete all locks on a file hold by this process. Called from + close(2). This implements fcntl semantics. + TODO: flock(2) semantics. */ +void +del_my_locks (inode_t *node) +{ + INODE_LIST_LOCK (); + node->del_my_locks (); + LIST_REMOVE (node, i_next); + INODE_LIST_UNLOCK (); +} + +/* The global inode_t list header. inode_t structs are created when a lock + is requested on a file the first time from this process. */ +/* Called in a forked child to get rid of all inodes and locks hold by the + parent process. Child processes don't inherit locks. */ +void +fixup_lockf_after_fork () +{ + inode_t *node, *next_node; + + LIST_FOREACH_SAFE (node, &cygheap->inode_list, i_next, next_node) + delete node; + LIST_INIT (&cygheap->inode_list); +} + +/* Called in an execed child. The exec'ed process must allow SYNCHRONIZE + access to everyone if at least one inode exists. + The lock owner's Windows PID changed and all lock event objects have to + be relabeled so that waiting processes know which process to wait on. */ +void +fixup_lockf_after_exec () +{ + inode_t *node; + + INODE_LIST_LOCK (); + if (LIST_FIRST (&cygheap->inode_list)) + allow_others_to_sync (); + LIST_FOREACH (node, &cygheap->inode_list, i_next) + { + node->LOCK (); + for (lockf_t *lock = node->i_lockf; lock; lock = lock->lf_next) + { + lock->del_lock_obj (); + lock->lf_wid = myself->dwProcessId; + lock->create_lock_obj (); + } + node->UNLOCK (); + } + LIST_INIT (&cygheap->inode_list); + INODE_LIST_UNLOCK (); +} + +/* static method to return a pointer to the inode_t structure for a specific + file. The file is specified by the device and inode_t number. If inode_t + doesn't exist, create it. */ +inode_t * +inode_t::get (dev_t dev, ino_t ino) +{ + inode_t *node; + + INODE_LIST_LOCK (); + LIST_FOREACH (node, &cygheap->inode_list, i_next) + if (node->i_dev == dev && node->i_ino == ino) + break; + if (!node) + { + node = new inode_t (dev, ino); + LIST_INSERT_HEAD (&cygheap->inode_list, node, i_next); + } + INODE_LIST_UNLOCK (); + return node; +} + +inode_t::inode_t (dev_t dev, ino_t ino) +: i_lockf (NULL), i_all_lf (NULL), i_dev (dev), i_ino (ino) +{ + HANDLE parent_dir; + WCHAR name[32]; + UNICODE_STRING uname; + OBJECT_ATTRIBUTES attr; + NTSTATUS status; + + parent_dir = get_lock_parent_dir (); + /* Create a subdir which is named after the device and inode_t numbers + of the given file, in hex notation. */ + int len = __small_swprintf (name, L"%08x-%016X", dev, ino); + RtlInitCountedUnicodeString (&uname, name, len * sizeof (WCHAR)); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT, parent_dir, + sec_all.lpSecurityDescriptor); + status = NtOpenDirectoryObject (&i_dir, DIRECTORY_ALL_ACCESS, &attr); + if (!NT_SUCCESS (status)) + { + status = NtCreateDirectoryObject (&i_dir, DIRECTORY_ALL_ACCESS, &attr); + if (!NT_SUCCESS (status)) + api_fatal ("NtCreateDirectoryObject: %p", status); + } + /* Create a mutex object in the file specific dir, which is used for + access synchronization on the dir and its objects. */ + RtlInitUnicodeString (&uname, L"mtx"); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT, i_dir, + sec_all.lpSecurityDescriptor); + status = NtOpenMutant (&i_mtx, MUTANT_ALL_ACCESS, &attr); + if (!NT_SUCCESS (status)) + { + status = NtCreateMutant (&i_mtx, MUTANT_ALL_ACCESS, &attr, FALSE); + if (!NT_SUCCESS (status)) + api_fatal ("NtCreateMutant: %p", status); + } +} + +inode_t::~inode_t () +{ + del_locks (&i_lockf); +} + +void +inode_t::del_locks (lockf_t **head) +{ + lockf_t *lock, *n_lock; + for (lock = *head; lock && (n_lock = lock->lf_next, 1); lock = n_lock) + delete lock; + *head = NULL; +} + +/* Enumerate all lock event objects for this file and create a lockf_t + list in the i_all_lf member. This list is searched in lf_getblock + for locks which potentially block our lock request. */ +void +inode_t::get_all_locks_list () +{ + struct fdbi + { + DIRECTORY_BASIC_INFORMATION dbi; + WCHAR buf[2][NAME_MAX + 1]; + } f; + ULONG context; + NTSTATUS status; + + del_all_locks_list (); + for (BOOLEAN restart = TRUE; + NT_SUCCESS (status = NtQueryDirectoryObject (i_dir, &f, sizeof f, TRUE, + restart, &context, NULL)); + restart = FALSE) + { + if (f.dbi.ObjectName.Length != LOCK_OBJ_NAME_LEN * sizeof (WCHAR)) + continue; + wchar_t *wc = f.dbi.ObjectName.Buffer, *endptr; + /* "%02x-%01x-%016X-%016X-%08x-%08x", + lf_flags, lf_type, lf_start, lf_end, lf_id, lf_wid */ + wc[LOCK_OBJ_NAME_LEN] = L'\0'; + short flags = wcstol (wc, &endptr, 16); + if ((flags & ~(F_WAIT | F_FLOCK | F_POSIX)) != 0 + || (flags & (F_FLOCK | F_POSIX) == (F_FLOCK | F_POSIX))) + continue; + short type = wcstol (endptr + 1, &endptr, 16); + if (type != F_RDLCK && type != F_WRLCK || !endptr || *endptr != L'-') + continue; + _off64_t start = (_off64_t) wcstoull (endptr + 1, &endptr, 16); + if (start < 0 || !endptr || *endptr != L'-') + continue; + _off64_t end = (_off64_t) wcstoull (endptr + 1, &endptr, 16); + if (end < -1LL || (end > 0 && end < start) || !endptr || *endptr != L'-') + continue; + pid_t id = wcstoul (endptr + 1, &endptr, 16); + if (!endptr || *endptr != L'-') + continue; + DWORD wid = wcstoul (endptr + 1, &endptr, 16); + if (endptr && *endptr != L'\0') + continue; + lockf_t *lock = new lockf_t (this, &i_all_lf, flags, type, start, end, + id, wid); + if (i_all_lf) + lock->lf_next = i_all_lf; + i_all_lf = lock; + } +} + +/* Create the lock event object in the file's subdir in the NT global + namespace. The name is constructed from the lock properties which + identify it uniquely, all values in hex. See the __small_swprintf + call right at the start. */ +void +lockf_t::create_lock_obj () +{ + WCHAR name[LOCK_OBJ_NAME_LEN]; + UNICODE_STRING uname; + OBJECT_ATTRIBUTES attr; + NTSTATUS status; + + __small_swprintf (name, L"%02x-%01x-%016X-%016X-%08x-%08x", + lf_flags, lf_type, lf_start, lf_end, lf_id, lf_wid); + RtlInitCountedUnicodeString (&uname, name, + LOCK_OBJ_NAME_LEN * sizeof (WCHAR)); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT, lf_inode->i_dir, + everyone_sync_sd ()); + status = NtCreateEvent (&lf_obj, EVENT_ALL_ACCESS, &attr, + NotificationEvent, FALSE); + if (!NT_SUCCESS (status)) + api_fatal ("NtCreateEvent: %p", status); +} + +/* Open a lock event object for SYNCHRONIZE access (to wait for it). */ +HANDLE +lockf_t::open_lock_obj () const +{ + WCHAR name[LOCK_OBJ_NAME_LEN]; + UNICODE_STRING uname; + OBJECT_ATTRIBUTES attr; + NTSTATUS status; + HANDLE obj; + + __small_swprintf (name, L"%02x-%01x-%016X-%016X-%08x-%08x", + lf_flags, lf_type, lf_start, lf_end, lf_id, lf_wid); + RtlInitCountedUnicodeString (&uname, name, + LOCK_OBJ_NAME_LEN * sizeof (WCHAR)); + InitializeObjectAttributes (&attr, &uname, OBJ_INHERIT, lf_inode->i_dir, + NULL); + status = NtOpenEvent (&obj, SYNCHRONIZE, &attr); + if (!NT_SUCCESS (status)) + api_fatal ("NtOpenEvent: %p", status); + return obj; +} + +/* Get the handle count of a lock object. */ +ULONG +lockf_t::get_lock_obj_handle_count () const +{ + OBJECT_BASIC_INFORMATION obi; + NTSTATUS status; + ULONG hdl_cnt = 0; + + if (lf_obj) + { + status = NtQueryObject (lf_obj, ObjectBasicInformation, + &obi, sizeof obi, NULL); + if (!NT_SUCCESS (status)) + small_printf ("NtQueryObject: %p\n", status); + else + hdl_cnt = obi.HandleCount; + } + return hdl_cnt; +} + +/* Close a lock event handle. The important thing here is to signal it + before closing the handle. This way all threads waiting for this + lock can wake up. */ +void +lockf_t::del_lock_obj () +{ + if (lf_obj) + { + SetEvent (lf_obj); + NtClose (lf_obj); + lf_obj = NULL; + } +} + +lockf_t::~lockf_t () +{ + del_lock_obj (); +} + +/* + * This variable controls the maximum number of processes that will + * be checked in doing deadlock detection. + */ +#if 0 /*TODO*/ +#define MAXDEPTH 50 +static int maxlockdepth = MAXDEPTH; +#endif + +#define NOLOCKF (struct lockf_t *)0 +#define SELF 0x1 +#define OTHERS 0x2 +static int lf_clearlock (lockf_t *, lockf_t **); +static int lf_findoverlap (lockf_t *, lockf_t *, int, lockf_t ***, lockf_t **); +static lockf_t *lf_getblock (lockf_t *, inode_t *node); +static int lf_getlock (lockf_t *, inode_t *, struct __flock64 *); +static int lf_setlock (lockf_t *, inode_t *, lockf_t **); +static void lf_split (lockf_t *, lockf_t *, lockf_t **); +static void lf_wakelock (lockf_t *); + +int +fhandler_disk_file::lock (int a_op, struct __flock64 *fl) +{ + _off64_t start, end, oadd; + lockf_t *n; + int error = 0; + struct __stat64 stat; + + short a_flags = fl->l_type & (F_POSIX | F_FLOCK); + short type = fl->l_type & (F_RDLCK | F_WRLCK | F_UNLCK); + + if (!a_flags) + a_flags = F_POSIX; /* default */ + if (a_op == F_SETLKW) + { + a_op = F_SETLK; + a_flags |= F_WAIT; + } + if (a_op == F_SETLK) + switch (type) + { + case F_UNLCK: + a_op = F_UNLCK; + break; + case F_RDLCK: + if (!(get_access () & GENERIC_READ)) + { + set_errno (EBADF); + return -1; + } + break; + case F_WRLCK: + if (!(get_access () & GENERIC_WRITE)) + { + set_errno (EBADF); + return -1; + } + break; + default: + set_errno (EINVAL); + return -1; + } + + if (fstat_by_handle (&stat) == -1) + return -1; + + /* + * Convert the flock structure into a start and end. + */ + switch (fl->l_whence) + { + case SEEK_SET: + start = fl->l_start; + break; + + case SEEK_CUR: + if ((start = lseek (0, SEEK_CUR)) == ILLEGAL_SEEK) + return -1; + break; + + case SEEK_END: + if (fl->l_start > 0 && stat.st_size > OFF_MAX - fl->l_start) + { + set_errno (EOVERFLOW); + return -1; + } + start = stat.st_size + fl->l_start; + break; + + default: + return (EINVAL); + } + if (start < 0) + { + set_errno (EINVAL); + return -1; + } + if (fl->l_len < 0) + { + if (start == 0) + { + set_errno (EINVAL); + return -1; + } + end = start - 1; + start += fl->l_len; + if (start < 0) + { + set_errno (EINVAL); + return -1; + } + } + else if (fl->l_len == 0) + end = -1; + else + { + oadd = fl->l_len - 1; + if (oadd > OFF_MAX - start) + { + set_errno (EOVERFLOW); + return -1; + } + end = start + oadd; + } + + if (!node) + { + node = inode_t::get (stat.st_dev, stat.st_ino); + need_fork_fixup (true); + } + /* Unlock the fd table which has been locked in fcntl_worker, otherwise + a F_SETLKW waits forever... */ + cygheap->fdtab.unlock (); + + lockf_t **head = &node->i_lockf; + + /* + * Avoid the common case of unlocking when inode_t has no locks. + */ + if (*head == NULL) + { + if (a_op != F_SETLK) + { + fl->l_type = F_UNLCK; + return 0; + } + } + /* + * Allocate a spare structure in case we have to split. + */ + lockf_t *clean = NULL; + if (a_op == F_SETLK || a_op == F_UNLCK) + clean = new lockf_t (); + /* + * Create the lockf_t structure + */ + lockf_t *lock = new lockf_t (node, head, a_flags, type, start, end, + getpid (), myself->dwProcessId); + + node->LOCK (); + switch (a_op) + { + case F_SETLK: + error = lf_setlock (lock, node, &clean); + break; + + case F_UNLCK: + error = lf_clearlock (lock, &clean); + lock->lf_next = clean; + clean = lock; + break; + + case F_GETLK: + error = lf_getlock (lock, node, fl); + lock->lf_next = clean; + clean = lock; + break; + + default: + lock->lf_next = clean; + clean = lock; + error = EINVAL; + break; + } + for (lock = clean; lock != NULL; ) + { + n = lock->lf_next; + delete lock; + lock = n; + } + node->UNLOCK (); + if (error) + { + set_errno (error); + return -1; + } + return 0; +} + +/* + * Set a byte-range lock. + */ +static int +lf_setlock (lockf_t *lock, inode_t *node, lockf_t **clean) +{ + lockf_t *block; + lockf_t **head = lock->lf_head; + lockf_t **prev, *overlap; + int ovcase, priority, old_prio, needtolink; + + /* + * Set the priority + */ + priority = old_prio = GetThreadPriority (GetCurrentThread ()); + if (lock->lf_type == F_WRLCK && priority <= THREAD_PRIORITY_ABOVE_NORMAL) + priority = THREAD_PRIORITY_HIGHEST; + /* + * Scan lock list for this file looking for locks that would block us. + */ + while ((block = lf_getblock(lock, node))) + { + /* + * Free the structure and return if nonblocking. + */ + if ((lock->lf_flags & F_WAIT) == 0) + { + node->del_all_locks_list (); + lock->lf_next = *clean; + *clean = lock; + return EAGAIN; + } + /* + * We are blocked. Since flock style locks cover + * the whole file, there is no chance for deadlock. + * For byte-range locks we must check for deadlock. + * + * Deadlock detection is done by looking through the + * wait channels to see if there are any cycles that + * involve us. MAXDEPTH is set just to make sure we + * do not go off into neverland. + */ + /* FIXME: We check the handle count of all the lock event objects + this process holds. If it's > 1, another process is + waiting for one of our locks. This method isn't overly + intelligent. If it turns out to be too dumb, we might + have to remove it or to find another method. */ + for (lockf_t *lk = node->i_lockf; lk; lk = lk->lf_next) + if (lk->get_lock_obj_handle_count () > 1) + return EDEADLK; + + /* + * For flock type locks, we must first remove + * any shared locks that we hold before we sleep + * waiting for an exclusive lock. + */ + if ((lock->lf_flags & F_FLOCK) && lock->lf_type == F_WRLCK) + { + lock->lf_type = F_UNLCK; + (void) lf_clearlock (lock, clean); + lock->lf_type = F_WRLCK; + } + + /* + * Add our lock to the blocked list and sleep until we're free. + * Remember who blocked us (for deadlock detection). + */ + /* TODO */ + + /* Wait for the blocking object and its holding process. */ + HANDLE obj = block->open_lock_obj (); + HANDLE proc = OpenProcess (SYNCHRONIZE, FALSE, block->lf_wid); + if (!proc) + api_fatal ("OpenProcess: %E"); + HANDLE w4[3] = { obj, proc, signal_arrived }; + node->del_all_locks_list (); + //SetThreadPriority (GetCurrentThread (), priority); + node->UNLOCK (); + DWORD ret = WaitForMultipleObjects (3, w4, FALSE, INFINITE); + NtClose (proc); + NtClose (obj); + node->LOCK (); + //SetThreadPriority (GetCurrentThread (), old_prio); + switch (ret) + { + case WAIT_OBJECT_0: + case WAIT_OBJECT_0 + 1: + /* The lock object has been set to signalled or the process + holding the lock has exited. */ + break; + case WAIT_OBJECT_0 + 2: + /* A signal came in. */ + _my_tls.call_signal_handler (); + return EINTR; + default: + return geterrno_from_win_error (); + } + } + node->del_all_locks_list (); + allow_others_to_sync (); + /* + * No blocks!! Add the lock. Note that we will + * downgrade or upgrade any overlapping locks this + * process already owns. + * + * Handle any locks that overlap. + */ + prev = head; + block = *head; + needtolink = 1; + for (;;) + { + ovcase = lf_findoverlap (block, lock, SELF, &prev, &overlap); + if (ovcase) + block = overlap->lf_next; + /* + * Six cases: + * 0) no overlap + * 1) overlap == lock + * 2) overlap contains lock + * 3) lock contains overlap + * 4) overlap starts before lock + * 5) overlap ends after lock + */ + switch (ovcase) + { + case 0: /* no overlap */ + if (needtolink) + { + *prev = lock; + lock->lf_next = overlap; + lock->create_lock_obj (); + } + break; + + case 1: /* overlap == lock */ + /* + * If downgrading lock, others may be + * able to acquire it. + * Cygwin: Always wake lock. + */ + lf_wakelock (overlap); + overlap->lf_type = lock->lf_type; + overlap->create_lock_obj (); + lock->lf_next = *clean; + *clean = lock; + break; + + case 2: /* overlap contains lock */ + /* + * Check for common starting point and different types. + */ + if (overlap->lf_type == lock->lf_type) + { + lock->lf_next = *clean; + *clean = lock; + break; + } + if (overlap->lf_start == lock->lf_start) + { + *prev = lock; + lock->lf_next = overlap; + overlap->lf_start = lock->lf_end + 1; + } + else + lf_split (overlap, lock, clean); + lf_wakelock (overlap); + overlap->create_lock_obj (); + lock->create_lock_obj (); + if (lock->lf_next && !lock->lf_next->lf_obj) + lock->lf_next->create_lock_obj (); + break; + + case 3: /* lock contains overlap */ + /* + * If downgrading lock, others may be able to + * acquire it, otherwise take the list. + * Cygwin: Always wake old lock and create new lock. + */ + lf_wakelock (overlap); + /* + * Add the new lock if necessary and delete the overlap. + */ + if (needtolink) + { + *prev = lock; + lock->lf_next = overlap->lf_next; + prev = &lock->lf_next; + lock->create_lock_obj (); + needtolink = 0; + } + else + *prev = overlap->lf_next; + overlap->lf_next = *clean; + *clean = overlap; + continue; + + case 4: /* overlap starts before lock */ + /* + * Add lock after overlap on the list. + */ + lock->lf_next = overlap->lf_next; + overlap->lf_next = lock; + overlap->lf_end = lock->lf_start - 1; + prev = &lock->lf_next; + lf_wakelock (overlap); + overlap->create_lock_obj (); + lock->create_lock_obj (); + needtolink = 0; + continue; + + case 5: /* overlap ends after lock */ + /* + * Add the new lock before overlap. + */ + if (needtolink) { + *prev = lock; + lock->lf_next = overlap; + } + overlap->lf_start = lock->lf_end + 1; + lf_wakelock (overlap); + lock->create_lock_obj (); + overlap->create_lock_obj (); + break; + } + break; + } + return 0; +} + +/* + * Remove a byte-range lock on an inode_t. + * + * Generally, find the lock (or an overlap to that lock) + * and remove it (or shrink it), then wakeup anyone we can. + */ +static int +lf_clearlock (lockf_t *unlock, lockf_t **clean) +{ + lockf_t **head = unlock->lf_head; + lockf_t *lf = *head; + lockf_t *overlap, **prev; + int ovcase; + + if (lf == NOLOCKF) + return 0; + prev = head; + while ((ovcase = lf_findoverlap (lf, unlock, SELF, &prev, &overlap))) + { + /* + * Wakeup the list of locks to be retried. + */ + lf_wakelock (overlap); + + switch (ovcase) + { + case 1: /* overlap == lock */ + *prev = overlap->lf_next; + overlap->lf_next = *clean; + *clean = overlap; + break; + + case 2: /* overlap contains lock: split it */ + if (overlap->lf_start == unlock->lf_start) + { + overlap->lf_start = unlock->lf_end + 1; + overlap->create_lock_obj (); + break; + } + lf_split (overlap, unlock, clean); + overlap->lf_next = unlock->lf_next; + overlap->create_lock_obj (); + if (overlap->lf_next && !overlap->lf_next->lf_obj) + overlap->lf_next->create_lock_obj (); + break; + + case 3: /* lock contains overlap */ + *prev = overlap->lf_next; + lf = overlap->lf_next; + overlap->lf_next = *clean; + *clean = overlap; + continue; + + case 4: /* overlap starts before lock */ + overlap->lf_end = unlock->lf_start - 1; + prev = &overlap->lf_next; + lf = overlap->lf_next; + overlap->create_lock_obj (); + continue; + + case 5: /* overlap ends after lock */ + overlap->lf_start = unlock->lf_end + 1; + overlap->create_lock_obj (); + break; + } + break; + } + return 0; +} + +/* + * Check whether there is a blocking lock, + * and if so return its process identifier. + */ +static int +lf_getlock (lockf_t *lock, inode_t *node, struct __flock64 *fl) +{ + lockf_t *block; + + if ((block = lf_getblock (lock, node))) + { + fl->l_type = block->lf_type; + fl->l_whence = SEEK_SET; + fl->l_start = block->lf_start; + if (block->lf_end == -1) + fl->l_len = 0; + else + fl->l_len = block->lf_end - block->lf_start + 1; + if (block->lf_flags & F_POSIX) + fl->l_pid = block->lf_id; + else + fl->l_pid = -1; + } + else + fl->l_type = F_UNLCK; + node->del_all_locks_list (); + return 0; +} + +/* + * Walk the list of locks for an inode_t and + * return the first blocking lock. + */ +static lockf_t * +lf_getblock (lockf_t *lock, inode_t *node) +{ + node->get_all_locks_list (); + lockf_t **prev, *overlap, *lf = node->i_all_lf; + int ovcase; + + prev = lock->lf_head; + while ((ovcase = lf_findoverlap (lf, lock, OTHERS, &prev, &overlap))) + { + /* + * We've found an overlap, see if it blocks us + */ + if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) + return overlap; + /* + * Nope, point to the next one on the list and + * see if it blocks us + */ + lf = overlap->lf_next; + } + return NOLOCKF; +} + +/* + * Walk the list of locks for an inode_t to + * find an overlapping lock (if any). + * + * NOTE: this returns only the FIRST overlapping lock. There + * may be more than one. + */ +static int +lf_findoverlap (lockf_t *lf, lockf_t *lock, int type, lockf_t ***prev, + lockf_t **overlap) +{ + _off64_t start, end; + + *overlap = lf; + if (lf == NOLOCKF) + return 0; + + start = lock->lf_start; + end = lock->lf_end; + while (lf != NOLOCKF) + { + if ((type & OTHERS) && lf->lf_id == lock->lf_id) + { + *prev = &lf->lf_next; + *overlap = lf = lf->lf_next; + continue; + } + /* + * OK, check for overlap + * + * Six cases: + * 0) no overlap + * 1) overlap == lock + * 2) overlap contains lock + * 3) lock contains overlap + * 4) overlap starts before lock + * 5) overlap ends after lock + */ + if ((lf->lf_end != -1 && start > lf->lf_end) || + (end != -1 && lf->lf_start > end)) + { + /* Case 0 */ + if ((type & SELF) && end != -1 && lf->lf_start > end) + return 0; + *prev = &lf->lf_next; + *overlap = lf = lf->lf_next; + continue; + } + if ((lf->lf_start == start) && (lf->lf_end == end)) + { + /* Case 1 */ + return 1; + } + if ((lf->lf_start <= start) && (end != -1) && + ((lf->lf_end >= end) || (lf->lf_end == -1))) + { + /* Case 2 */ + return 2; + } + if (start <= lf->lf_start && (end == -1 || + (lf->lf_end != -1 && end >= lf->lf_end))) + { + /* Case 3 */ + return 3; + } + if ((lf->lf_start < start) && + ((lf->lf_end >= start) || (lf->lf_end == -1))) + { + /* Case 4 */ + return 4; + } + if ((lf->lf_start > start) && (end != -1) && + ((lf->lf_end > end) || (lf->lf_end == -1))) + { + /* Case 5 */ + return 5; + } + api_fatal ("lf_findoverlap: default\n"); + } + return 0; +} + +/* + * Split a lock and a contained region into + * two or three locks as necessary. + */ +static void +lf_split (lockf_t *lock1, lockf_t *lock2, lockf_t **split) +{ + lockf_t *splitlock; + + /* + * Check to see if spliting into only two pieces. + */ + if (lock1->lf_start == lock2->lf_start) + { + lock1->lf_start = lock2->lf_end + 1; + lock2->lf_next = lock1; + return; + } + if (lock1->lf_end == lock2->lf_end) + { + lock1->lf_end = lock2->lf_start - 1; + lock2->lf_next = lock1->lf_next; + lock1->lf_next = lock2; + return; + } + /* + * Make a new lock consisting of the last part of + * the encompassing lock. We use the preallocated + * splitlock so we don't have to block. + */ + splitlock = *split; + assert (splitlock != NULL); + *split = splitlock->lf_next; + memcpy (splitlock, lock1, sizeof *splitlock); + /* We have to unset the obj HANDLE here which has been copied by the + above memcpy, so that the calling function recognizes the new object. + See post-lf_split handling in lf_setlock and lf_clearlock. */ + splitlock->lf_obj = NULL; + splitlock->lf_start = lock2->lf_end + 1; + lock1->lf_end = lock2->lf_start - 1; + /* + * OK, now link it in + */ + splitlock->lf_next = lock1->lf_next; + lock2->lf_next = splitlock; + lock1->lf_next = lock2; +} + +/* + * Wakeup a blocklist + * Cygwin: Just signal the lock which gets removed. This unblocks + * all threads waiting for this lock. + */ +static void +lf_wakelock (lockf_t *listhead) +{ + listhead->del_lock_obj (); +} int flock (int fd, int operation) @@ -38,7 +1334,7 @@ flock (int fd, int operation) switch (operation & (~LOCK_NB)) { case LOCK_EX: - l.l_type = F_WRLCK; + l.l_type = F_WRLCK | F_FLOCK; i = fcntl_worker (fd, cmd, &l); if (i == -1) { @@ -49,7 +1345,7 @@ flock (int fd, int operation) } break; case LOCK_SH: - l.l_type = F_RDLCK; + l.l_type = F_RDLCK | F_FLOCK; i = fcntl_worker (fd, cmd, &l); if (i == -1) { @@ -60,7 +1356,7 @@ flock (int fd, int operation) } break; case LOCK_UN: - l.l_type = F_UNLCK; + l.l_type = F_UNLCK | F_FLOCK; i = fcntl_worker (fd, cmd, &l); if (i == -1) { @@ -78,12 +1374,44 @@ flock (int fd, int operation) return i; } -#ifdef FLOCK_EMULATE_IS_MAIN -int -main (int argc, char **argv) +extern "C" int +lockf (int filedes, int function, _off64_t size) { - int fd = open (argv[1], O_WRONLY); - flock (fd, LOCK_EX); - return 0; + struct flock fl; + int cmd; + + fl.l_start = 0; + fl.l_len = size; + fl.l_whence = SEEK_CUR; + + switch (function) + { + case F_ULOCK: + cmd = F_SETLK; + fl.l_type = F_UNLCK; + break; + case F_LOCK: + cmd = F_SETLKW; + fl.l_type = F_WRLCK; + break; + case F_TLOCK: + cmd = F_SETLK; + fl.l_type = F_WRLCK; + break; + case F_TEST: + fl.l_type = F_WRLCK; + if (fcntl_worker (filedes, F_GETLK, &fl) == -1) + return -1; + if (fl.l_type == F_UNLCK || fl.l_pid == getpid ()) + return 0; + errno = EAGAIN; + return -1; + /* NOTREACHED */ + default: + errno = EINVAL; + return -1; + /* NOTREACHED */ + } + + return fcntl_worker (filedes, cmd, &fl); } -#endif -- cgit v1.2.3