Update copyright dates with scripts/update-copyrights.

[thirdparty/glibc.git] / nscd / nscd_helper.c
diff --git a/nscd/nscd_helper.c b/nscd/nscd_helper.c

index fd749446be3a1f7bfc5ba60c92f40e2f75fee88b..22905d0b839671cc42d606230921a70827b356bc 100644 (file)
--- a/nscd/nscd_helper.c
+++ b/nscd/nscd_helper.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
     This file is part of the GNU C Library.
     Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
  
@@ -13,16 +13,19 @@
     Lesser General Public License for more details.
  
     You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
  
  #include <assert.h>
  #include <errno.h>
  #include <fcntl.h>
  #include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
  #include <time.h>
  #include <unistd.h>
+#include <stdint.h>
  #include <sys/mman.h>
  #include <sys/poll.h>
  #include <sys/socket.h>
@@ -32,10 +35,50 @@
  #include <sys/un.h>
  #include <not-cancel.h>
  #include <nis/rpcsvc/nis.h>
+#include <kernel-features.h>
  
  #include "nscd-client.h"
  
  
+/* Extra time we wait if the socket is still receiving data.  This
+   value is in milliseconds.  Note that the other side is nscd on the
+   local machine and it is already transmitting data.  So the wait
+   time need not be long.  */
+#define EXTRA_RECEIVE_TIME 200
+
+
+static int
+wait_on_socket (int sock, long int usectmo)
+{
+  struct pollfd fds[1];
+  fds[0].fd = sock;
+  fds[0].events = POLLIN | POLLERR | POLLHUP;
+  int n = __poll (fds, 1, usectmo);
+  if (n == -1 && __builtin_expect (errno == EINTR, 0))
+    {
+      /* Handle the case where the poll() call is interrupted by a
+        signal.  We cannot just use TEMP_FAILURE_RETRY since it might
+        lead to infinite loops.  */
+      struct timeval now;
+      (void) __gettimeofday (&now, NULL);
+      long int end = now.tv_sec * 1000 + usectmo + (now.tv_usec + 500) / 1000;
+      long int timeout = usectmo;
+      while (1)
+       {
+         n = __poll (fds, 1, timeout);
+         if (n != -1 || errno != EINTR)
+           break;
+
+         /* Recompute the timeout time.  */
+         (void) __gettimeofday (&now, NULL);
+         timeout = end - (now.tv_sec * 1000 + (now.tv_usec + 500) / 1000);
+       }
+    }
+
+  return n;
+}
+
+
  ssize_t
  __readall (int fd, void *buf, size_t len)
  {
@@ -43,9 +86,17 @@ __readall (int fd, void *buf, size_t len)
    ssize_t ret;
    do
      {
+    again:
        ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
        if (ret <= 0)
-       break;
+       {
+         if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
+             /* The socket is still receiving data.  Wait a bit more.  */
+             && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
+           goto again;
+
+         break;
+       }
        buf = (char *) buf + ret;
        n -= ret;
      }
@@ -59,7 +110,15 @@ __readvall (int fd, const struct iovec *iov, int iovcnt)
  {
    ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
    if (ret <= 0)
-    return ret;
+    {
+      if (__glibc_likely (ret == 0 || errno != EAGAIN))
+       /* A genuine error or no data to read.  */
+       return ret;
+
+      /* The data has not all yet been received.  Do as if we have not
+        read anything yet.  */
+      ret = 0;
+    }
  
    size_t total = 0;
    for (int i = 0; i < iovcnt; ++i)
@@ -81,9 +140,17 @@ __readvall (int fd, const struct iovec *iov, int iovcnt)
             }
           iovp->iov_base = (char *) iovp->iov_base + r;
           iovp->iov_len -= r;
+       again:
           r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
           if (r <= 0)
-           break;
+           {
+             if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
+                 /* The socket is still receiving data.  Wait a bit more.  */
+                 && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
+               goto again;
+
+             break;
+           }
           ret += r;
         }
        while (ret < total);
@@ -95,16 +162,20 @@ __readvall (int fd, const struct iovec *iov, int iovcnt)
  
  
  static int
-open_socket (void)
+open_socket (request_type type, const char *key, size_t keylen)
  {
-  int sock = __socket (PF_UNIX, SOCK_STREAM, 0);
+  int sock;
+
+  sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
    if (sock < 0)
      return -1;
  
-  /* Make socket non-blocking.  */
-  int fl = __fcntl (sock, F_GETFL);
-  if (fl != -1)
-    __fcntl (sock, F_SETFL, fl | O_NONBLOCK);
+  size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
+  struct
+  {
+    request_header req;
+    char key[];
+  } *reqdata = alloca (real_sizeof_reqdata);
  
    struct sockaddr_un sun;
    sun.sun_family = AF_UNIX;
@@ -113,13 +184,56 @@ open_socket (void)
        && errno != EINPROGRESS)
      goto out;
  
-  struct pollfd fds[1];
-  fds[0].fd = sock;
-  fds[0].events = POLLOUT | POLLERR | POLLHUP;
-  if (__poll (fds, 1, 5 * 1000) > 0)
-    /* Success.  We do not check for success of the connect call here.
-       If it failed, the following operations will fail.  */
-    return sock;
+  reqdata->req.version = NSCD_VERSION;
+  reqdata->req.type = type;
+  reqdata->req.key_len = keylen;
+
+  memcpy (reqdata->key, key, keylen);
+
+  bool first_try = true;
+  struct timeval tvend;
+  /* Fake initializing tvend.  */
+  asm ("" : "=m" (tvend));
+  while (1)
+    {
+#ifndef MSG_NOSIGNAL
+# define MSG_NOSIGNAL 0
+#endif
+      ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
+                                                real_sizeof_reqdata,
+                                                MSG_NOSIGNAL));
+      if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
+       /* We managed to send the request.  */
+       return sock;
+
+      if (wres != -1 || errno != EAGAIN)
+       /* Something is really wrong, no chance to continue.  */
+       break;
+
+      /* The daemon is busy; wait for it.  */
+      int to;
+      struct timeval now;
+      (void) __gettimeofday (&now, NULL);
+      if (first_try)
+       {
+         tvend.tv_usec = now.tv_usec;
+         tvend.tv_sec = now.tv_sec + 5;
+         to = 5 * 1000;
+         first_try = false;
+       }
+      else
+       to = ((tvend.tv_sec - now.tv_sec) * 1000
+             + (tvend.tv_usec - now.tv_usec) / 1000);
+
+      struct pollfd fds[1];
+      fds[0].fd = sock;
+      fds[0].events = POLLOUT | POLLERR | POLLHUP;
+      if (__poll (fds, 1, to) <= 0)
+       /* The connection timed out or broke down.  */
+       break;
+
+      /* We try to write again.  */
+    }
  
   out:
    close_not_cancel_no_status (sock);
@@ -137,41 +251,11 @@ __nscd_unmap (struct mapped_database *mapped)
  }
  
  
-static int
-wait_on_socket (int sock)
-{
-  struct pollfd fds[1];
-  fds[0].fd = sock;
-  fds[0].events = POLLIN | POLLERR | POLLHUP;
-  int n = __poll (fds, 1, 5 * 1000);
-  if (n == -1 && __builtin_expect (errno == EINTR, 0))
-    {
-      /* Handle the case where the poll() call is interrupted by a
-        signal.  We cannot just use TEMP_FAILURE_RETRY since it might
-        lead to infinite loops.  */
-      struct timeval now;
-      (void) __gettimeofday (&now, NULL);
-      long int end = (now.tv_sec + 5) * 1000 + (now.tv_usec + 500) / 1000;
-      while (1)
-       {
-         long int timeout = end - (now.tv_sec * 1000
-                                   + (now.tv_usec + 500) / 1000);
-         n = __poll (fds, 1, timeout);
-         if (n != -1 || errno != EINTR)
-           break;
-         (void) __gettimeofday (&now, NULL);
-       }
-    }
-
-  return n;
-}
-
-
  /* Try to get a file descriptor for the shared meory segment
     containing the database.  */
-static struct mapped_database *
-get_mapping (request_type type, const char *key,
-            struct mapped_database **mappedp)
+struct mapped_database *
+__nscd_get_mapping (request_type type, const char *key,
+                   struct mapped_database **mappedp)
  {
    struct mapped_database *result = NO_MAPPING;
  #ifdef SCM_RIGHTS
@@ -179,46 +263,28 @@ get_mapping (request_type type, const char *key,
    int saved_errno = errno;
  
    int mapfd = -1;
+  char resdata[keylen];
  
-  /* Send the request.  */
-  struct
-  {
-    request_header req;
-    char key[keylen];
-  } reqdata;
-
-  int sock = open_socket ();
+  /* Open a socket and send the request.  */
+  int sock = open_socket (type, key, keylen);
    if (sock < 0)
      goto out;
  
-  reqdata.req.version = NSCD_VERSION;
-  reqdata.req.type = type;
-  reqdata.req.key_len = keylen;
-  memcpy (reqdata.key, key, keylen);
-
-# ifndef MSG_NOSIGNAL
-#  define MSG_NOSIGNAL 0
-# endif
-  if (__builtin_expect (TEMP_FAILURE_RETRY (__send (sock, &reqdata,
-                                                   sizeof (reqdata),
-                                                   MSG_NOSIGNAL))
-                       != sizeof (reqdata), 0))
-    /* We cannot even write the request.  */
-    goto out_close2;
-
    /* Room for the data sent along with the file descriptor.  We expect
       the key name back.  */
-# define resdata reqdata.key
-  struct iovec iov[1];
+  uint64_t mapsize;
+  struct iovec iov[2];
    iov[0].iov_base = resdata;
    iov[0].iov_len = keylen;
+  iov[1].iov_base = &mapsize;
+  iov[1].iov_len = sizeof (mapsize);
  
    union
    {
      struct cmsghdr hdr;
      char bytes[CMSG_SPACE (sizeof (int))];
    } buf;
-  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
+  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
                         .msg_control = buf.bytes,
                         .msg_controllen = sizeof (buf) };
    struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
@@ -229,67 +295,84 @@ get_mapping (request_type type, const char *key,
  
    /* This access is well-aligned since BUF is correctly aligned for an
       int and CMSG_DATA preserves this alignment.  */
-  *(int *) CMSG_DATA (cmsg) = -1;
+  memset (CMSG_DATA (cmsg), '\xff', sizeof (int));
  
    msg.msg_controllen = cmsg->cmsg_len;
  
-  if (wait_on_socket (sock) <= 0)
+  if (wait_on_socket (sock, 5 * 1000) <= 0)
      goto out_close2;
  
-  if (__builtin_expect (TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, 0))
-                       != keylen, 0))
-    goto out_close2;
-
-  mapfd = *(int *) CMSG_DATA (cmsg);
+# ifndef MSG_CMSG_CLOEXEC
+#  define MSG_CMSG_CLOEXEC 0
+# endif
+  ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));
  
-  if (__builtin_expect (CMSG_FIRSTHDR (&msg)->cmsg_len
-                       != CMSG_LEN (sizeof (int)), 0))
-    goto out_close;
+  if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
+                       || (CMSG_FIRSTHDR (&msg)->cmsg_len
+                           != CMSG_LEN (sizeof (int))), 0))
+    goto out_close2;
  
-  struct stat64 st;
-  if (__builtin_expect (strcmp (resdata, key) != 0, 0)
-      || __builtin_expect (fstat64 (mapfd, &st) != 0, 0)
-      || __builtin_expect (st.st_size < sizeof (struct database_pers_head), 0))
-    goto out_close;
+  int *ip = (void *) CMSG_DATA (cmsg);
+  mapfd = *ip;
  
-  struct database_pers_head head;
-  if (__builtin_expect (TEMP_FAILURE_RETRY (__pread (mapfd, &head,
-                                                    sizeof (head), 0))
-                       != sizeof (head), 0))
+  if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
      goto out_close;
  
-  if (__builtin_expect (head.version != DB_VERSION, 0)
-      || __builtin_expect (head.header_size != sizeof (head), 0)
-      /* This really should not happen but who knows, maybe the update
-        thread got stuck.  */
-      || __builtin_expect (! head.nscd_certainly_running
-                          && head.timestamp + MAPPING_TIMEOUT < time (NULL),
-                          0))
+  if (__glibc_unlikely (strcmp (resdata, key) != 0))
      goto out_close;
  
-  size_t size = (sizeof (head) + roundup (head.module * sizeof (ref_t), ALIGN)
-                + head.data_size);
+  if (__glibc_unlikely (n == keylen))
+    {
+      struct stat64 st;
+      if (__builtin_expect (fstat64 (mapfd, &st) != 0, 0)
+         || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
+                              0))
+       goto out_close;
  
-  if (__builtin_expect (st.st_size < size, 0))
-    goto out_close;
+      mapsize = st.st_size;
+    }
  
    /* The file is large enough, map it now.  */
-  void *mapping = __mmap (NULL, size, PROT_READ, MAP_SHARED, mapfd, 0);
-  if (__builtin_expect (mapping != MAP_FAILED, 1))
+  void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
+  if (__glibc_likely (mapping != MAP_FAILED))
      {
-      /* Allocate a record for the mapping.  */
-      struct mapped_database *newp = malloc (sizeof (*newp));
-      if (newp == NULL)
+      /* Check whether the database is correct and up-to-date.  */
+      struct database_pers_head *head = mapping;
+
+      if (__builtin_expect (head->version != DB_VERSION, 0)
+         || __builtin_expect (head->header_size != sizeof (*head), 0)
+         /* Catch some misconfiguration.  The server should catch
+            them now but some older versions did not.  */
+         || __builtin_expect (head->module == 0, 0)
+         /* This really should not happen but who knows, maybe the update
+            thread got stuck.  */
+         || __builtin_expect (! head->nscd_certainly_running
+                              && (head->timestamp + MAPPING_TIMEOUT
+                                  < time (NULL)), 0))
         {
-         /* Ugh, after all we went through the memory allocation failed.  */
-         __munmap (mapping, size);
+       out_unmap:
+         __munmap (mapping, mapsize);
           goto out_close;
         }
  
+      size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
+                                              ALIGN)
+                    + head->data_size);
+
+      if (__glibc_unlikely (mapsize < size))
+       goto out_unmap;
+
+      /* Allocate a record for the mapping.  */
+      struct mapped_database *newp = malloc (sizeof (*newp));
+      if (newp == NULL)
+       /* Ugh, after all we went through the memory allocation failed.  */
+       goto out_unmap;
+
        newp->head = mapping;
-      newp->data = ((char *) mapping + head.header_size
-                   + roundup (head.module * sizeof (ref_t), ALIGN));
+      newp->data = ((char *) mapping + head->header_size
+                   + roundup (head->module * sizeof (ref_t), ALIGN));
        newp->mapsize = size;
+      newp->datasize = head->data_size;
        /* Set counter to 1 to show it is usable.  */
        newp->counter = 1;
  
@@ -313,36 +396,30 @@ get_mapping (request_type type, const char *key,
    return result;
  }
  
-
  struct mapped_database *
  __nscd_get_map_ref (request_type type, const char *name,
-                   struct locked_map_ptr *mapptr, int *gc_cyclep)
+                   volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
  {
    struct mapped_database *cur = mapptr->mapped;
    if (cur == NO_MAPPING)
      return cur;
  
-  int cnt = 0;
-  while (atomic_compare_and_exchange_val_acq (&mapptr->lock, 1, 0) != 0)
-    {
-      // XXX Best number of rounds?
-      if (++cnt > 5)
-       return NO_MAPPING;
-
-      atomic_delay ();
-    }
+  if (!__nscd_acquire_maplock (mapptr))
+    return NO_MAPPING;
  
    cur = mapptr->mapped;
  
-  if (__builtin_expect (cur != NO_MAPPING, 1))
+  if (__glibc_likely (cur != NO_MAPPING))
      {
        /* If not mapped or timestamp not updated, request new map.  */
        if (cur == NULL
           || (cur->head->nscd_certainly_running == 0
-             && cur->head->timestamp + MAPPING_TIMEOUT < time (NULL)))
-       cur = get_mapping (type, name, &mapptr->mapped);
+             && cur->head->timestamp + MAPPING_TIMEOUT < time (NULL))
+         || cur->head->data_size > cur->datasize)
+       cur = __nscd_get_mapping (type, name,
+                                 (struct mapped_database **) &mapptr->mapped);
  
-      if (__builtin_expect (cur != NO_MAPPING, 1))
+      if (__glibc_likely (cur != NO_MAPPING))
         {
           if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
                                 0))
@@ -358,32 +435,95 @@ __nscd_get_map_ref (request_type type, const char *name,
  }
  
  
-const struct datahead *
+/* Using sizeof (hashentry) is not always correct to determine the size of
+   the data structure as found in the nscd cache.  The program could be
+   a 64-bit process and nscd could be a 32-bit process.  In this case
+   sizeof (hashentry) would overestimate the size.  The following is
+   the minimum size of such an entry, good enough for our tests here.  */
+#define MINIMUM_HASHENTRY_SIZE \
+  (offsetof (struct hashentry, dellist) + sizeof (int32_t))
+
+
+/* Don't return const struct datahead *, as even though the record
+   is normally constant, it can change arbitrarily during nscd
+   garbage collection.  */
+struct datahead *
  __nscd_cache_search (request_type type, const char *key, size_t keylen,
-                    const struct mapped_database *mapped)
+                    const struct mapped_database *mapped, size_t datalen)
  {
    unsigned long int hash = __nis_hash (key, keylen) % mapped->head->module;
+  size_t datasize = mapped->datasize;
  
-  ref_t work = mapped->head->array[hash];
-  while (work != ENDREF)
+  ref_t trail = mapped->head->array[hash];
+  trail = atomic_forced_read (trail);
+  ref_t work = trail;
+  size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
+                               + offsetof (struct datahead, data) / 2);
+  int tick = 0;
+
+  while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
      {
        struct hashentry *here = (struct hashentry *) (mapped->data + work);
-
-      if (type == here->type && keylen == here->len
-         && memcmp (key, mapped->data + here->key, keylen) == 0)
+      ref_t here_key, here_packet;
+
+#if !_STRING_ARCH_unaligned
+      /* Although during garbage collection when moving struct hashentry
+        records around we first copy from old to new location and then
+        adjust pointer from previous hashentry to it, there is no barrier
+        between those memory writes.  It is very unlikely to hit it,
+        so check alignment only if a misaligned load can crash the
+        application.  */
+      if ((uintptr_t) here & (__alignof__ (*here) - 1))
+       return NULL;
+#endif
+
+      if (type == here->type
+         && keylen == here->len
+         && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
+         && memcmp (key, mapped->data + here_key, keylen) == 0
+         && ((here_packet = atomic_forced_read (here->packet))
+             + sizeof (struct datahead) <= datasize))
         {
           /* We found the entry.  Increment the appropriate counter.  */
-         const struct datahead *dh
-           = (struct datahead *) (mapped->data + here->packet);
+         struct datahead *dh
+           = (struct datahead *) (mapped->data + here_packet);
+
+#if !_STRING_ARCH_unaligned
+         if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
+           return NULL;
+#endif
  
           /* See whether we must ignore the entry or whether something
              is wrong because garbage collection is in progress.  */
-         if (dh->usable && ((char *) dh + dh->allocsize
-                            <= (char *) mapped->head + mapped->mapsize))
+         if (dh->usable
+             && here_packet + dh->allocsize <= datasize
+             && (here_packet + offsetof (struct datahead, data) + datalen
+                 <= datasize))
             return dh;
         }
  
-      work = here->next;
+      work = atomic_forced_read (here->next);
+      /* Prevent endless loops.  This should never happen but perhaps
+        the database got corrupted, accidentally or deliberately.  */
+      if (work == trail || loop_cnt-- == 0)
+       break;
+      if (tick)
+       {
+         struct hashentry *trailelem;
+         trailelem = (struct hashentry *) (mapped->data + trail);
+
+#if !_STRING_ARCH_unaligned
+         /* We have to redo the checks.  Maybe the data changed.  */
+         if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
+           return NULL;
+#endif
+
+         if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
+           return NULL;
+
+         trail = atomic_forced_read (trailelem->next);
+       }
+      tick = 1 - tick;
      }
  
    return NULL;
@@ -395,28 +535,22 @@ int
  __nscd_open_socket (const char *key, size_t keylen, request_type type,
                     void *response, size_t responselen)
  {
+  /* This should never happen and it is something the nscd daemon
+     enforces, too.  Here it helps to limit the amount of stack
+     used.  */
+  if (keylen > MAXKEYLEN)
+    return -1;
+
    int saved_errno = errno;
  
-  int sock = open_socket ();
+  int sock = open_socket (type, key, keylen);
    if (sock >= 0)
      {
-      request_header req;
-      req.version = NSCD_VERSION;
-      req.type = type;
-      req.key_len = keylen;
-
-      struct iovec vec[2];
-      vec[0].iov_base = &req;
-      vec[0].iov_len = sizeof (request_header);
-      vec[1].iov_base = (void *) key;
-      vec[1].iov_len = keylen;
-
-      ssize_t nbytes = TEMP_FAILURE_RETRY (__writev (sock, vec, 2));
-      if (nbytes == (ssize_t) (sizeof (request_header) + keylen)
-         /* Wait for data.  */
-         && wait_on_socket (sock) > 0)
+      /* Wait for data.  */
+      if (wait_on_socket (sock, 5 * 1000) > 0)
         {
-         nbytes = TEMP_FAILURE_RETRY (__read (sock, response, responselen));
+         ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
+                                                      responselen));
           if (nbytes == (ssize_t) responselen)
             return sock;
         }