(linenum→info "unix/slp.c:2238")

glibc/2.7/nptl/allocatestack.c

    1: /* Copyright (C) 2002,2003,2004,2005,2006,2007 Free Software Foundation, Inc.
    2:    This file is part of the GNU C Library.
    3:    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
    4: 
    5:    The GNU C Library is free software; you can redistribute it and/or
    6:    modify it under the terms of the GNU Lesser General Public
    7:    License as published by the Free Software Foundation; either
    8:    version 2.1 of the License, or (at your option) any later version.
    9: 
   10:    The GNU C Library is distributed in the hope that it will be useful,
   11:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:    Lesser General Public License for more details.
   14: 
   15:    You should have received a copy of the GNU Lesser General Public
   16:    License along with the GNU C Library; if not, write to the Free
   17:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   18:    02111-1307 USA.  */
   19: 
   20: #include <assert.h>
   21: #include <errno.h>
   22: #include <signal.h>
   23: #include <stdint.h>
   24: #include <string.h>
   25: #include <unistd.h>
   26: #include <sys/mman.h>
   27: #include <sys/param.h>
   28: #include <dl-sysdep.h>
   29: #include <tls.h>
   30: #include <lowlevellock.h>
   31: #include <kernel-features.h>
   32: 
   33: 
   34: #ifndef NEED_SEPARATE_REGISTER_STACK
   35: 
   36: /* Most architectures have exactly one stack pointer.  Some have more.
       In this single-stack variant only the stack address is handed
       around.  STACK_VARIABLES declares the local that ALLOCATE_STACK
       fills in.  */
   37: # define STACK_VARIABLES void *stackaddr = NULL
   38: 
   39: /* How to pass the values to the 'create_thread' function.  */
   40: # define STACK_VARIABLES_ARGS stackaddr
   41: 
   42: /* How to declare a function which receives these parameters.  */
   43: # define STACK_VARIABLES_PARMS void *stackaddr
   44: 
   45: /* How to declare the trailing parameters of allocate_stack.  */
   46: # define ALLOCATE_STACK_PARMS void **stack
   47: 
   48: /* This is how the function is called.  We do it this way to allow
   49:    other variants of the function to have more parameters.  Note that
       the expansion refers to the caller's local `stackaddr', so
       STACK_VARIABLES must be in scope at the call site.  */
   50: # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
   51: 
   52: #else
   53: 
   54: /* We need two stacks.  The kernel will place them but we have to tell
   55:    the kernel about the size of the reserved address space.  Hence the
       stack size is carried along with the stack address.  */
   56: # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
   57: 
   58: /* How to pass the values to the 'create_thread' function.  */
   59: # define STACK_VARIABLES_ARGS stackaddr, stacksize
   60: 
   61: /* How to declare a function which receives these parameters.  */
   62: # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
   63: 
   64: /* How to declare the trailing parameters of allocate_stack.  */
   65: # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
   66: 
   67: /* This is how the function is called.  We do it this way to allow
   68:    other variants of the function to have more parameters.  Note that
       the expansion refers to the caller's locals `stackaddr' and
       `stacksize', so STACK_VARIABLES must be in scope at the call site.  */
   69: # define ALLOCATE_STACK(attr, pd) \
   70:   allocate_stack (attr, pd, &stackaddr, &stacksize)
   71: 
   72: #endif
   73: 
   74: 
   75: /* Default alignment of the stack.  Only used when STACK_ALIGN has not
       already been defined elsewhere.  */
   76: #ifndef STACK_ALIGN
   77: # define STACK_ALIGN __alignof__ (long double)
   78: #endif
   79: 
   80: /* Default value for the minimal stack size that must remain after
   81:    allocating the thread descriptor and guard.  Only used when
       MINIMAL_REST_STACK has not already been defined elsewhere.  */
   82: #ifndef MINIMAL_REST_STACK
   83: # define MINIMAL_REST_STACK     4096
   84: #endif
   85: 
   86: 
   87: /* Let the architecture add some flags to the mmap() call used to
   88:    allocate stacks.  The default adds no flags.  */
   89: #ifndef ARCH_MAP_FLAGS
   90: # define ARCH_MAP_FLAGS 0
   91: #endif
   92: 
   93: /* This yields the pointer that TLS support code calls the thread pointer.
       With TLS_TCB_AT_TP it is the thread descriptor PD itself; with
       TLS_DTV_AT_TP it lies TLS_PRE_TCB_SIZE bytes past PD.  If neither
       symbol is set TLS_TPADJ is left undefined.  */
   94: #if TLS_TCB_AT_TP
   95: # define TLS_TPADJ(pd) (pd)
   96: #elif TLS_DTV_AT_TP
   97: # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
   98: #endif
   99: 
  100: /* Cache handling for not-yet free stacks.  */
  101: 
  102: /* Maximum size of the cache.  The unit is the same as that of
       `stackblock_size' (the length handed to munmap, i.e. bytes) because
       the limit is compared against the sum of those values.  */
  103: static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
       /* Sum of the `stackblock_size' of all stacks currently queued in
          stack_cache.  */
  104: static size_t stack_cache_actsize;
  105: 
  106: /* Lock protecting the stack lists below and stack_cache_actsize.  */
  107: static int stack_cache_lock = LLL_LOCK_INITIALIZER;
  108: 
  109: /* List of queued stack frames.  */
  110: static LIST_HEAD (stack_cache);
  111: 
  112: /* List of the stacks in use.  */
  113: static LIST_HEAD (stack_used);
  114: 
  115: /* List of the threads with user provided stacks in use.  No need to
  116:    initialize this, since it's done in __pthread_initialize_minimal.  */
  117: list_t __stack_user __attribute__ ((nocommon));
  118: hidden_data_def (__stack_user)
  119: 
  120: #if COLORING_INCREMENT != 0
  121: /* Number of threads created.  Only needed to compute the coloring
       offset, hence only present when coloring is enabled.  */
  122: static unsigned int nptl_ncreated;
  123: #endif
  124: 
  125: 
  126: /* Check whether the stack is still used or not.  A descriptor counts
       as free once its `tid' field is zero or negative.  */
  127: #define FREE_P(descr) ((descr)->tid <= 0)
  128: 
  129: 
  130: /* We create a doubly linked list of all cache entries.  Doubly linked
  131:    because this allows removing entries from the end.  */
  132: 
  133: 
  134: /* Get a stack frame from the cache.  We have to match by size since
  135:    some blocks might be too small or far too large.

       On entry *SIZEP is the required size.  On success the chosen
       descriptor is moved from stack_cache to stack_used, *SIZEP and
       *MEMP are set to the size and start address of its stack block,
       its cancellation, cleanup and event fields are reset, its DTV is
       cleared and its TLS re-initialized, and the descriptor is returned.

       Returns NULL, leaving *SIZEP and *MEMP untouched, if no free block
       is large enough or if the best candidate is more than four times
       the requested size.

       The function takes and releases stack_cache_lock itself.  */
  136: static struct pthread *
  137: get_cached_stack (size_t *sizep, void **memp)
  138: {
  139:   size_t size = *sizep;
  140:   struct pthread *result = NULL;
  141:   list_t *entry;
  142: 
  143:   lll_lock (stack_cache_lock, LLL_PRIVATE);
  144: 
  145:   /* Search the cache for a matching entry.  We search for the
  146:      smallest stack which has at least the required size.  Note that
  147:      in normal situations the size of all allocated stacks is the
  148:      same.  At the very least there are only a few different sizes.
  149:      Therefore this loop will exit early most of the time with an
  150:      exact match.  */
  151:   list_for_each (entry, &stack_cache)
  152:     {
  153:       struct pthread *curr;
  154: 
  155:       curr = list_entry (entry, struct pthread, list);
           /* Entries which are not yet free or which are too small are
              skipped.  */
  156:       if (FREE_P (curr) && curr->stackblock_size >= size)
  157:         {
               /* An exact match cannot be improved upon; stop looking.  */
  158:           if (curr->stackblock_size == size)
  159:             {
  160:               result = curr;
  161:               break;
  162:             }
  163: 
               /* Otherwise remember the smallest sufficient block so far.  */
  164:           if (result == NULL
  165:               || result->stackblock_size > curr->stackblock_size)
  166:             result = curr;
  167:         }
  168:     }
  169: 
  170:   if (__builtin_expect (result == NULL, 0)
  171:       /* Make sure the size difference is not too excessive.  In that
  172:          case we do not use the block.  */
  173:       || __builtin_expect (result->stackblock_size > 4 * size, 0))
  174:     {
  175:       /* Release the lock.  */
  176:       lll_unlock (stack_cache_lock, LLL_PRIVATE);
  177: 
  178:       return NULL;
  179:     }
  180: 
  181:   /* Dequeue the entry.  */
  182:   list_del (&result->list);
  183: 
  184:   /* And add to the list of stacks in use.  */
  185:   list_add (&result->list, &stack_used);
  186: 
  187:   /* And decrease the cache size.  */
  188:   stack_cache_actsize -= result->stackblock_size;
  189: 
  190:   /* Release the lock early.  The remaining work only touches the
          descriptor which has just been removed from the cache.  */
  191:   lll_unlock (stack_cache_lock, LLL_PRIVATE);
  192: 
  193:   /* Report size and location of the stack to the caller.  */
  194:   *sizep = result->stackblock_size;
  195:   *memp = result->stackblock;
  196: 
  197:   /* Cancellation handling is back to the default.  */
  198:   result->cancelhandling = 0;
  199:   result->cleanup = NULL;
  200: 
  201:   /* No pending event.  */
  202:   result->nextevent = NULL;
  203: 
  204:   /* Clear the DTV.  The element just before the DTV holds a counter;
          counter + 1 elements are zeroed.  NOTE(review): presumably the
          counter is the number of DTV slots -- confirm against the DTV
          layout.  */
  205:   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  206:   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
  207: 
  208:   /* Re-initialize the TLS.  */
  209:   _dl_allocate_tls_init (TLS_TPADJ (result));
  210: 
  211:   return result;
  212: }
  213: 
  214: 
  215: /* Free stacks until the cache size is no larger than LIMIT.

       Walks stack_cache from its end and, for every entry which is free
       according to FREE_P, unlinks it, releases its TLS memory and unmaps
       its stack block.  The size check is made only after an entry has
       been freed.  Entries which are not yet free are skipped, so the
       cache may still exceed LIMIT when the function returns.

       The function does no locking itself.  It aborts the process if
       munmap fails.  */
  216: static void
  217: free_stacks (size_t limit)
  218: {
  219:   /* We reduce the size of the cache.  Remove the last entries until
  220:      the size is below the limit.  */
  221:   list_t *entry;
  222:   list_t *prev;
  223: 
  224:   /* Search from the end of the list.  The `safe' iterator is used
          because entries are unlinked while iterating.  */
  225:   list_for_each_prev_safe (entry, prev, &stack_cache)
  226:     {
  227:       struct pthread *curr;
  228: 
  229:       curr = list_entry (entry, struct pthread, list);
  230:       if (FREE_P (curr))
  231:         {
  232:           /* Unlink the block.  */
  233:           list_del (entry);
  234: 
  235:           /* Account for the freed memory.  */
  236:           stack_cache_actsize -= curr->stackblock_size;
  237: 
  238:           /* Free the memory associated with the ELF TLS.  */
  239:           _dl_deallocate_tls (TLS_TPADJ (curr), false);
  240: 
  241:           /* Remove this block.  This should never fail.  If it does
  242:              something is really wrong.  */
  243:           if (munmap (curr->stackblock, curr->stackblock_size) != 0)
  244:             abort ();
  245: 
  246:           /* Maybe we have freed enough.  */
  247:           if (stack_cache_actsize <= limit)
  248:             break;
  249:         }
  250:     }
  251: }
  252: 
  253: 
  254: /* Add a stack frame which is not used anymore to the stack cache.  Must
  255:    be called with the cache lock held.  If the addition pushes the cache
       above stack_cache_maxsize, free_stacks is called to trim it.  */
  256: static inline void
  257: __attribute ((always_inline))
  258: queue_stack (struct pthread *stack)
  259: {
  260:   /* We unconditionally add the stack to the list.  The memory may
  261:      still be in use but it will not be reused until the kernel marks
  262:      the stack as not used anymore.  */
  263:   list_add (&stack->list, &stack_cache);
  264: 
         /* Account for the new entry and trim the cache if it grew too big.  */
  265:   stack_cache_actsize += stack->stackblock_size;
  266:   if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
  267:     free_stacks (stack_cache_maxsize);
  268: }
  269: 
  270: 
  271: /* This function is called indirectly from the freeres code in libc.
       It asks free_stacks to shrink the cache to size zero, i.e. to release
       every cached stack which is already free.  No lock is taken here.  */
  272: void
  273: __free_stack_cache (void)
  274: {
  275:   free_stacks (0);
  276: }
  277: 
  278: 
       /* Make the stack memory of thread descriptor PD readable, writable
          and executable with mprotect.  Returns 0 on success, otherwise the
          errno value left by mprotect.

          The region which is changed depends on the stack layout:
          - NEED_SEPARATE_REGISTER_STACK: from a PAGEMASK-aligned offset
            computed from the midpoint of the non-guard area plus the guard
            size up to the end of the stack block;
          - _STACK_GROWS_DOWN: everything above the guard at the start of
            the block;
          - _STACK_GROWS_UP: from the start of the block up to GUARDSIZE
            bytes below the descriptor PD.

          PAGEMASK exists only in the NEED_SEPARATE_REGISTER_STACK variant
          and is ANDed into the computed offsets to round them down.  */
  279: static int
  280: internal_function
  281: change_stack_perm (struct pthread *pd
  282: #ifdef NEED_SEPARATE_REGISTER_STACK
  283:                    , size_t pagemask
  284: #endif
  285:                    )
  286: {
  287: #ifdef NEED_SEPARATE_REGISTER_STACK
  288:   void *stack = (pd->stackblock
  289:                  + (((((pd->stackblock_size - pd->guardsize) / 2)
  290:                       & pagemask) + pd->guardsize) & pagemask));
  291:   size_t len = pd->stackblock + pd->stackblock_size - stack;
  292: #elif _STACK_GROWS_DOWN
  293:   void *stack = pd->stackblock + pd->guardsize;
  294:   size_t len = pd->stackblock_size - pd->guardsize;
  295: #elif _STACK_GROWS_UP
  296:   void *stack = pd->stackblock;
  297:   size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
  298: #else
  299: # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
  300: #endif
         /* Report the failure reason to the caller instead of leaving it
            only in errno.  */
  301:   if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
  302:     return errno;
  303: 
  304:   return 0;
  305: }
  306: 
  307: 
  308: static int
  309: allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
  310:                 ALLOCATE_STACK_PARMS)
  311: {
  312:   struct pthread *pd;
  313:   size_t size;
  314:   size_t pagesize_m1 = __getpagesize () - 1;
  315:   void *stacktop;
  316: 
  317:   assert (attr != NULL);
  318:   assert (powerof2 (pagesize_m1 + 1));
  319:   assert (TCB_ALIGNMENT >= STACK_ALIGN);
  320: 
  321:   /* Get the stack size from the attribute if it is set.  Otherwise we
  322:      use the default we determined at start time.  */
  323:   size = attr->stacksize ?: __default_stacksize;
  324: 
  325:   /* Get memory for the stack.  */
  326:   if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
  327:     {
  328:       uintptr_t adj;
  329: 
  330:       /* If the user also specified the size of the stack make sure it
  331:          is large enough.  */
  332:       if (attr->stacksize != 0
  333:           && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
  334:         return EINVAL;
  335: 
  336:       /* Adjust stack size for alignment of the TLS block.  */
  337: #if TLS_TCB_AT_TP
  338:       adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
  339:             & __static_tls_align_m1;
  340:       assert (size > adj + TLS_TCB_SIZE);
  341: #elif TLS_DTV_AT_TP
  342:       adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
  343:             & __static_tls_align_m1;
  344:       assert (size > adj);
  345: #endif
  346: 
  347:       /* The user provided some memory.  Let's hope it matches the
  348:          size...  We do not allocate guard pages if the user provided
  349:          the stack.  It is the user's responsibility to do this if it
  350:          is wanted.  */
  351: #if TLS_TCB_AT_TP
  352:       pd = (struct pthread *) ((uintptr_t) attr->stackaddr
  353:                                - TLS_TCB_SIZE - adj);
  354: #elif TLS_DTV_AT_TP
  355:       pd = (struct pthread *) (((uintptr_t) attr->stackaddr
  356:                                 - __static_tls_size - adj)
  357:                                - TLS_PRE_TCB_SIZE);
  358: #endif
  359: 
  360:       /* The user provided stack memory needs to be cleared.  */
  361:       memset (pd, '\0', sizeof (struct pthread));
  362: 
  363:       /* The first TSD block is included in the TCB.  */
  364:       pd->specific[0] = pd->specific_1stblock;
  365: 
  366:       /* Remember the stack-related values.  */
  367:       pd->stackblock = (char *) attr->stackaddr - size;
  368:       pd->stackblock_size = size;
  369: 
  370:       /* This is a user-provided stack.  It will not be queued in the
  371:          stack cache nor will the memory (except the TLS memory) be freed.  */
  372:       pd->user_stack = true;
  373: 
  374:       /* This is at least the second thread.  */
  375:       pd->header.multiple_threads = 1;
  376: #ifndef TLS_MULTIPLE_THREADS_IN_TCB
  377:       __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
  378: #endif
  379: 
  380: #ifndef __ASSUME_PRIVATE_FUTEX
  381:       /* The thread must know when private futexes are supported.  */
  382:       pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
  383:                                                 header.private_futex);
  384: #endif
  385: 
  386: #ifdef NEED_DL_SYSINFO
  387:       /* Copy the sysinfo value from the parent.  */
  388:       THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
  389: #endif
  390: 
  391:       /* The process ID is also the same as that of the caller.  */
  392:       pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
  393: 
  394:       /* Allocate the DTV for this thread.  */
  395:       if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
  396:         {
  397:           /* Something went wrong.  */
  398:           assert (errno == ENOMEM);
  399:           return EAGAIN;
  400:         }
  401: 
  402: 
  403:       /* Prepare to modify global data.  */
  404:       lll_lock (stack_cache_lock, LLL_PRIVATE);
  405: 
  406:       /* And add to the list of stacks in use.  */
  407:       list_add (&pd->list, &__stack_user);
  408: 
  409:       lll_unlock (stack_cache_lock, LLL_PRIVATE);
  410:     }
  411:   else
  412:     {
  413:       /* Allocate some anonymous memory.  If possible use the cache.  */
  414:       size_t guardsize;
  415:       size_t reqsize;
  416:       void *mem;
  417:       const int prot = (PROT_READ | PROT_WRITE
  418:                         | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
  419: 
  420: #if COLORING_INCREMENT != 0
  421:       /* Add one more page for stack coloring.  Don't do it for stacks
  422:          with 16 times pagesize or larger.  This might just cause
  423:          unnecessary misalignment.  */
  424:       if (size <= 16 * pagesize_m1)
  425:         size += pagesize_m1 + 1;
  426: #endif
  427: 
  428:       /* Adjust the stack size for alignment.  */
  429:       size &= ~__static_tls_align_m1;
  430:       assert (size != 0);
  431: 
  432:       /* Make sure the size of the stack is enough for the guard and
  433:          eventually the thread descriptor.  */
  434:       guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
  435:       if (__builtin_expect (size < ((guardsize + __static_tls_size
  436:                                      + MINIMAL_REST_STACK + pagesize_m1)
  437:                                     & ~pagesize_m1),
  438:                             0))
  439:         /* The stack is too small (or the guard too large).  */
  440:         return EINVAL;
  441: 
  442:       /* Try to get a stack from the cache.  */
  443:       reqsize = size;
  444:       pd = get_cached_stack (&size, &mem);
  445:       if (pd == NULL)
  446:         {
  447:           /* To avoid aliasing effects on a larger scale than pages we
  448:              adjust the allocated stack size if necessary.  This way
  449:              allocations directly following each other will not have
  450:              aliasing problems.  */
  451: #if MULTI_PAGE_ALIASING != 0
  452:           if ((size % MULTI_PAGE_ALIASING) == 0)
  453:             size += pagesize_m1 + 1;
  454: #endif
  455: 
  456:           mem = mmap (NULL, size, prot,
  457:                       MAP_PRIVATE | MAP_ANONYMOUS | ARCH_MAP_FLAGS, -1, 0);
  458: 
  459:           if (__builtin_expect (mem == MAP_FAILED, 0))
  460:             {
  461: #ifdef ARCH_RETRY_MMAP
  462:               mem = ARCH_RETRY_MMAP (size);
  463:               if (__builtin_expect (mem == MAP_FAILED, 0))
  464: #endif
  465:                 return errno;
  466:             }
  467: 
  468:           /* SIZE is guaranteed to be greater than zero.
  469:              So we can never get a null pointer back from mmap.  */
  470:           assert (mem != NULL);
  471: 
  472: #if COLORING_INCREMENT != 0
  473:           /* Atomically increment NCREATED.  */
  474:           unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
  475: 
  476:           /* We chose the offset for coloring by incrementing it for
  477:              every new thread by a fixed amount.  The offset used
  478:              module the page size.  Even if coloring would be better
  479:              relative to higher alignment values it makes no sense to
  480:              do it since the mmap() interface does not allow us to
  481:              specify any alignment for the returned memory block.  */
  482:           size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
  483: 
  484:           /* Make sure the coloring offsets does not disturb the alignment
  485:              of the TCB and static TLS block.  */
  486:           if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
  487:             coloring = (((coloring + __static_tls_align_m1)
  488:                          & ~(__static_tls_align_m1))
  489:                         & ~pagesize_m1);
  490: #else
  491:           /* Unless specified we do not make any adjustments.  */
  492: # define coloring 0
  493: #endif
  494: 
  495:           /* Place the thread descriptor at the end of the stack.  */
  496: #if TLS_TCB_AT_TP
  497:           pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
  498: #elif TLS_DTV_AT_TP
  499:           pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
  500:                                     - __static_tls_size)
  501:                                     & ~__static_tls_align_m1)
  502:                                    - TLS_PRE_TCB_SIZE);
  503: #endif
  504: 
  505:           /* Remember the stack-related values.  */
  506:           pd->stackblock = mem;
  507:           pd->stackblock_size = size;
  508: 
  509:           /* We allocated the first block thread-specific data array.
  510:              This address will not change for the lifetime of this
  511:              descriptor.  */
  512:           pd->specific[0] = pd->specific_1stblock;
  513: 
  514:           /* This is at least the second thread.  */
  515:           pd->header.multiple_threads = 1;
  516: #ifndef TLS_MULTIPLE_THREADS_IN_TCB
  517:           __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
  518: #endif
  519: 
  520: #ifndef __ASSUME_PRIVATE_FUTEX
  521:           /* The thread must know when private futexes are supported.  */
  522:           pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
  523:                                                     header.private_futex);
  524: #endif
  525: 
  526: #ifdef NEED_DL_SYSINFO
  527:           /* Copy the sysinfo value from the parent.  */
  528:           THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
  529: #endif
  530: 
  531:           /* The process ID is also the same as that of the caller.  */
  532:           pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
  533: 
  534:           /* Allocate the DTV for this thread.  */
  535:           if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
  536:             {
  537:               /* Something went wrong.  */
  538:               assert (errno == ENOMEM);
  539: 
  540:               /* Free the stack memory we just allocated.  */
  541:               (void) munmap (mem, size);
  542: 
  543:               return EAGAIN;
  544:             }
  545: 
  546: 
  547:           /* Prepare to modify global data.  */
  548:           lll_lock (stack_cache_lock, LLL_PRIVATE);
  549: 
  550:           /* And add to the list of stacks in use.  */
  551:           list_add (&pd->list, &stack_used);
  552: 
  553:           lll_unlock (stack_cache_lock, LLL_PRIVATE);
  554: 
  555: 
  556:           /* There might have been a race.  Another thread might have
  557:              caused the stacks to get exec permission while this new
  558:              stack was prepared.  Detect if this was possible and
  559:              change the permission if necessary.  */
  560:           if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
  561:                                 && (prot & PROT_EXEC) == 0, 0))
  562:             {
  563:               int err = change_stack_perm (pd
  564: #ifdef NEED_SEPARATE_REGISTER_STACK
  565:                                            , ~pagesize_m1
  566: #endif
  567:                                            );