Home | History | Annotate | Download | only in gen
      1 /*
      2  * Copyright (c) 1989, 1993
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Guido van Rossum.
      7  *
      8  * Copyright (c) 2011 The FreeBSD Foundation
      9  * All rights reserved.
     10  * Portions of this software were developed by David Chisnall
     11  * under sponsorship from the FreeBSD Foundation.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. Neither the name of the University nor the names of its contributors
     22  *    may be used to endorse or promote products derived from this software
     23  *    without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     35  * SUCH DAMAGE.
     36  */
     37 
     38 #if defined(LIBC_SCCS) && !defined(lint)
     39 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
     40 #endif /* LIBC_SCCS and not lint */
     41 #include <sys/cdefs.h>
     42 __FBSDID("$FreeBSD: head/lib/libc/gen/glob.c 317913 2017-05-07 19:52:56Z jilles $");
     43 
     44 /*
     45  * glob(3) -- a superset of the one defined in POSIX 1003.2.
     46  *
     47  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
     48  *
     49  * Optional extra services, controlled by flags not defined by POSIX:
     50  *
     51  * GLOB_QUOTE:
     52  *	Escaping convention: \ inhibits any special meaning the following
     53  *	character might have (except \ at end of string is retained).
     54  * GLOB_MAGCHAR:
     55  *	Set in gl_flags if pattern contained a globbing character.
     56  * GLOB_NOMAGIC:
     57  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
     58  *	not contain any magic characters.  [Used in csh style globbing]
     59  * GLOB_ALTDIRFUNC:
     60  *	Use alternately specified directory access functions.
     61  * GLOB_TILDE:
     62  *	expand ~user/foo to the /home/dir/of/user/foo
     63  * GLOB_BRACE:
     64  *	expand {1,2}{a,b} to 1a 1b 2a 2b
     65  * gl_matchc:
     66  *	Number of matches in the current invocation of glob.
     67  */
     68 
     69 /*
     70  * Some notes on multibyte character support:
     71  * 1. Patterns with illegal byte sequences match nothing - even if
     72  *    GLOB_NOCHECK is specified.
     73  * 2. Illegal byte sequences in filenames are handled by treating them as
     74  *    single-byte characters with a values of such bytes of the sequence
     75  *    cast to wchar_t.
     76  * 3. State-dependent encodings are not currently supported.
     77  */
     78 
     79 #include <sys/param.h>
     80 #include <sys/stat.h>
     81 
     82 #include <ctype.h>
     83 #include <dirent.h>
     84 #include <errno.h>
     85 #include <glob.h>
     86 #include <limits.h>
     87 #include <pwd.h>
     88 #include <stdint.h>
     89 #include <stdio.h>
     90 #include <stdlib.h>
     91 #include <string.h>
     92 #include <unistd.h>
     93 #include <wchar.h>
     94 
     95 #include "collate.h"
     96 
     97 /*
     98  * glob(3) expansion limits. Stop the expansion if any of these limits
     99  * is reached. This caps the runtime in the face of DoS attacks. See
    100  * also CVE-2010-2632
    101  */
    102 #define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
    103 #define	GLOB_LIMIT_PATH		65536	/* number of path elements */
    104 #define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
    105 #define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
    106 #define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
    107 
    108 struct glob_limit {
    109 	size_t	l_brace_cnt;
    110 	size_t	l_path_lim;
    111 	size_t	l_readdir_cnt;
    112 	size_t	l_stat_cnt;
    113 	size_t	l_string_cnt;
    114 };
    115 
    116 #define	DOT		L'.'
    117 #define	EOS		L'\0'
    118 #define	LBRACKET	L'['
    119 #define	NOT		L'!'
    120 #define	QUESTION	L'?'
    121 #define	QUOTE		L'\\'
    122 #define	RANGE		L'-'
    123 #define	RBRACKET	L']'
    124 #define	SEP		L'/'
    125 #define	STAR		L'*'
    126 #define	TILDE		L'~'
    127 #define	LBRACE		L'{'
    128 #define	RBRACE		L'}'
    129 #define	COMMA		L','
    130 
    131 #define	M_QUOTE		0x8000000000ULL
    132 #define	M_PROTECT	0x4000000000ULL
    133 #define	M_MASK		0xffffffffffULL
    134 #define	M_CHAR		0x00ffffffffULL
    135 
    136 typedef uint_fast64_t Char;
    137 
    138 #define	CHAR(c)		((Char)((c)&M_CHAR))
    139 #define	META(c)		((Char)((c)|M_QUOTE))
    140 #define	UNPROT(c)	((c) & ~M_PROTECT)
    141 #define	M_ALL		META(L'*')
    142 #define	M_END		META(L']')
    143 #define	M_NOT		META(L'!')
    144 #define	M_ONE		META(L'?')
    145 #define	M_RNG		META(L'-')
    146 #define	M_SET		META(L'[')
    147 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
    148 #ifdef DEBUG
    149 #define	isprot(c)	(((c)&M_PROTECT) != 0)
    150 #endif
    151 
    152 static int	 compare(const void *, const void *);
    153 static int	 g_Ctoc(const Char *, char *, size_t);
    154 static int	 g_lstat(Char *, struct stat *, glob_t *);
    155 static DIR	*g_opendir(Char *, glob_t *);
    156 static const Char *g_strchr(const Char *, wchar_t);
    157 #ifdef notdef
    158 static Char	*g_strcat(Char *, const Char *);
    159 #endif
    160 static int	 g_stat(Char *, struct stat *, glob_t *);
    161 static int	 glob0(const Char *, glob_t *, struct glob_limit *,
    162     const char *);
    163 static int	 glob1(Char *, glob_t *, struct glob_limit *);
    164 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *,
    165     struct glob_limit *);
    166 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
    167     struct glob_limit *);
    168 static int	 globextend(const Char *, glob_t *, struct glob_limit *,
    169     const char *);
    170 static const Char *
    171 		 globtilde(const Char *, Char *, size_t, glob_t *);
    172 static int	 globexp0(const Char *, glob_t *, struct glob_limit *,
    173     const char *);
    174 static int	 globexp1(const Char *, glob_t *, struct glob_limit *);
    175 static int	 globexp2(const Char *, const Char *, glob_t *,
    176     struct glob_limit *);
    177 static int	 globfinal(glob_t *, struct glob_limit *, size_t,
    178     const char *);
    179 static int	 match(Char *, Char *, Char *);
    180 static int	 err_nomatch(glob_t *, struct glob_limit *, const char *);
    181 static int	 err_aborted(glob_t *, int, char *);
    182 #ifdef DEBUG
    183 static void	 qprintf(const char *, Char *);
    184 #endif
    185 
    186 int
    187 glob(const char * __restrict pattern, int flags,
    188 	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
    189 {
    190 	struct glob_limit limit = { 0, 0, 0, 0, 0 };
    191 	const char *patnext;
    192 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
    193 	mbstate_t mbs;
    194 	wchar_t wc;
    195 	size_t clen;
    196 	int too_long;
    197 
    198 	patnext = pattern;
    199 	if (!(flags & GLOB_APPEND)) {
    200 		pglob->gl_pathc = 0;
    201 		pglob->gl_pathv = NULL;
    202 		if (!(flags & GLOB_DOOFFS))
    203 			pglob->gl_offs = 0;
    204 	}
    205 	if (flags & GLOB_LIMIT) {
    206 		limit.l_path_lim = pglob->gl_matchc;
    207 		if (limit.l_path_lim == 0)
    208 			limit.l_path_lim = GLOB_LIMIT_PATH;
    209 	}
    210 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
    211 	pglob->gl_errfunc = errfunc;
    212 	pglob->gl_matchc = 0;
    213 
    214 	bufnext = patbuf;
    215 	bufend = bufnext + MAXPATHLEN - 1;
    216 	too_long = 1;
    217 	if (flags & GLOB_NOESCAPE) {
    218 		memset(&mbs, 0, sizeof(mbs));
    219 		while (bufnext <= bufend) {
    220 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
    221 			if (clen == (size_t)-1 || clen == (size_t)-2)
    222 				return (err_nomatch(pglob, &limit, pattern));
    223 			else if (clen == 0) {
    224 				too_long = 0;
    225 				break;
    226 			}
    227 			*bufnext++ = wc;
    228 			patnext += clen;
    229 		}
    230 	} else {
    231 		/* Protect the quoted characters. */
    232 		memset(&mbs, 0, sizeof(mbs));
    233 		while (bufnext <= bufend) {
    234 			if (*patnext == '\\') {
    235 				if (*++patnext == '\0') {
    236 					*bufnext++ = QUOTE;
    237 					continue;
    238 				}
    239 				prot = M_PROTECT;
    240 			} else
    241 				prot = 0;
    242 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
    243 			if (clen == (size_t)-1 || clen == (size_t)-2)
    244 				return (err_nomatch(pglob, &limit, pattern));
    245 			else if (clen == 0) {
    246 				too_long = 0;
    247 				break;
    248 			}
    249 			*bufnext++ = wc | prot;
    250 			patnext += clen;
    251 		}
    252 	}
    253 	if (too_long)
    254 		return (err_nomatch(pglob, &limit, pattern));
    255 	*bufnext = EOS;
    256 
    257 	if (flags & GLOB_BRACE)
    258 	    return (globexp0(patbuf, pglob, &limit, pattern));
    259 	else
    260 	    return (glob0(patbuf, pglob, &limit, pattern));
    261 }
    262 
    263 static int
    264 globexp0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
    265     const char *origpat) {
    266 	int rv;
    267 	size_t oldpathc;
    268 
    269 	/* Protect a single {}, for find(1), like csh */
    270 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) {
    271 		if ((pglob->gl_flags & GLOB_LIMIT) &&
    272 		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
    273 			errno = E2BIG;
    274 			return (GLOB_NOSPACE);
    275 		}
    276 		return (glob0(pattern, pglob, limit, origpat));
    277 	}
    278 
    279 	oldpathc = pglob->gl_pathc;
    280 
    281 	if ((rv = globexp1(pattern, pglob, limit)) != 0)
    282 		return rv;
    283 
    284 	return (globfinal(pglob, limit, oldpathc, origpat));
    285 }
    286 
    287 /*
    288  * Expand recursively a glob {} pattern. When there is no more expansion
    289  * invoke the standard globbing routine to glob the rest of the magic
    290  * characters
    291  */
    292 static int
    293 globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
    294 {
    295 	const Char* ptr;
    296 
    297 	if ((ptr = g_strchr(pattern, LBRACE)) != NULL) {
    298 		if ((pglob->gl_flags & GLOB_LIMIT) &&
    299 		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
    300 			errno = E2BIG;
    301 			return (GLOB_NOSPACE);
    302 		}
    303 		return (globexp2(ptr, pattern, pglob, limit));
    304 	}
    305 
    306 	return (glob0(pattern, pglob, limit, NULL));
    307 }
    308 
    309 
    310 /*
    311  * Recursive brace globbing helper. Tries to expand a single brace.
    312  * If it succeeds then it invokes globexp1 with the new pattern.
    313  * If it fails then it tries to glob the rest of the pattern and returns.
    314  */
    315 static int
    316 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob,
    317     struct glob_limit *limit)
    318 {
    319 	int     i, rv;
    320 	Char   *lm, *ls;
    321 	const Char *pe, *pm, *pm1, *pl;
    322 	Char    patbuf[MAXPATHLEN];
    323 
    324 	/* copy part up to the brace */
    325 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
    326 		continue;
    327 	*lm = EOS;
    328 	ls = lm;
    329 
    330 	/* Find the balanced brace */
    331 	for (i = 0, pe = ++ptr; *pe != EOS; pe++)
    332 		if (*pe == LBRACKET) {
    333 			/* Ignore everything between [] */
    334 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
    335 				continue;
    336 			if (*pe == EOS) {
    337 				/*
    338 				 * We could not find a matching RBRACKET.
    339 				 * Ignore and just look for RBRACE
    340 				 */
    341 				pe = pm;
    342 			}
    343 		}
    344 		else if (*pe == LBRACE)
    345 			i++;
    346 		else if (*pe == RBRACE) {
    347 			if (i == 0)
    348 				break;
    349 			i--;
    350 		}
    351 
    352 	/* Non matching braces; just glob the pattern */
    353 	if (i != 0 || *pe == EOS)
    354 		return (glob0(pattern, pglob, limit, NULL));
    355 
    356 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
    357 		switch (*pm) {
    358 		case LBRACKET:
    359 			/* Ignore everything between [] */
    360 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
    361 				continue;
    362 			if (*pm == EOS) {
    363 				/*
    364 				 * We could not find a matching RBRACKET.
    365 				 * Ignore and just look for RBRACE
    366 				 */
    367 				pm = pm1;
    368 			}
    369 			break;
    370 
    371 		case LBRACE:
    372 			i++;
    373 			break;
    374 
    375 		case RBRACE:
    376 			if (i) {
    377 			    i--;
    378 			    break;
    379 			}
    380 			/* FALLTHROUGH */
    381 		case COMMA:
    382 			if (i && *pm == COMMA)
    383 				break;
    384 			else {
    385 				/* Append the current string */
    386 				for (lm = ls; (pl < pm); *lm++ = *pl++)
    387 					continue;
    388 				/*
    389 				 * Append the rest of the pattern after the
    390 				 * closing brace
    391 				 */
    392 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
    393 					continue;
    394 
    395 				/* Expand the current pattern */
    396 #ifdef DEBUG
    397 				qprintf("globexp2:", patbuf);
    398 #endif
    399 				rv = globexp1(patbuf, pglob, limit);
    400 				if (rv)
    401 					return (rv);
    402 
    403 				/* move after the comma, to the next string */
    404 				pl = pm + 1;
    405 			}
    406 			break;
    407 
    408 		default:
    409 			break;
    410 		}
    411 	return (0);
    412 }
    413 
    414 
    415 
    416 /*
    417  * expand tilde from the passwd file.
    418  */
    419 static const Char *
    420 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
    421 {
    422 	struct passwd *pwd;
    423 	char *h, *sc;
    424 	const Char *p;
    425 	Char *b, *eb;
    426 	wchar_t wc;
    427 	wchar_t wbuf[MAXPATHLEN];
    428 	wchar_t *wbufend, *dc;
    429 	size_t clen;
    430 	mbstate_t mbs;
    431 	int too_long;
    432 
    433 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
    434 		return (pattern);
    435 
    436 	/*
    437 	 * Copy up to the end of the string or /
    438 	 */
    439 	eb = &patbuf[patbuf_len - 1];
    440 	for (p = pattern + 1, b = patbuf;
    441 	    b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++)
    442 		continue;
    443 
    444 	if (*p != EOS && UNPROT(*p) != SEP)
    445 		return (NULL);
    446 
    447 	*b = EOS;
    448 	h = NULL;
    449 
    450 	if (patbuf[0] == EOS) {
    451 		/*
    452 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
    453 		 * we're not running setuid or setgid) and then trying
    454 		 * the password file
    455 		 */
    456 		if (issetugid() != 0 ||
    457 		    (h = getenv("HOME")) == NULL) {
    458 			if (((h = getlogin()) != NULL &&
    459 			     (pwd = getpwnam(h)) != NULL) ||
    460 			    (pwd = getpwuid(getuid())) != NULL)
    461 				h = pwd->pw_dir;
    462 			else
    463 				return (pattern);
    464 		}
    465 	}
    466 	else {
    467 		/*
    468 		 * Expand a ~user
    469 		 */
    470 		if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf)))
    471 			return (NULL);
    472 		if ((pwd = getpwnam((char *)wbuf)) == NULL)
    473 			return (pattern);
    474 		else
    475 			h = pwd->pw_dir;
    476 	}
    477 
    478 	/* Copy the home directory */
    479 	dc = wbuf;
    480 	sc = h;
    481 	wbufend = wbuf + MAXPATHLEN - 1;
    482 	too_long = 1;
    483 	memset(&mbs, 0, sizeof(mbs));
    484 	while (dc <= wbufend) {
    485 		clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
    486 		if (clen == (size_t)-1 || clen == (size_t)-2) {
    487 			/* XXX See initial comment #2. */
    488 			wc = (unsigned char)*sc;
    489 			clen = 1;
    490 			memset(&mbs, 0, sizeof(mbs));
    491 		}
    492 		if ((*dc++ = wc) == EOS) {
    493 			too_long = 0;
    494 			break;
    495 		}
    496 		sc += clen;
    497 	}
    498 	if (too_long)
    499 		return (NULL);
    500 
    501 	dc = wbuf;
    502 	for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT)
    503 		continue;
    504 	if (*dc != EOS)
    505 		return (NULL);
    506 
    507 	/* Append the rest of the pattern */
    508 	if (*p != EOS) {
    509 		too_long = 1;
    510 		while (b <= eb) {
    511 			if ((*b++ = *p++) == EOS) {
    512 				too_long = 0;
    513 				break;
    514 			}
    515 		}
    516 		if (too_long)
    517 			return (NULL);
    518 	} else
    519 		*b = EOS;
    520 
    521 	return (patbuf);
    522 }
    523 
    524 
    525 /*
    526  * The main glob() routine: compiles the pattern (optionally processing
    527  * quotes), calls glob1() to do the real pattern matching, and finally
    528  * sorts the list (unless unsorted operation is requested).  Returns 0
    529  * if things went well, nonzero if errors occurred.
    530  */
    531 static int
    532 glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
    533     const char *origpat) {
    534 	const Char *qpatnext;
    535 	int err;
    536 	size_t oldpathc;
    537 	Char *bufnext, c, patbuf[MAXPATHLEN];
    538 
    539 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
    540 	if (qpatnext == NULL) {
    541 		errno = E2BIG;
    542 		return (GLOB_NOSPACE);
    543 	}
    544 	oldpathc = pglob->gl_pathc;
    545 	bufnext = patbuf;
    546 
    547 	/* We don't need to check for buffer overflow any more. */
    548 	while ((c = *qpatnext++) != EOS) {
    549 		switch (c) {
    550 		case LBRACKET:
    551 			c = *qpatnext;
    552 			if (c == NOT)
    553 				++qpatnext;
    554 			if (*qpatnext == EOS ||
    555 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
    556 				*bufnext++ = LBRACKET;
    557 				if (c == NOT)
    558 					--qpatnext;
    559 				break;
    560 			}
    561 			*bufnext++ = M_SET;
    562 			if (c == NOT)
    563 				*bufnext++ = M_NOT;
    564 			c = *qpatnext++;
    565 			do {
    566 				*bufnext++ = CHAR(c);
    567 				if (*qpatnext == RANGE &&
    568 				    (c = qpatnext[1]) != RBRACKET) {
    569 					*bufnext++ = M_RNG;
    570 					*bufnext++ = CHAR(c);
    571 					qpatnext += 2;
    572 				}
    573 			} while ((c = *qpatnext++) != RBRACKET);
    574 			pglob->gl_flags |= GLOB_MAGCHAR;
    575 			*bufnext++ = M_END;
    576 			break;
    577 		case QUESTION:
    578 			pglob->gl_flags |= GLOB_MAGCHAR;
    579 			*bufnext++ = M_ONE;
    580 			break;
    581 		case STAR:
    582 			pglob->gl_flags |= GLOB_MAGCHAR;
    583 			/* collapse adjacent stars to one,
    584 			 * to ensure "**" at the end continues to match the
    585 			 * empty string
    586 			 */
    587 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
    588 			    *bufnext++ = M_ALL;
    589 			break;
    590 		default:
    591 			*bufnext++ = CHAR(c);
    592 			break;
    593 		}
    594 	}
    595 	*bufnext = EOS;
    596 #ifdef DEBUG
    597 	qprintf("glob0:", patbuf);
    598 #endif
    599 
    600 	if ((err = glob1(patbuf, pglob, limit)) != 0)
    601 		return(err);
    602 
    603 	if (origpat != NULL)
    604 		return (globfinal(pglob, limit, oldpathc, origpat));
    605 
    606 	return (0);
    607 }
    608 
    609 static int
    610 globfinal(glob_t *pglob, struct glob_limit *limit, size_t oldpathc,
    611     const char *origpat) {
    612 	if (pglob->gl_pathc == oldpathc)
    613 		return (err_nomatch(pglob, limit, origpat));
    614 
    615 	if (!(pglob->gl_flags & GLOB_NOSORT))
    616 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
    617 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
    618 
    619 	return (0);
    620 }
    621 
    622 static int
    623 compare(const void *p, const void *q)
    624 {
    625 	return (strcoll(*(char **)p, *(char **)q));
    626 }
    627 
    628 static int
    629 glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
    630 {
    631 	Char pathbuf[MAXPATHLEN];
    632 
    633 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
    634 	if (*pattern == EOS)
    635 		return (0);
    636 	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
    637 	    pattern, pglob, limit));
    638 }
    639 
    640 /*
    641  * The functions glob2 and glob3 are mutually recursive; there is one level
    642  * of recursion for each segment in the pattern that contains one or more
    643  * meta characters.
    644  */
    645 static int
    646 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
    647       glob_t *pglob, struct glob_limit *limit)
    648 {
    649 	struct stat sb;
    650 	Char *p, *q;
    651 	int anymeta;
    652 
    653 	/*
    654 	 * Loop over pattern segments until end of pattern or until
    655 	 * segment with meta character found.
    656 	 */
    657 	for (anymeta = 0;;) {
    658 		if (*pattern == EOS) {		/* End of pattern? */
    659 			*pathend = EOS;
    660 			if (g_lstat(pathbuf, &sb, pglob))
    661 				return (0);
    662 
    663 			if ((pglob->gl_flags & GLOB_LIMIT) &&
    664 			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
    665 				errno = E2BIG;
    666 				return (GLOB_NOSPACE);
    667 			}
    668 			if ((pglob->gl_flags & GLOB_MARK) &&
    669 			    UNPROT(pathend[-1]) != SEP &&
    670 			    (S_ISDIR(sb.st_mode) ||
    671 			    (S_ISLNK(sb.st_mode) &&
    672 			    g_stat(pathbuf, &sb, pglob) == 0 &&
    673 			    S_ISDIR(sb.st_mode)))) {
    674 				if (pathend + 1 > pathend_last) {
    675 					errno = E2BIG;
    676 					return (GLOB_NOSPACE);
    677 				}
    678 				*pathend++ = SEP;
    679 				*pathend = EOS;
    680 			}
    681 			++pglob->gl_matchc;
    682 			return (globextend(pathbuf, pglob, limit, NULL));
    683 		}
    684 
    685 		/* Find end of next segment, copy tentatively to pathend. */
    686 		q = pathend;
    687 		p = pattern;
    688 		while (*p != EOS && UNPROT(*p) != SEP) {
    689 			if (ismeta(*p))
    690 				anymeta = 1;
    691 			if (q + 1 > pathend_last) {
    692 				errno = E2BIG;
    693 				return (GLOB_NOSPACE);
    694 			}
    695 			*q++ = *p++;
    696 		}
    697 
    698 		if (!anymeta) {		/* No expansion, do next segment. */
    699 			pathend = q;
    700 			pattern = p;
    701 			while (UNPROT(*pattern) == SEP) {
    702 				if (pathend + 1 > pathend_last) {
    703 					errno = E2BIG;
    704 					return (GLOB_NOSPACE);
    705 				}
    706 				*pathend++ = *pattern++;
    707 			}
    708 		} else			/* Need expansion, recurse. */
    709 			return (glob3(pathbuf, pathend, pathend_last, pattern,
    710 			    p, pglob, limit));
    711 	}
    712 	/* NOTREACHED */
    713 }
    714 
    715 static int
    716 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
    717       Char *pattern, Char *restpattern,
    718       glob_t *pglob, struct glob_limit *limit)
    719 {
    720 	struct dirent *dp;
    721 	DIR *dirp;
    722 	int err, too_long, saverrno, saverrno2;
    723 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
    724 
    725 	struct dirent *(*readdirfunc)(DIR *);
    726 
    727 	if (pathend > pathend_last) {
    728 		errno = E2BIG;
    729 		return (GLOB_NOSPACE);
    730 	}
    731 	*pathend = EOS;
    732 	if (pglob->gl_errfunc != NULL &&
    733 	    g_Ctoc(pathbuf, buf, sizeof(buf))) {
    734 		errno = E2BIG;
    735 		return (GLOB_NOSPACE);
    736 	}
    737 
    738 	saverrno = errno;
    739 	errno = 0;
    740 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
    741 		if (errno == ENOENT || errno == ENOTDIR)
    742 			return (0);
    743 		err = err_aborted(pglob, errno, buf);
    744 		if (errno == 0)
    745 			errno = saverrno;
    746 		return (err);
    747 	}
    748 
    749 	err = 0;
    750 
    751 	/* pglob->gl_readdir takes a void *, fix this manually */
    752 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    753 		readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir;
    754 	else
    755 		readdirfunc = readdir;
    756 
    757 	errno = 0;
    758 	/* Search directory for matching names. */
    759 	while ((dp = (*readdirfunc)(dirp)) != NULL) {
    760 		char *sc;
    761 		Char *dc;
    762 		wchar_t wc;
    763 		size_t clen;
    764 		mbstate_t mbs;
    765 
    766 		if ((pglob->gl_flags & GLOB_LIMIT) &&
    767 		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
    768 			errno = E2BIG;
    769 			err = GLOB_NOSPACE;
    770 			break;
    771 		}
    772 
    773 		/* Initial DOT must be matched literally. */
    774 		if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) {
    775 			errno = 0;
    776 			continue;
    777 		}
    778 		memset(&mbs, 0, sizeof(mbs));
    779 		dc = pathend;
    780 		sc = dp->d_name;
    781 		too_long = 1;
    782 		while (dc <= pathend_last) {
    783 			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
    784 			if (clen == (size_t)-1 || clen == (size_t)-2) {
    785 				/* XXX See initial comment #2. */
    786 				wc = (unsigned char)*sc;
    787 				clen = 1;
    788 				memset(&mbs, 0, sizeof(mbs));
    789 			}
    790 			if ((*dc++ = wc) == EOS) {
    791 				too_long = 0;
    792 				break;
    793 			}
    794 			sc += clen;
    795 		}
    796 		if (too_long && (err = err_aborted(pglob, ENAMETOOLONG,
    797 		    buf))) {
    798 			errno = ENAMETOOLONG;
    799 			break;
    800 		}
    801 		if (too_long || !match(pathend, pattern, restpattern)) {
    802 			*pathend = EOS;
    803 			errno = 0;
    804 			continue;
    805 		}
    806 		if (errno == 0)
    807 			errno = saverrno;
    808 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
    809 		    pglob, limit);
    810 		if (err)
    811 			break;
    812 		errno = 0;
    813 	}
    814 
    815 	saverrno2 = errno;
    816 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    817 		(*pglob->gl_closedir)(dirp);
    818 	else
    819 		closedir(dirp);
    820 	errno = saverrno2;
    821 
    822 	if (err)
    823 		return (err);
    824 
    825 	if (dp == NULL && errno != 0 &&
    826 	    (err = err_aborted(pglob, errno, buf)))
    827 		return (err);
    828 
    829 	if (errno == 0)
    830 		errno = saverrno;
    831 	return (0);
    832 }
    833 
    834 
    835 /*
    836  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
    837  * add the new item, and update gl_pathc.
    838  *
    839  * This assumes the BSD realloc, which only copies the block when its size
    840  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
    841  * behavior.
    842  *
    843  * Return 0 if new item added, error code if memory couldn't be allocated.
    844  *
    845  * Invariant of the glob_t structure:
    846  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
    847  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
    848  */
    849 static int
    850 globextend(const Char *path, glob_t *pglob, struct glob_limit *limit,
    851     const char *origpat)
    852 {
    853 	char **pathv;
    854 	size_t i, newn, len;
    855 	char *copy;
    856 	const Char *p;
    857 
    858 	if ((pglob->gl_flags & GLOB_LIMIT) &&
    859 	    pglob->gl_matchc > limit->l_path_lim) {
    860 		errno = E2BIG;
    861 		return (GLOB_NOSPACE);
    862 	}
    863 
    864 	newn = 2 + pglob->gl_pathc + pglob->gl_offs;
    865 	/* reallocarray(NULL, newn, size) is equivalent to malloc(newn*size). */
    866 	pathv = reallocarray(pglob->gl_pathv, newn, sizeof(*pathv));
    867 	if (pathv == NULL)
    868 		return (GLOB_NOSPACE);
    869 
    870 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
    871 		/* first time around -- clear initial gl_offs items */
    872 		pathv += pglob->gl_offs;
    873 		for (i = pglob->gl_offs + 1; --i > 0; )
    874 			*--pathv = NULL;
    875 	}
    876 	pglob->gl_pathv = pathv;
    877 
    878 	if (origpat != NULL)
    879 		copy = strdup(origpat);
    880 	else {
    881 		for (p = path; *p++ != EOS;)
    882 			continue;
    883 		len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
    884 		if ((copy = malloc(len)) != NULL) {
    885 			if (g_Ctoc(path, copy, len)) {
    886 				free(copy);
    887 				errno = E2BIG;
    888 				return (GLOB_NOSPACE);
    889 			}
    890 		}
    891 	}
    892 	if (copy != NULL) {
    893 		limit->l_string_cnt += strlen(copy) + 1;
    894 		if ((pglob->gl_flags & GLOB_LIMIT) &&
    895 		    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
    896 			free(copy);
    897 			errno = E2BIG;
    898 			return (GLOB_NOSPACE);
    899 		}
    900 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
    901 	}
    902 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
    903 	return (copy == NULL ? GLOB_NOSPACE : 0);
    904 }
    905 
    906 /*
    907  * pattern matching function for filenames.
    908  */
    909 static int
    910 match(Char *name, Char *pat, Char *patend)
    911 {
    912 	int ok, negate_range;
    913 	Char c, k, *nextp, *nextn;
    914 #if !defined(__BIONIC__)
    915 	struct xlocale_collate *table =
    916 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
    917 #endif
    918 
    919 	nextn = NULL;
    920 	nextp = NULL;
    921 
    922 	while (1) {
    923 		while (pat < patend) {
    924 			c = *pat++;
    925 			switch (c & M_MASK) {
    926 			case M_ALL:
    927 				if (pat == patend)
    928 					return (1);
    929 				if (*name == EOS)
    930 					return (0);
    931 				nextn = name + 1;
    932 				nextp = pat - 1;
    933 				break;
    934 			case M_ONE:
    935 				if (*name++ == EOS)
    936 					goto fail;
    937 				break;
    938 			case M_SET:
    939 				ok = 0;
    940 				if ((k = *name++) == EOS)
    941 					goto fail;
    942 				negate_range = ((*pat & M_MASK) == M_NOT);
    943 				if (negate_range != 0)
    944 					++pat;
    945 				while (((c = *pat++) & M_MASK) != M_END)
    946 					if ((*pat & M_MASK) == M_RNG) {
    947 #if defined(__BIONIC__)
    948 						if (c <= k && k <= pat[1])
    949 #else
    950 						if (table->__collate_load_error ?
    951 						    CHAR(c) <= CHAR(k) &&
    952 						    CHAR(k) <= CHAR(pat[1]) :
    953 						    __wcollate_range_cmp(CHAR(c),
    954 						    CHAR(k)) <= 0 &&
    955 						    __wcollate_range_cmp(CHAR(k),
    956 						    CHAR(pat[1])) <= 0)
    957 #endif
    958 							ok = 1;
    959 						pat += 2;
    960 					} else if (c == k)
    961 						ok = 1;
    962 				if (ok == negate_range)
    963 					goto fail;
    964 				break;
    965 			default:
    966 				if (*name++ != c)
    967 					goto fail;
    968 				break;
    969 			}
    970 		}
    971 		if (*name == EOS)
    972 			return (1);
    973 
    974 	fail:
    975 		if (nextn == NULL)
    976 			break;
    977 		pat = nextp;
    978 		name = nextn;
    979 	}
    980 	return (0);
    981 }
    982 
    983 /* Free allocated data belonging to a glob_t structure. */
    984 void
    985 globfree(glob_t *pglob)
    986 {
    987 	size_t i;
    988 	char **pp;
    989 
    990 	if (pglob->gl_pathv != NULL) {
    991 		pp = pglob->gl_pathv + pglob->gl_offs;
    992 		for (i = pglob->gl_pathc; i--; ++pp)
    993 			if (*pp)
    994 				free(*pp);
    995 		free(pglob->gl_pathv);
    996 		pglob->gl_pathv = NULL;
    997 	}
    998 }
    999 
   1000 static DIR *
   1001 g_opendir(Char *str, glob_t *pglob)
   1002 {
   1003 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
   1004 
   1005 	if (*str == EOS)
   1006 		strcpy(buf, ".");
   1007 	else {
   1008 		if (g_Ctoc(str, buf, sizeof(buf))) {
   1009 			errno = ENAMETOOLONG;
   1010 			return (NULL);
   1011 		}
   1012 	}
   1013 
   1014 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
   1015 		return ((*pglob->gl_opendir)(buf));
   1016 
   1017 	return (opendir(buf));
   1018 }
   1019 
   1020 static int
   1021 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
   1022 {
   1023 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
   1024 
   1025 	if (g_Ctoc(fn, buf, sizeof(buf))) {
   1026 		errno = ENAMETOOLONG;
   1027 		return (-1);
   1028 	}
   1029 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
   1030 		return((*pglob->gl_lstat)(buf, sb));
   1031 	return (lstat(buf, sb));
   1032 }
   1033 
   1034 static int
   1035 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
   1036 {
   1037 	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
   1038 
   1039 	if (g_Ctoc(fn, buf, sizeof(buf))) {
   1040 		errno = ENAMETOOLONG;
   1041 		return (-1);
   1042 	}
   1043 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
   1044 		return ((*pglob->gl_stat)(buf, sb));
   1045 	return (stat(buf, sb));
   1046 }
   1047 
   1048 static const Char *
   1049 g_strchr(const Char *str, wchar_t ch)
   1050 {
   1051 
   1052 	do {
   1053 		if (*str == ch)
   1054 			return (str);
   1055 	} while (*str++);
   1056 	return (NULL);
   1057 }
   1058 
   1059 static int
   1060 g_Ctoc(const Char *str, char *buf, size_t len)
   1061 {
   1062 	mbstate_t mbs;
   1063 	size_t clen;
   1064 
   1065 	memset(&mbs, 0, sizeof(mbs));
   1066 	while (len >= MB_CUR_MAX) {
   1067 		clen = wcrtomb(buf, CHAR(*str), &mbs);
   1068 		if (clen == (size_t)-1) {
   1069 			/* XXX See initial comment #2. */
   1070 			*buf = (char)CHAR(*str);
   1071 			clen = 1;
   1072 			memset(&mbs, 0, sizeof(mbs));
   1073 		}
   1074 		if (CHAR(*str) == EOS)
   1075 			return (0);
   1076 		str++;
   1077 		buf += clen;
   1078 		len -= clen;
   1079 	}
   1080 	return (1);
   1081 }
   1082 
   1083 static int
   1084 err_nomatch(glob_t *pglob, struct glob_limit *limit, const char *origpat) {
   1085 	/*
   1086 	 * If there was no match we are going to append the origpat
   1087 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
   1088 	 * and the origpat did not contain any magic characters
   1089 	 * GLOB_NOMAGIC is there just for compatibility with csh.
   1090 	 */
   1091 	if ((pglob->gl_flags & GLOB_NOCHECK) ||
   1092 	    ((pglob->gl_flags & GLOB_NOMAGIC) &&
   1093 	    !(pglob->gl_flags & GLOB_MAGCHAR)))
   1094 		return (globextend(NULL, pglob, limit, origpat));
   1095 	return (GLOB_NOMATCH);
   1096 }
   1097 
   1098 static int
   1099 err_aborted(glob_t *pglob, int err, char *buf) {
   1100 	if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, err)) ||
   1101 	    (pglob->gl_flags & GLOB_ERR))
   1102 		return (GLOB_ABORTED);
   1103 	return (0);
   1104 }
   1105 
   1106 #ifdef DEBUG
   1107 static void
   1108 qprintf(const char *str, Char *s)
   1109 {
   1110 	Char *p;
   1111 
   1112 	(void)printf("%s\n", str);
   1113 	if (s != NULL) {
   1114 		for (p = s; *p != EOS; p++)
   1115 			(void)printf("%c", (char)CHAR(*p));
   1116 		(void)printf("\n");
   1117 		for (p = s; *p != EOS; p++)
   1118 			(void)printf("%c", (isprot(*p) ? '\\' : ' '));
   1119 		(void)printf("\n");
   1120 		for (p = s; *p != EOS; p++)
   1121 			(void)printf("%c", (ismeta(*p) ? '_' : ' '));
   1122 		(void)printf("\n");
   1123 	}
   1124 }
   1125 #endif
   1126