/* fnmatch_loop.c */
/* Copyright (C) 1991-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
  14. #ifdef _LIBC
  15. # include <stdint.h>
  16. #endif
  17. struct STRUCT
  18. {
  19. const CHAR *pattern;
  20. const CHAR *string;
  21. bool no_leading_period;
  22. };
  23. /* Match STRING against the file name pattern PATTERN, returning zero if
  24. it matches, nonzero if not. */
  25. static int FCT (const CHAR *pattern, const CHAR *string,
  26. const CHAR *string_end, bool no_leading_period, int flags,
  27. struct STRUCT *ends, size_t alloca_used);
  28. static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  29. const CHAR *string_end, bool no_leading_period, int flags,
  30. size_t alloca_used);
  31. static const CHAR *END (const CHAR *patternp);
  32. static int
  33. FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  34. bool no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
  35. {
  36. const CHAR *p = pattern, *n = string;
  37. UCHAR c;
  38. #ifdef _LIBC
  39. # if WIDE_CHAR_VERSION
  40. const char *collseq = (const char *)
  41. _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  42. # else
  43. const UCHAR *collseq = (const UCHAR *)
  44. _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  45. # endif
  46. #endif
  47. while ((c = *p++) != L_('\0'))
  48. {
  49. bool new_no_leading_period = false;
  50. c = FOLD (c);
  51. switch (c)
  52. {
  53. case L_('?'):
  54. if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  55. {
  56. int res = EXT (c, p, n, string_end, no_leading_period,
  57. flags, alloca_used);
  58. if (res != -1)
  59. return res;
  60. }
  61. if (n == string_end)
  62. return FNM_NOMATCH;
  63. else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  64. return FNM_NOMATCH;
  65. else if (*n == L_('.') && no_leading_period)
  66. return FNM_NOMATCH;
  67. break;
  68. case L_('\\'):
  69. if (!(flags & FNM_NOESCAPE))
  70. {
  71. c = *p++;
  72. if (c == L_('\0'))
  73. /* Trailing \ loses. */
  74. return FNM_NOMATCH;
  75. c = FOLD (c);
  76. }
  77. if (n == string_end || FOLD ((UCHAR) *n) != c)
  78. return FNM_NOMATCH;
  79. break;
  80. case L_('*'):
  81. if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  82. {
  83. int res = EXT (c, p, n, string_end, no_leading_period,
  84. flags, alloca_used);
  85. if (res != -1)
  86. return res;
  87. }
  88. else if (ends != NULL)
  89. {
  90. ends->pattern = p - 1;
  91. ends->string = n;
  92. ends->no_leading_period = no_leading_period;
  93. return 0;
  94. }
  95. if (n != string_end && *n == L_('.') && no_leading_period)
  96. return FNM_NOMATCH;
  97. for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
  98. {
  99. if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
  100. {
  101. const CHAR *endp = END (p);
  102. if (endp != p)
  103. {
  104. /* This is a pattern. Skip over it. */
  105. p = endp;
  106. continue;
  107. }
  108. }
  109. if (c == L_('?'))
  110. {
  111. /* A ? needs to match one character. */
  112. if (n == string_end)
  113. /* There isn't another character; no match. */
  114. return FNM_NOMATCH;
  115. else if (*n == L_('/')
  116. && __glibc_unlikely (flags & FNM_FILE_NAME))
  117. /* A slash does not match a wildcard under
  118. FNM_FILE_NAME. */
  119. return FNM_NOMATCH;
  120. else
  121. /* One character of the string is consumed in matching
  122. this ? wildcard, so *??? won't match if there are
  123. less than three characters. */
  124. ++n;
  125. }
  126. }
  127. if (c == L_('\0'))
  128. /* The wildcard(s) is/are the last element of the pattern.
  129. If the name is a file name and contains another slash
  130. this means it cannot match, unless the FNM_LEADING_DIR
  131. flag is set. */
  132. {
  133. int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
  134. if (flags & FNM_FILE_NAME)
  135. {
  136. if (flags & FNM_LEADING_DIR)
  137. result = 0;
  138. else
  139. {
  140. if (MEMCHR (n, L_('/'), string_end - n) == NULL)
  141. result = 0;
  142. }
  143. }
  144. return result;
  145. }
  146. else
  147. {
  148. const CHAR *endp;
  149. struct STRUCT end;
  150. end.pattern = NULL;
  151. endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
  152. string_end - n);
  153. if (endp == NULL)
  154. endp = string_end;
  155. if (c == L_('[')
  156. || (__glibc_unlikely (flags & FNM_EXTMATCH)
  157. && (c == L_('@') || c == L_('+') || c == L_('!'))
  158. && *p == L_('(')))
  159. {
  160. int flags2 = ((flags & FNM_FILE_NAME)
  161. ? flags : (flags & ~FNM_PERIOD));
  162. for (--p; n < endp; ++n, no_leading_period = false)
  163. if (FCT (p, n, string_end, no_leading_period, flags2,
  164. &end, alloca_used) == 0)
  165. goto found;
  166. }
  167. else if (c == L_('/') && (flags & FNM_FILE_NAME))
  168. {
  169. while (n < string_end && *n != L_('/'))
  170. ++n;
  171. if (n < string_end && *n == L_('/')
  172. && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
  173. NULL, alloca_used) == 0))
  174. return 0;
  175. }
  176. else
  177. {
  178. int flags2 = ((flags & FNM_FILE_NAME)
  179. ? flags : (flags & ~FNM_PERIOD));
  180. if (c == L_('\\') && !(flags & FNM_NOESCAPE))
  181. c = *p;
  182. c = FOLD (c);
  183. for (--p; n < endp; ++n, no_leading_period = false)
  184. if (FOLD ((UCHAR) *n) == c
  185. && (FCT (p, n, string_end, no_leading_period, flags2,
  186. &end, alloca_used) == 0))
  187. {
  188. found:
  189. if (end.pattern == NULL)
  190. return 0;
  191. break;
  192. }
  193. if (end.pattern != NULL)
  194. {
  195. p = end.pattern;
  196. n = end.string;
  197. no_leading_period = end.no_leading_period;
  198. continue;
  199. }
  200. }
  201. }
  202. /* If we come here no match is possible with the wildcard. */
  203. return FNM_NOMATCH;
  204. case L_('['):
  205. {
  206. /* Nonzero if the sense of the character class is inverted. */
  207. const CHAR *p_init = p;
  208. const CHAR *n_init = n;
  209. bool not;
  210. CHAR cold;
  211. UCHAR fn;
  212. if (posixly_correct == 0)
  213. posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
  214. if (n == string_end)
  215. return FNM_NOMATCH;
  216. if (*n == L_('.') && no_leading_period)
  217. return FNM_NOMATCH;
  218. if (*n == L_('/') && (flags & FNM_FILE_NAME))
  219. /* '/' cannot be matched. */
  220. return FNM_NOMATCH;
  221. not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
  222. if (not)
  223. ++p;
  224. fn = FOLD ((UCHAR) *n);
  225. c = *p++;
  226. for (;;)
  227. {
  228. if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
  229. {
  230. if (*p == L_('\0'))
  231. return FNM_NOMATCH;
  232. c = FOLD ((UCHAR) *p);
  233. ++p;
  234. goto normal_bracket;
  235. }
  236. else if (c == L_('[') && *p == L_(':'))
  237. {
  238. /* Leave room for the null. */
  239. CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
  240. size_t c1 = 0;
  241. wctype_t wt;
  242. const CHAR *startp = p;
  243. for (;;)
  244. {
  245. if (c1 == CHAR_CLASS_MAX_LENGTH)
  246. /* The name is too long and therefore the pattern
  247. is ill-formed. */
  248. return FNM_NOMATCH;
  249. c = *++p;
  250. if (c == L_(':') && p[1] == L_(']'))
  251. {
  252. p += 2;
  253. break;
  254. }
  255. if (c < L_('a') || c >= L_('z'))
  256. {
  257. /* This cannot possibly be a character class name.
  258. Match it as a normal range. */
  259. p = startp;
  260. c = L_('[');
  261. goto normal_bracket;
  262. }
  263. str[c1++] = c;
  264. }
  265. str[c1] = L_('\0');
  266. wt = IS_CHAR_CLASS (str);
  267. if (wt == 0)
  268. /* Invalid character class name. */
  269. return FNM_NOMATCH;
  270. #if defined _LIBC && ! WIDE_CHAR_VERSION
  271. /* The following code is glibc specific but does
  272. there a good job in speeding up the code since
  273. we can avoid the btowc() call. */
  274. if (_ISCTYPE ((UCHAR) *n, wt))
  275. goto matched;
  276. #else
  277. if (iswctype (BTOWC ((UCHAR) *n), wt))
  278. goto matched;
  279. #endif
  280. c = *p++;
  281. }
  282. #ifdef _LIBC
  283. else if (c == L_('[') && *p == L_('='))
  284. {
  285. /* It's important that STR be a scalar variable rather
  286. than a one-element array, because GCC (at least 4.9.2
  287. -O2 on x86-64) can be confused by the array and
  288. diagnose a "used initialized" in a dead branch in the
  289. findidx function. */
  290. UCHAR str;
  291. uint32_t nrules =
  292. _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  293. const CHAR *startp = p;
  294. c = *++p;
  295. if (c == L_('\0'))
  296. {
  297. p = startp;
  298. c = L_('[');
  299. goto normal_bracket;
  300. }
  301. str = c;
  302. c = *++p;
  303. if (c != L_('=') || p[1] != L_(']'))
  304. {
  305. p = startp;
  306. c = L_('[');
  307. goto normal_bracket;
  308. }
  309. p += 2;
  310. if (nrules == 0)
  311. {
  312. if ((UCHAR) *n == str)
  313. goto matched;
  314. }
  315. else
  316. {
  317. const int32_t *table;
  318. # if WIDE_CHAR_VERSION
  319. const int32_t *weights;
  320. const wint_t *extra;
  321. # else
  322. const unsigned char *weights;
  323. const unsigned char *extra;
  324. # endif
  325. const int32_t *indirect;
  326. int32_t idx;
  327. const UCHAR *cp = (const UCHAR *) &str;
  328. # if WIDE_CHAR_VERSION
  329. table = (const int32_t *)
  330. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
  331. weights = (const int32_t *)
  332. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
  333. extra = (const wint_t *)
  334. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
  335. indirect = (const int32_t *)
  336. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
  337. # else
  338. table = (const int32_t *)
  339. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
  340. weights = (const unsigned char *)
  341. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
  342. extra = (const unsigned char *)
  343. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
  344. indirect = (const int32_t *)
  345. _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
  346. # endif
  347. idx = FINDIDX (table, indirect, extra, &cp, 1);
  348. if (idx != 0)
  349. {
  350. /* We found a table entry. Now see whether the
  351. character we are currently at has the same
  352. equivalence class value. */
  353. int len = weights[idx & 0xffffff];
  354. int32_t idx2;
  355. const UCHAR *np = (const UCHAR *) n;
  356. idx2 = FINDIDX (table, indirect, extra,
  357. &np, string_end - n);
  358. if (idx2 != 0
  359. && (idx >> 24) == (idx2 >> 24)
  360. && len == weights[idx2 & 0xffffff])
  361. {
  362. int cnt = 0;
  363. idx &= 0xffffff;
  364. idx2 &= 0xffffff;
  365. while (cnt < len
  366. && (weights[idx + 1 + cnt]
  367. == weights[idx2 + 1 + cnt]))
  368. ++cnt;
  369. if (cnt == len)
  370. goto matched;
  371. }
  372. }
  373. }
  374. c = *p++;
  375. }
  376. #endif
  377. else if (c == L_('\0'))
  378. {
  379. /* [ unterminated, treat as normal character. */
  380. p = p_init;
  381. n = n_init;
  382. c = L_('[');
  383. goto normal_match;
  384. }
  385. else
  386. {
  387. bool is_range = false;
  388. #ifdef _LIBC
  389. bool is_seqval = false;
  390. if (c == L_('[') && *p == L_('.'))
  391. {
  392. uint32_t nrules =
  393. _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  394. const CHAR *startp = p;
  395. size_t c1 = 0;
  396. while (1)
  397. {
  398. c = *++p;
  399. if (c == L_('.') && p[1] == L_(']'))
  400. {
  401. p += 2;
  402. break;
  403. }
  404. if (c == '\0')
  405. return FNM_NOMATCH;
  406. ++c1;
  407. }
  408. /* We have to handling the symbols differently in
  409. ranges since then the collation sequence is
  410. important. */
  411. is_range = *p == L_('-') && p[1] != L_('\0');
  412. if (nrules == 0)
  413. {
  414. /* There are no names defined in the collation
  415. data. Therefore we only accept the trivial
  416. names consisting of the character itself. */
  417. if (c1 != 1)
  418. return FNM_NOMATCH;
  419. if (!is_range && *n == startp[1])
  420. goto matched;
  421. cold = startp[1];
  422. c = *p++;
  423. }
  424. else
  425. {
  426. int32_t table_size;
  427. const int32_t *symb_table;
  428. const unsigned char *extra;
  429. int32_t idx;
  430. int32_t elem;
  431. # if WIDE_CHAR_VERSION
  432. CHAR *wextra;
  433. # endif
  434. table_size =
  435. _NL_CURRENT_WORD (LC_COLLATE,
  436. _NL_COLLATE_SYMB_HASH_SIZEMB);
  437. symb_table = (const int32_t *)
  438. _NL_CURRENT (LC_COLLATE,
  439. _NL_COLLATE_SYMB_TABLEMB);
  440. extra = (const unsigned char *)
  441. _NL_CURRENT (LC_COLLATE,
  442. _NL_COLLATE_SYMB_EXTRAMB);
  443. for (elem = 0; elem < table_size; elem++)
  444. if (symb_table[2 * elem] != 0)
  445. {
  446. idx = symb_table[2 * elem + 1];
  447. /* Skip the name of collating element. */
  448. idx += 1 + extra[idx];
  449. # if WIDE_CHAR_VERSION
  450. /* Skip the byte sequence of the
  451. collating element. */
  452. idx += 1 + extra[idx];
  453. /* Adjust for the alignment. */
  454. idx = (idx + 3) & ~3;
  455. wextra = (CHAR *) &extra[idx + 4];
  456. if (/* Compare the length of the sequence. */
  457. c1 == wextra[0]
  458. /* Compare the wide char sequence. */
  459. && (__wmemcmp (startp + 1, &wextra[1],
  460. c1)
  461. == 0))
  462. /* Yep, this is the entry. */
  463. break;
  464. # else
  465. if (/* Compare the length of the sequence. */
  466. c1 == extra[idx]
  467. /* Compare the byte sequence. */
  468. && memcmp (startp + 1,
  469. &extra[idx + 1], c1) == 0)
  470. /* Yep, this is the entry. */
  471. break;
  472. # endif
  473. }
  474. if (elem < table_size)
  475. {
  476. /* Compare the byte sequence but only if
  477. this is not part of a range. */
  478. if (! is_range
  479. # if WIDE_CHAR_VERSION
  480. && __wmemcmp (n, &wextra[1], c1) == 0
  481. # else
  482. && memcmp (n, &extra[idx + 1], c1) == 0
  483. # endif
  484. )
  485. {
  486. n += c1 - 1;
  487. goto matched;
  488. }
  489. /* Get the collation sequence value. */
  490. is_seqval = true;
  491. # if WIDE_CHAR_VERSION
  492. cold = wextra[1 + wextra[0]];
  493. # else
  494. idx += 1 + extra[idx];
  495. /* Adjust for the alignment. */
  496. idx = (idx + 3) & ~3;
  497. cold = *((int32_t *) &extra[idx]);
  498. # endif
  499. c = *p++;
  500. }
  501. else if (c1 == 1)
  502. {
  503. /* No valid character. Match it as a
  504. single byte. */
  505. if (!is_range && *n == startp[1])
  506. goto matched;
  507. cold = startp[1];
  508. c = *p++;
  509. }
  510. else
  511. return FNM_NOMATCH;
  512. }
  513. }
  514. else
  515. #endif
  516. {
  517. c = FOLD (c);
  518. normal_bracket:
  519. /* We have to handling the symbols differently in
  520. ranges since then the collation sequence is
  521. important. */
  522. is_range = (*p == L_('-') && p[1] != L_('\0')
  523. && p[1] != L_(']'));
  524. if (!is_range && c == fn)
  525. goto matched;
  526. #if _LIBC
  527. /* This is needed if we goto normal_bracket; from
  528. outside of is_seqval's scope. */
  529. is_seqval = false;
  530. #endif
  531. cold = c;
  532. c = *p++;
  533. }
  534. if (c == L_('-') && *p != L_(']'))
  535. {
  536. #if _LIBC
  537. /* We have to find the collation sequence
  538. value for C. Collation sequence is nothing
  539. we can regularly access. The sequence
  540. value is defined by the order in which the
  541. definitions of the collation values for the
  542. various characters appear in the source
  543. file. A strange concept, nowhere
  544. documented. */
  545. uint32_t fcollseq;
  546. uint32_t lcollseq;
  547. UCHAR cend = *p++;
  548. # if WIDE_CHAR_VERSION
  549. /* Search in the 'names' array for the characters. */
  550. fcollseq = __collseq_table_lookup (collseq, fn);
  551. if (fcollseq == ~((uint32_t) 0))
  552. /* XXX We don't know anything about the character
  553. we are supposed to match. This means we are
  554. failing. */
  555. goto range_not_matched;
  556. if (is_seqval)
  557. lcollseq = cold;
  558. else
  559. lcollseq = __collseq_table_lookup (collseq, cold);
  560. # else
  561. fcollseq = collseq[fn];
  562. lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
  563. # endif
  564. is_seqval = false;
  565. if (cend == L_('[') && *p == L_('.'))
  566. {
  567. uint32_t nrules =
  568. _NL_CURRENT_WORD (LC_COLLATE,
  569. _NL_COLLATE_NRULES);
  570. const CHAR *startp = p;
  571. size_t c1 = 0;
  572. while (1)
  573. {
  574. c = *++p;
  575. if (c == L_('.') && p[1] == L_(']'))
  576. {
  577. p += 2;
  578. break;
  579. }
  580. if (c == '\0')
  581. return FNM_NOMATCH;
  582. ++c1;
  583. }
  584. if (nrules == 0)
  585. {
  586. /* There are no names defined in the
  587. collation data. Therefore we only
  588. accept the trivial names consisting
  589. of the character itself. */
  590. if (c1 != 1)
  591. return FNM_NOMATCH;
  592. cend = startp[1];
  593. }
  594. else
  595. {
  596. int32_t table_size;
  597. const int32_t *symb_table;
  598. const unsigned char *extra;
  599. int32_t idx;
  600. int32_t elem;
  601. # if WIDE_CHAR_VERSION
  602. CHAR *wextra;
  603. # endif
  604. table_size =
  605. _NL_CURRENT_WORD (LC_COLLATE,
  606. _NL_COLLATE_SYMB_HASH_SIZEMB);
  607. symb_table = (const int32_t *)
  608. _NL_CURRENT (LC_COLLATE,
  609. _NL_COLLATE_SYMB_TABLEMB);
  610. extra = (const unsigned char *)
  611. _NL_CURRENT (LC_COLLATE,
  612. _NL_COLLATE_SYMB_EXTRAMB);
  613. for (elem = 0; elem < table_size; elem++)
  614. if (symb_table[2 * elem] != 0)
  615. {
  616. idx = symb_table[2 * elem + 1];
  617. /* Skip the name of collating
  618. element. */
  619. idx += 1 + extra[idx];
  620. # if WIDE_CHAR_VERSION
  621. /* Skip the byte sequence of the
  622. collating element. */
  623. idx += 1 + extra[idx];
  624. /* Adjust for the alignment. */
  625. idx = (idx + 3) & ~3;
  626. wextra = (CHAR *) &extra[idx + 4];
  627. if (/* Compare the length of the
  628. sequence. */
  629. c1 == wextra[0]
  630. /* Compare the wide char sequence. */
  631. && (__wmemcmp (startp + 1,
  632. &wextra[1], c1)
  633. == 0))
  634. /* Yep, this is the entry. */
  635. break;
  636. # else
  637. if (/* Compare the length of the
  638. sequence. */
  639. c1 == extra[idx]
  640. /* Compare the byte sequence. */
  641. && memcmp (startp + 1,
  642. &extra[idx + 1], c1) == 0)
  643. /* Yep, this is the entry. */
  644. break;
  645. # endif
  646. }
  647. if (elem < table_size)
  648. {
  649. /* Get the collation sequence value. */
  650. is_seqval = true;
  651. # if WIDE_CHAR_VERSION
  652. cend = wextra[1 + wextra[0]];
  653. # else
  654. idx += 1 + extra[idx];
  655. /* Adjust for the alignment. */
  656. idx = (idx + 3) & ~3;
  657. cend = *((int32_t *) &extra[idx]);
  658. # endif
  659. }
  660. else if (c1 == 1)
  661. {
  662. cend = startp[1];
  663. c = *p++;
  664. }
  665. else
  666. return FNM_NOMATCH;
  667. }
  668. }
  669. else
  670. {
  671. if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
  672. cend = *p++;
  673. if (cend == L_('\0'))
  674. return FNM_NOMATCH;
  675. cend = FOLD (cend);
  676. }
  677. /* XXX It is not entirely clear to me how to handle
  678. characters which are not mentioned in the
  679. collation specification. */
  680. if (
  681. # if WIDE_CHAR_VERSION
  682. lcollseq == 0xffffffff ||
  683. # endif
  684. lcollseq <= fcollseq)
  685. {
  686. /* We have to look at the upper bound. */
  687. uint32_t hcollseq;
  688. if (is_seqval)
  689. hcollseq = cend;
  690. else
  691. {
  692. # if WIDE_CHAR_VERSION
  693. hcollseq =
  694. __collseq_table_lookup (collseq, cend);
  695. if (hcollseq == ~((uint32_t) 0))
  696. {
  697. /* Hum, no information about the upper
  698. bound. The matching succeeds if the
  699. lower bound is matched exactly. */
  700. if (lcollseq != fcollseq)
  701. goto range_not_matched;
  702. goto matched;
  703. }
  704. # else
  705. hcollseq = collseq[cend];
  706. # endif
  707. }
  708. if (lcollseq <= hcollseq && fcollseq <= hcollseq)
  709. goto matched;
  710. }
  711. # if WIDE_CHAR_VERSION
  712. range_not_matched:
  713. # endif
  714. #else
  715. /* We use a boring value comparison of the character
  716. values. This is better than comparing using
  717. 'strcoll' since the latter would have surprising
  718. and sometimes fatal consequences. */
  719. UCHAR cend = *p++;
  720. if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
  721. cend = *p++;
  722. if (cend == L_('\0'))
  723. return FNM_NOMATCH;
  724. /* It is a range. */
  725. if ((UCHAR) cold <= fn && fn <= cend)
  726. goto matched;
  727. #endif
  728. c = *p++;
  729. }
  730. }
  731. if (c == L_(']'))
  732. break;
  733. }
  734. if (!not)
  735. return FNM_NOMATCH;
  736. break;
  737. matched:
  738. /* Skip the rest of the [...] that already matched. */
  739. while ((c = *p++) != L_(']'))
  740. {
  741. if (c == L_('\0'))
  742. /* [... (unterminated) loses. */
  743. return FNM_NOMATCH;
  744. if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
  745. {
  746. if (*p == L_('\0'))
  747. return FNM_NOMATCH;
  748. /* XXX 1003.2d11 is unclear if this is right. */
  749. ++p;
  750. }
  751. else if (c == L_('[') && *p == L_(':'))
  752. {
  753. int c1 = 0;
  754. const CHAR *startp = p;
  755. while (1)
  756. {
  757. c = *++p;
  758. if (++c1 == CHAR_CLASS_MAX_LENGTH)
  759. return FNM_NOMATCH;
  760. if (*p == L_(':') && p[1] == L_(']'))
  761. break;
  762. if (c < L_('a') || c >= L_('z'))
  763. {
  764. p = startp - 2;
  765. break;
  766. }
  767. }
  768. p += 2;
  769. }
  770. else if (c == L_('[') && *p == L_('='))
  771. {
  772. c = *++p;
  773. if (c == L_('\0'))
  774. return FNM_NOMATCH;
  775. c = *++p;
  776. if (c != L_('=') || p[1] != L_(']'))
  777. return FNM_NOMATCH;
  778. p += 2;
  779. }
  780. else if (c == L_('[') && *p == L_('.'))
  781. {
  782. while (1)
  783. {
  784. c = *++p;
  785. if (c == L_('\0'))
  786. return FNM_NOMATCH;
  787. if (c == L_('.') && p[1] == L_(']'))
  788. break;
  789. }
  790. p += 2;
  791. }
  792. }
  793. if (not)
  794. return FNM_NOMATCH;
  795. }
  796. break;
  797. case L_('+'):
  798. case L_('@'):
  799. case L_('!'):
  800. if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  801. {
  802. int res = EXT (c, p, n, string_end, no_leading_period, flags,
  803. alloca_used);
  804. if (res != -1)
  805. return res;
  806. }
  807. goto normal_match;
  808. case L_('/'):
  809. if (NO_LEADING_PERIOD (flags))
  810. {
  811. if (n == string_end || c != (UCHAR) *n)
  812. return FNM_NOMATCH;
  813. new_no_leading_period = true;
  814. break;
  815. }
  816. FALLTHROUGH;
  817. default:
  818. normal_match:
  819. if (n == string_end || c != FOLD ((UCHAR) *n))
  820. return FNM_NOMATCH;
  821. }
  822. no_leading_period = new_no_leading_period;
  823. ++n;
  824. }
  825. if (n == string_end)
  826. return 0;
  827. if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
  828. /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
  829. return 0;
  830. return FNM_NOMATCH;
  831. }
  832. static const CHAR *
  833. END (const CHAR *pattern)
  834. {
  835. const CHAR *p = pattern;
  836. while (1)
  837. if (*++p == L_('\0'))
  838. /* This is an invalid pattern. */
  839. return pattern;
  840. else if (*p == L_('['))
  841. {
  842. /* Handle brackets special. */
  843. if (posixly_correct == 0)
  844. posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
  845. /* Skip the not sign. We have to recognize it because of a possibly
  846. following ']'. */
  847. if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
  848. ++p;
  849. /* A leading ']' is recognized as such. */
  850. if (*p == L_(']'))
  851. ++p;
  852. /* Skip over all characters of the list. */
  853. while (*p != L_(']'))
  854. if (*p++ == L_('\0'))
  855. /* This is no valid pattern. */
  856. return pattern;
  857. }
  858. else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
  859. || *p == L_('!')) && p[1] == L_('('))
  860. {
  861. p = END (p + 1);
  862. if (*p == L_('\0'))
  863. /* This is an invalid pattern. */
  864. return pattern;
  865. }
  866. else if (*p == L_(')'))
  867. break;
  868. return p + 1;
  869. }
  870. static int
  871. EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  872. bool no_leading_period, int flags, size_t alloca_used)
  873. {
  874. const CHAR *startp;
  875. ptrdiff_t level;
  876. struct patternlist
  877. {
  878. struct patternlist *next;
  879. CHAR malloced;
  880. CHAR str __flexarr;
  881. } *list = NULL;
  882. struct patternlist **lastp = &list;
  883. size_t pattern_len = STRLEN (pattern);
  884. bool any_malloced = false;
  885. const CHAR *p;
  886. const CHAR *rs;
  887. int retval = 0;
  888. /* Parse the pattern. Store the individual parts in the list. */
  889. level = 0;
  890. for (startp = p = pattern + 1; level >= 0; ++p)
  891. if (*p == L_('\0'))
  892. {
  893. /* This is an invalid pattern. */
  894. retval = -1;
  895. goto out;
  896. }
  897. else if (*p == L_('['))
  898. {
  899. /* Handle brackets special. */
  900. if (posixly_correct == 0)
  901. posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
  902. /* Skip the not sign. We have to recognize it because of a possibly
  903. following ']'. */
  904. if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
  905. ++p;
  906. /* A leading ']' is recognized as such. */
  907. if (*p == L_(']'))
  908. ++p;
  909. /* Skip over all characters of the list. */
  910. while (*p != L_(']'))
  911. if (*p++ == L_('\0'))
  912. {
  913. /* This is no valid pattern. */
  914. retval = -1;
  915. goto out;
  916. }
  917. }
  918. else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
  919. || *p == L_('!')) && p[1] == L_('('))
  920. /* Remember the nesting level. */
  921. ++level;
  922. else if (*p == L_(')'))
  923. {
  924. if (level-- == 0)
  925. {
  926. /* This means we found the end of the pattern. */
  927. #define NEW_PATTERN \
  928. struct patternlist *newp; \
  929. size_t plen = (opt == L_('?') || opt == L_('@') \
  930. ? pattern_len : (p - startp + 1UL)); \
  931. idx_t slen = FLEXSIZEOF (struct patternlist, str, 0); \
  932. idx_t new_used = alloca_used + slen; \
  933. idx_t plensize; \
  934. if (INT_MULTIPLY_WRAPV (plen, sizeof (CHAR), &plensize) \
  935. || INT_ADD_WRAPV (new_used, plensize, &new_used)) \
  936. { \
  937. retval = -2; \
  938. goto out; \
  939. } \
  940. slen += plensize; \
  941. bool malloced = ! __libc_use_alloca (new_used); \
  942. if (__glibc_unlikely (malloced)) \
  943. { \
  944. newp = malloc (slen); \
  945. if (newp == NULL) \
  946. { \
  947. retval = -2; \
  948. goto out; \
  949. } \
  950. any_malloced = true; \
  951. } \
  952. else \
  953. newp = alloca_account (slen, alloca_used); \
  954. newp->next = NULL; \
  955. newp->malloced = malloced; \
  956. *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
  957. *lastp = newp; \
  958. lastp = &newp->next
  959. NEW_PATTERN;
  960. }
  961. }
  962. else if (*p == L_('|'))
  963. {
  964. if (level == 0)
  965. {
  966. NEW_PATTERN;
  967. startp = p + 1;
  968. }
  969. }
  970. assert (list != NULL);
  971. assert (p[-1] == L_(')'));
  972. #undef NEW_PATTERN
  973. switch (opt)
  974. {
  975. case L_('*'):
  976. if (FCT (p, string, string_end, no_leading_period, flags, NULL,
  977. alloca_used) == 0)
  978. goto success;
  979. FALLTHROUGH;
  980. case L_('+'):
  981. do
  982. {
  983. for (rs = string; rs <= string_end; ++rs)
  984. /* First match the prefix with the current pattern with the
  985. current pattern. */
  986. if (FCT (list->str, string, rs, no_leading_period,
  987. flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
  988. NULL, alloca_used) == 0
  989. /* This was successful. Now match the rest with the rest
  990. of the pattern. */
  991. && (FCT (p, rs, string_end,
  992. rs == string
  993. ? no_leading_period
  994. : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
  995. flags & FNM_FILE_NAME
  996. ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
  997. /* This didn't work. Try the whole pattern. */
  998. || (rs != string
  999. && FCT (pattern - 1, rs, string_end,
  1000. rs == string
  1001. ? no_leading_period
  1002. : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
  1003. flags & FNM_FILE_NAME
  1004. ? flags : flags & ~FNM_PERIOD, NULL,
  1005. alloca_used) == 0)))
  1006. /* It worked. Signal success. */
  1007. goto success;
  1008. }
  1009. while ((list = list->next) != NULL);
  1010. /* None of the patterns lead to a match. */
  1011. retval = FNM_NOMATCH;
  1012. break;
  1013. case L_('?'):
  1014. if (FCT (p, string, string_end, no_leading_period, flags, NULL,
  1015. alloca_used) == 0)
  1016. goto success;
  1017. FALLTHROUGH;
  1018. case L_('@'):
  1019. do
  1020. /* I cannot believe it but 'strcat' is actually acceptable
  1021. here. Match the entire string with the prefix from the
  1022. pattern list and the rest of the pattern following the
  1023. pattern list. */
  1024. if (FCT (STRCAT (list->str, p), string, string_end,
  1025. no_leading_period,
  1026. flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
  1027. NULL, alloca_used) == 0)
  1028. /* It worked. Signal success. */
  1029. goto success;
  1030. while ((list = list->next) != NULL);
  1031. /* None of the patterns lead to a match. */
  1032. retval = FNM_NOMATCH;
  1033. break;
  1034. case L_('!'):
  1035. for (rs = string; rs <= string_end; ++rs)
  1036. {
  1037. struct patternlist *runp;
  1038. for (runp = list; runp != NULL; runp = runp->next)
  1039. if (FCT (runp->str, string, rs, no_leading_period,
  1040. flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
  1041. NULL, alloca_used) == 0)
  1042. break;
  1043. /* If none of the patterns matched see whether the rest does. */
  1044. if (runp == NULL
  1045. && (FCT (p, rs, string_end,
  1046. rs == string
  1047. ? no_leading_period
  1048. : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
  1049. flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
  1050. NULL, alloca_used) == 0))
  1051. /* This is successful. */
  1052. goto success;
  1053. }
  1054. /* None of the patterns together with the rest of the pattern
  1055. lead to a match. */
  1056. retval = FNM_NOMATCH;
  1057. break;
  1058. default:
  1059. assert (! "Invalid extended matching operator");
  1060. retval = -1;
  1061. break;
  1062. }
  1063. success:
  1064. out:
  1065. if (any_malloced)
  1066. while (list != NULL)
  1067. {
  1068. struct patternlist *old = list;
  1069. list = list->next;
  1070. if (old->malloced)
  1071. free (old);
  1072. }
  1073. return retval;
  1074. }
  1075. #undef FOLD
  1076. #undef CHAR
  1077. #undef UCHAR
  1078. #undef INT
  1079. #undef FCT
  1080. #undef EXT
  1081. #undef END
  1082. #undef STRUCT
  1083. #undef MEMPCPY
  1084. #undef MEMCHR
  1085. #undef STRLEN
  1086. #undef STRCAT
  1087. #undef L_
  1088. #undef BTOWC
  1089. #undef WIDE_CHAR_VERSION
  1090. #undef FINDIDX