Home | History | Annotate | Download | only in lib

Lines Matching defs:dfa

28 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
33 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
35 static void optimize_utf8 (re_dfa_t *dfa);
51 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
52 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
54 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
55 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
57 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
77 re_dfa_t *dfa, re_token_t *token,
79 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
85 re_dfa_t *dfa,
110 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
115 static bin_tree_t *create_tree (re_dfa_t *dfa,
118 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
121 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
281 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
285 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
286 if (dfa->init_state != dfa->init_state_word)
287 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
288 if (dfa->init_state != dfa->init_state_nl)
289 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
290 if (dfa->init_state != dfa->init_state_begbuf)
291 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
315 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
317 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
321 re_token_type_t type = dfa->nodes[node].type;
325 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
327 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
335 *p++ = dfa->nodes[node].opr.c;
336 while (++node < dfa->nodes_len
337 && dfa->nodes[node].type == CHARACTER
338 && dfa->nodes[node].mb_partial)
339 *p++ = dfa->nodes[node].opr.c;
355 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
364 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
389 if (dfa->mb_cur_max > 1
417 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
613 free_dfa_content (re_dfa_t *dfa)
617 if (dfa->nodes)
618 for (i = 0; i < dfa->nodes_len; ++i)
619 free_token (dfa->nodes + i);
620 re_free (dfa->nexts);
621 for (i = 0; i < dfa->nodes_len; ++i)
623 if (dfa->eclosures != NULL)
624 re_node_set_free (dfa->eclosures + i);
625 if (dfa->inveclosures != NULL)
626 re_node_set_free (dfa->inveclosures + i);
627 if (dfa->edests != NULL)
628 re_node_set_free (dfa->edests + i);
630 re_free (dfa->edests);
631 re_free (dfa->eclosures);
632 re_free (dfa->inveclosures);
633 re_free (dfa->nodes);
635 if (dfa->state_table)
636 for (i = 0; i <= dfa->state_hash_mask; ++i)
638 struct re_state_table_entry *entry = dfa->state_table + i;
646 re_free (dfa->state_table);
648 if (dfa->sb_char != utf8_sb_map)
649 re_free (dfa->sb_char);
651 re_free (dfa->subexp_map);
653 re_free (dfa->re_str);
656 re_free (dfa);
666 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
667 if (BE (dfa != NULL, 1))
668 free_dfa_content (dfa);
762 re_dfa_t *dfa;
774 /* Initialize the dfa. */
775 dfa = (re_dfa_t *) preg->buffer;
782 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
783 if (dfa == NULL)
786 preg->buffer = (unsigned char *) dfa;
790 err = init_dfa (dfa, length);
793 free_dfa_content (dfa);
800 dfa->re_str = re_malloc (char, length + 1);
801 strncpy (dfa->re_str, pattern, length + 1);
804 __libc_lock_init (dfa->lock);
807 (syntax & RE_ICASE) != 0, dfa);
813 free_dfa_content (dfa);
821 dfa->str_tree = parse (&regexp, preg, syntax, &err);
822 if (BE (dfa->str_tree == NULL, 0))
832 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
833 optimize_utf8 (dfa);
836 /* Then create the initial state of the dfa. */
837 err = create_initial_state (dfa);
845 free_dfa_content (dfa);
853 /* Initialize DFA. We use the length of the regular expression PAT_LEN
857 init_dfa (re_dfa_t *dfa, size_t pat_len)
872 memset (dfa, '\0', sizeof (re_dfa_t));
875 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
884 dfa->nodes_alloc = pat_len + 1;
885 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
892 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
893 dfa->state_hash_mask = table_size - 1;
895 dfa->mb_cur_max = MB_CUR_MAX;
897 if (dfa->mb_cur_max == 6
899 dfa->is_utf8 = 1;
900 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
904 dfa->is_utf8 = 1;
908 dfa->map_notascii = 0;
912 if (dfa->mb_cur_max > 1)
914 if (dfa->is_utf8)
915 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
920 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
921 if (BE (dfa->sb_char == NULL, 0))
930 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
933 dfa->map_notascii = 1;
940 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
951 init_word_char (re_dfa_t *dfa)
954 dfa->word_ops_used = 1;
958 dfa->word_char[i] |= (bitset_word_t) 1 << j;
966 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
968 for (storage = dfa->str_tree_storage; storage; storage = next)
973 dfa->str_tree_storage = NULL;
974 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
975 dfa->str_tree = NULL;
976 re_free (dfa->org_indices);
977 dfa->org_indices = NULL;
983 create_initial_state (re_dfa_t *dfa)
991 first = dfa->str_tree->first->node_idx;
992 dfa->init_node = first;
993 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1001 if (dfa->nbackref > 0)
1005 re_token_type_t type = dfa->nodes[node_idx].type;
1013 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1015 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1023 Idx dest_idx = dfa->edests[node_idx].elems[0];
1026 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
1033 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1035 if (BE (dfa->init_state == NULL, 0))
1037 if (dfa->init_state->has_constraint)
1039 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1041 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1043 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1047 if (BE (dfadfa->init_state_nl == NULL
1048 || dfa->init_state_begbuf == NULL, 0))
1052 dfa->init_state_word = dfa->init_state_nl
1053 = dfa->init_state_begbuf = dfa->init_state;
1062 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1063 DFA nodes where needed. */
1066 optimize_utf8 (re_dfa_t *dfa)
1073 for (node = 0; node < dfa->nodes_len; ++node)
1074 switch (dfa->nodes[node].type)
1077 if (dfa->nodes[node].opr.c >= ASCII_CHARS)
1081 switch (dfa->nodes[node].opr.ctx_type)
1090 opr.ctx_type since constraints (for all DFA nodes) are
1115 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
1126 for (node = 0; node < dfa->nodes_len; ++node)
1128 if (dfa->nodes[node].type == CHARACTER
1129 && dfa->nodes[node].opr.c >= ASCII_CHARS)
1130 dfa->nodes[node].mb_partial = 0;
1131 else if (dfa->nodes[node].type == OP_PERIOD)
1132 dfa->nodes[node].type = OP_UTF8_PERIOD;
1136 dfa->mb_cur_max = 1;
1137 dfa->is_utf8 = 0;
1138 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1149 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1153 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
1154 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
1155 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1156 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1157 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
1158 || dfa->eclosures == NULL, 0))
1161 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
1162 if (dfa->subexp_map != NULL)
1166 dfa->subexp_map[i] = i;
1167 preorder (dfa->str_tree, optimize_subexps, dfa);
1169 if (dfa->subexp_map[i] != i)
1173 free (dfa->subexp_map);
1174 dfa->subexp_map = NULL;
1178 ret = postorder (dfa->str_tree, lower_subexps, preg);
1181 ret = postorder (dfa->str_tree, calc_first, dfa);
1184 preorder (dfa->str_tree, calc_next, dfa);
1185 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1188 ret = calc_eclosure (dfa);
1194 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1195 || dfa->nbackref)
1197 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1198 if (BE (dfa->inveclosures == NULL, 0))
1200 ret = calc_inveclosure (dfa);
1277 re_dfa_t *dfa = (re_dfa_t *) extra;
1279 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1282 node->token.opr.idx = dfa->subexp_map[idx];
1283 dfa->used_bkref_map |= 1 << node->token.opr.idx;
1295 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1297 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1330 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1341 || !(dfa->used_bkref_map
1347 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1348 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1349 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1350 tree = create_tree (dfa, op, tree1, CONCAT);
1367 re_dfa_t *dfa = (re_dfa_t *) extra;
1376 node->node_idx = re_dfa_add_node (dfa, node->token);
1380 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1408 /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1412 re_dfa_t *dfa = (re_dfa_t *) extra;
1429 dfa->has_plural_match = 1;
1440 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1447 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1451 dfa->nexts[idx] = node->next->node_idx;
1453 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1458 dfa->nexts[idx] = node->next->node_idx;
1471 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1480 if (dfa->nodes[org_node].type == OP_BACK_REF)
1486 org_dest = dfa->nexts[org_node];
1487 re_node_set_empty (dfa->edests + clone_node);
1488 clone_dest = duplicate_node (dfa, org_dest, constraint);
1491 dfa->nexts[clone_node] = dfa->nexts[org_node];
1492 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1496 else if (dfa->edests[org_node].nelem == 0)
1501 dfa->nexts[clone_node] = dfa->nexts[org_node];
1504 else if (dfa->edests[org_node].nelem == 1)
1508 org_dest = dfa->edests[org_node].elems[0];
1509 re_node_set_empty (dfa->edests + clone_node);
1510 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1515 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
1521 constraint |= dfa->nodes[org_node].constraint;
1522 clone_dest = duplicate_node (dfa, org_dest, constraint);
1525 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1529 else /* dfa->edests[org_node].nelem == 2 */
1532 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
1533 org_dest = dfa->edests[org_node].elems[0];
1534 re_node_set_empty (dfa->edests + clone_node);
1536 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1541 clone_dest = duplicate_node (dfa, org_dest, constraint);
1544 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1547 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1556 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1561 org_dest = dfa->edests[org_node].elems[1];
1562 clone_dest = duplicate_node (dfa, org_dest, constraint);
1565 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1579 search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
1583 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1585 if (org_node == dfa->org_indices[idx]
1586 && constraint == dfa->nodes[idx].constraint)
1597 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
1599 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1602 dfa->nodes[dup_idx].constraint = constraint;
1603 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
1604 dfa->nodes[dup_idx].duplicated = 1;
1607 dfa->org_indices[dup_idx] = org_idx;
1613 dfa)
1617 for (idx = 0; idx < dfa->nodes_len; ++idx)
1618 re_node_set_init_empty (dfa->inveclosures + idx);
1620 for (src = 0; src < dfa->nodes_len; ++src)
1622 Idx *elems = dfa->eclosures[src].elems;
1623 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1625 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1634 /* Calculate "eclosure" for all the node in DFA. */
1637 calc_eclosure (re_dfa_t *dfa)
1642 assert (dfa->nodes_len > 0);
1650 if (node_idx == dfa->nodes_len)
1659 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
1663 if (dfa->eclosures[node_idx].nelem != 0)
1666 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1670 if (dfa->eclosures[node_idx].nelem == 0)
1682 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1690 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1696 dfa->eclosures[node].nelem = REG_MISSING;
1700 if (dfa->nodes[node].constraint
1701 && dfa->edests[node].nelem
1702 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1704 err = duplicate_node_closure (dfa, node, node, node,
1705 dfa->nodes[node].constraint);
1711 if (IS_EPSILON_NODE(dfa->nodes[node].type))
1712 for (i = 0; i < dfa->edests[node].nelem; ++i)
1715 Idx edest = dfa->edests[node].elems[i];
1718 if (dfa->eclosures[edest].nelem == REG_MISSING)
1725 if (dfa->eclosures[edest].nelem == 0)
1727 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
1732 eclosure_elem = dfa->eclosures[edest];
1737 if (dfa->eclosures[edest].nelem == 0)
1749 dfa->eclosures[node].nelem = 0;
1751 dfa->eclosures[node] = eclosure;
2113 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2116 dfa->syntax = syntax;
2121 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2123 root = create_tree (dfa, tree, eor, CONCAT);
2147 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2165 tree = create_tree (dfa, tree, branch, OP_ALT);
2189 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2204 tree = create_tree (dfa, tree, expr, CONCAT);
2228 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2233 tree = create_token_tree (dfa, NULL, NULL, token);
2240 if (dfa->mb_cur_max > 1)
2247 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2248 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2264 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2269 if (!BE (dfa
2274 dfa->used_bkref_map |= 1 << token->opr.idx;
2275 tree = create_token_tree (dfa, NULL, NULL, token);
2281 ++dfa->nbackref;
2282 dfa->has_mb_node = 1;
2320 tree = create_token_tree (dfa, NULL, NULL, token);
2330 && dfa->word_ops_used == 0)
2331 init_word_char (dfa);
2339 tree_first = create_token_tree (dfa, NULL, NULL, token);
2345 tree_first = create_token_tree (dfa, NULL, NULL, token);
2348 tree_last = create_token_tree (dfa, NULL, NULL, token);
2349 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2358 tree = create_token_tree (dfa, NULL, NULL, token);
2372 tree = create_token_tree (dfa, NULL, NULL, token);
2378 if (dfa->mb_cur_max > 1)
2379 dfa->has_mb_node = 1;
2383 tree = build_charclass_op (dfa, regexp->trans,
2392 tree = build_charclass_op (dfa, regexp->trans,
2417 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2444 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2464 dfa->completed_bkref_map |= 1 << cur_nsub;
2466 tree = create_tree (dfa, tree, NULL, SUBEXP);
2479 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2558 elem = duplicate_tree (elem, dfa);
2559 tree = create_tree (dfa, tree, elem, CONCAT);
2568 elem = duplicate_tree (elem, dfa);
2577 tree = create_tree (dfa, elem, NULL,
2588 elem = duplicate_tree (elem, dfa);
2589 tree = create_tree (dfa, tree, elem, CONCAT);
2593 tree = create_tree (dfa, tree, NULL, OP_ALT);
2599 tree = create_tree (dfa, old_tree, tree, CONCAT);
2673 no MBCSET if dfa->mb_cur_max == 1. */
2764 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2916 if (nrules > 0 || dfa->mb_cur_max > 1)
3110 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3154 dfa, syntax, true);
3169 dfa->mb_cur_max > 1 ? mbcset : NULL,
3253 if (dfa->mb_cur_max > 1)
3254 bitset_mask (sbcset, dfa->sb_char);
3257 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3263 dfa->has_mb_node = 1;
3266 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3279 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3284 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3303 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3323 re_token_t *token, int token_len, re_dfa_t *dfa,
3584 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3646 if (dfa->mb_cur_max > 1)
3647 bitset_mask (sbcset, dfa->sb_char);
3653 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3658 if (dfa->mb_cur_max > 1)
3664 dfa->has_mb_node = 1;
3665 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3669 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3741 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3746 return create_token_tree (dfa, left, right, &t);
3750 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3754 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3760 storage->next = dfa->str_tree_storage;
3761 dfa->str_tree_storage = storage;
3762 dfa->str_tree_storage_idx = 0;
3764 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3827 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3836 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);