Index: bxl_lex.c =================================================================== --- bxl_lex.c (revision 33615) +++ bxl_lex.c (revision 33616) @@ -16,359 +16,8 @@ #endif /* strtree.h END } */ -/* strtree_exec.c BEGIN { */ -int ureglex_strtree_exec(ureglex_strtree_t *ctx, int chr) -{ - int expected, dst; - for(;;) { - switch(*ctx->ip) { - case ULX_REQ: - expected = ctx->ip[1]; - ctx->ip += 2; - if (chr == expected) - return UREGLEX_STRTREE_MORE; - return -1; - case ULX_BRA: - expected = ctx->ip[1]; - dst = ctx->ip[2]; - ctx->ip += 3; - if (chr == expected) { - ctx->ip = ctx->code + dst; - return UREGLEX_STRTREE_MORE; - } - break; - case ULX_FIN: return ctx->ip[1]; - case ULX_BAD: return -2; - } - } - return -1; -} -/* strtree_exec.c END } */ - -/* exec.h BEGIN { */ -#ifndef UREGLEX_EXEC_COMMON_H -#define UREGLEX_EXEC_COMMON_H -#define MAXTAG 10 -typedef struct ureglex_precomp_s { - const unsigned char *nfa; - const unsigned char *bittab; - const unsigned char *chrtyp; - double weight; -} ureglex_precomp_t; -typedef struct ureglex_s { - ureglex_precomp_t *pc; - const char *bol; - const char *bopat[MAXTAG]; - const char *eopat[MAXTAG]; - int score; - const char *endp; - union { const void *ptr; int i; } pmstk[30]; - int pmsp; - const unsigned char *pm_ap; - const char *pm_lp; - int pm_c; - const char *pm_bp; - const char *pm_ep; - const char *pm_are; - const char *ex_lp; - unsigned char ex_c; - int ex_loop, pm_loop, pm_loop2, pm_loop2_later; - int exec_state; -} ureglex_t; -typedef enum { - UREGLEX_MORE = -1, - UREGLEX_TOO_LONG = -2, - UREGLEX_NO_MATCH = -3, - UREGLEX_NOP = -4 -} ureglex_error_t; -extern const unsigned char ureglex_nfa_str[]; -#define ULX_BUF ctx->buff -#define ULX_TAGP(n) (ctx->state[ruleid].bopat[(n)]) -#define ULX_TAGL(n) (ctx->state[ruleid].eopat[(n)] - ctx->state[ruleid].bopat[(n)]) -#define ULX_IGNORE goto ureglex_ignore; -#endif -/* exec.h END } */ - -/* common.h, exec.c BEGIN { */ -#define MAXCHR 128 -#define CHRBIT 8 -#define BITBLK MAXCHR/CHRBIT -#define BLKIND 0170 -#define BITIND 07 -void ureglex_exec_init(ureglex_t *re, const char *str, int buff_used); -extern int ureglex_exec(ureglex_t *re); -extern int ureglex_tag(ureglex_t *re, int tagid, char **begin, char **end); -enum ureglex_opcode_e { - NOP = 0, - END = 0, - CHR = 1, - ANY = 2, - CCL = 3, - BOL = 4, - EOL = 5, - BOT = 6, - EOT = 7, - BOW = 8, - EOW = 9, - REF = 10, - CLO = 11 -}; -#include -#include -#define re_tolower(c) c -#define iswordc(r,x) r->pc->chrtyp[inascii(x)] -#define end(s) (*(s) == '\0') -static unsigned char ureglex_bitarr[] = {1,2,4,8,16,32,64,128}; -#define inascii(x) (0177&(x)) -#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & ureglex_bitarr[(y)&BITIND]) -const unsigned char ureglex_nfa_str[] = {0}; -#define ANYSKIP 2 -#define CHRSKIP 3 -#define CCLSKIP 18 -static const char MORE[] = "more!"; -#define want_more(loopid) \ -do { \ - if (r->pm_lp < r->endp) \ - goto loop ## loopid; \ - r->pm_loop = loopid; \ - return MORE; \ -} while(0) -#define PUSH(r,ty,val) if (r->pmsp >= sizeof(r->pmstk) / sizeof(r->pmstk[0])) return 0; r->pmstk[r->pmsp++].ty = val -#define POP(r,ty,dst) dst = r->pmstk[--r->pmsp].ty -static const char *pmatch(ureglex_t *r) -{ - register int op, c, n; - register const char *e = NULL; - switch(r->pm_loop) { - case 1: r->pm_loop = 0; goto loop1; - case 2: r->pm_loop = 0; goto loop2; - case 3: r->pm_loop = 0; goto loop3; - case 4: r->pm_loop = 0; goto loop4; - case 6: r->pm_loop = 0; goto loop6; - case 7: r->pm_loop = 0; goto loop7; - case 8: r->pm_loop = 0; goto loop8; - } - switch(r->pm_loop2) { - case 1: r->pm_loop2 = 0; goto loop2_1; - } - while ((op = *r->pm_ap++) != END) - switch(op) { - case CHR: - want_more(6); - loop6:; - if (re_tolower(*r->pm_lp++) != *r->pm_ap++) - return 0; - r->score += 100; - break; - case ANY: - want_more(7); - loop7:; - if (end(r->pm_lp++)) - return 0; - r->score++; - break; - case CCL: - if (end(r->pm_lp)) - return 0; - want_more(8); - loop8:; - c = re_tolower(*r->pm_lp++); - if (!isinset(r->pm_ap,c)) - return 0; - r->pm_ap += BITBLK; - r->score += 2; - break; - case BOL: - if (r->pm_lp != r->bol) - return 0; - r->score += 10; - break; - case EOL: - if (!end(r->pm_lp)) - return 0; - r->score += 10; - break; - case BOT: - r->bopat[*r->pm_ap++] = r->pm_lp; - break; - case EOT: - r->eopat[*r->pm_ap++] = r->pm_lp; - break; - case BOW: - if ((r->pm_lp!=r->bol && iswordc(r, r->pm_lp[-1])) || !iswordc(r, *r->pm_lp)) - return 0; - r->score += 5; - break; - case EOW: - if (r->pm_lp==r->bol || !iswordc(r, r->pm_lp[-1]) || (!end(r->pm_lp) && iswordc(r, *r->pm_lp))) - return 0; - r->score += 5; - break; - case REF: - n = *r->pm_ap++; - r->pm_bp = r->bopat[n]; - r->pm_ep = r->eopat[n]; - while (r->pm_bp < r->pm_ep) { - want_more(1); - loop1:; - if (*r->pm_bp++ != *r->pm_lp++) - return 0; - r->score += 2; - } - break; - case CLO: - r->pm_are = r->pm_lp; - switch(*r->pm_ap) { - case ANY: - do { - want_more(2); - loop2:; - } while(!end(r->pm_lp++)); - n = ANYSKIP; - r->score++; - break; - case CHR: - r->pm_c = *(r->pm_ap+1); - do { - want_more(3); - loop3:; - } while (!end(r->pm_lp) && r->pm_c == re_tolower(*r->pm_lp) && (r->pm_lp++)); - n = CHRSKIP; - r->score += 100; - break; - case CCL: - do { - want_more(4); - loop4:; - } while ((c = re_tolower(*r->pm_lp)) && isinset(r->pm_ap+1,c) && (r->pm_lp++)); - n = CCLSKIP; - r->score += 2; - break; - default: - return 0; - } - r->pm_ap += n; - while (r->pm_lp >= r->pm_are) { - PUSH(r, ptr, r->pm_ap); - PUSH(r, ptr, r->pm_lp); - PUSH(r, i, r->pm_loop); - r->pm_loop2_later = 1; - e = pmatch(r); - if (e == MORE) - return MORE; - loop2_1:; - POP(r, i, r->pm_loop); - POP(r, ptr, r->pm_lp); - POP(r, ptr, r->pm_ap); - if (e) - return e; - --r->pm_lp; - } - return 0; - default: - return 0; - } - r->pm_loop2 = r->pm_loop2_later; - return r->pm_lp; -} -void ureglex_exec_init(ureglex_t *r, const char *lp, int buff_used) -{ - r->bol = lp; - r->score = 1; - memset(r->bopat, 0, (char *)&r->eopat[MAXTAG] - (char *)&r->bopat[0]); - r->pmsp = 0; - r->ex_lp = lp; - r->endp = lp + buff_used; - r->ex_loop = r->pm_loop = r->pm_loop2 = 0; - r->exec_state = -1; -} -#undef want_more -#define want_more(loopid) \ -do { \ - if (r->ex_lp < r->endp) \ - goto loop ## loopid; \ - r->ex_loop = loopid; \ - return -1; \ -} while(0) -#define want_more2(loopid) \ -do { \ - if (r->pm_lp < r->endp) \ - goto loop ## loopid; \ - r->ex_loop = loopid; \ - return -1; \ -} while(0) -int ureglex_exec(ureglex_t *r) -{ - register const char *ep = 0; - const unsigned char *ap = r->pc->nfa; - r->endp++; - switch(r->ex_loop) { - case 1: r->ex_loop = 0; goto loop1; - case 2: r->ex_loop = 0; goto loop2; - case 3: r->ex_loop = 0; goto loop3; - case 4: r->ex_loop = 0; goto loop4; - } - switch(*ap) { - case BOL: - r->pm_ap = ap; - r->pm_lp = r->ex_lp; - loop1:; - ep = pmatch(r); - if (ep == MORE) - want_more2(1); - break; - case CHR: - r->ex_c = *(ap+1); - while (!end(r->ex_lp) && re_tolower(*r->ex_lp) != r->ex_c) { - r->ex_lp++; - want_more(2); - loop2:; - } - if (end(r->ex_lp)) - return 0; - default: - for(;;) { - r->pm_ap = ap; - r->pm_lp = r->ex_lp; - loop3:; - ep = pmatch(r); - if (ep == MORE) { - want_more2(3); - } - if (ep != NULL) - break; - r->ex_lp++; - want_more(4); - loop4:; - if (end(r->ex_lp)) - break; - } - break; - case END: - return 0; - } - if (!ep) - return 0; - r->bopat[0] = r->ex_lp; - r->eopat[0] = ep; - return r->score; -} -#define setout(dest,val) \ - if ((dest) != NULL) \ - *(dest) = val; -int ureglex_tag(ureglex_t *re, int tagid, char **begin, char **end) -{ - if ((tagid < 0) || (tagid > MAXTAG)) { - setout(begin, NULL); - setout(end, NULL); - return -1; - } - setout(begin, (char *)re->bopat[tagid]); - setout(end, (char *)re->eopat[tagid]); - return 0; -} -/* common.h, exec.c END } */ - +#include +#include "ureglex/exec.h" int pcb_bxl_strings[] = {2,51,266,2,65,169,2,67,75,2,68,243,2,69,58,2,70,259,2,71,224,2,72,252,2,73,153,2,74,176,2,76,191,2,78,217,2,79,198,2,80,137,2,82,162,2,83,94,2,84,121,2,86,110,2,87,128,4,1,110,1,100,2,67,305,2,68,344,2,80,337,2,83,324,4,1,111,1,109,1,112,1,111,1,110,1,101,1,110,1,116,3,61,4,2,104,832,2,116,800,2,117,819,2,119,781,2,121,770,4,1,97,1,114,2,68,897,2,78,906,4,2,101,843,2,114,850,4,1,105,2,100,924,2,122,915,4,2,97,631,2,105,638,2,108,645,2,111,624,2,114,609,4,1,115,2,70,485,2,86 ,500,4,2,97,748,2,111,759,4,2,114,279,2,116,284,4,1,117,1,115,1,116,1,105,1,102,1,121,3,46,4,2,97,522,2,105,515,4,1,114,1,105,1,103,1,105,1,110,2,80,566,2,97,555,3,37,4,2,111,542,2,117,531,4,1,108,1,117,1,101,1,80,1,111,1,105,1,110,1,116,3,31,4,1,97,1,116,1,97,3,27,4,2,101,459,2,111,470,4,2,97,407,2,111,393,4,1,68,1,95,1,68,1,88,1,70,3,6,4,1,99,3,47,4,1,116,1,114,2,105,294,3,45,4,1,98,1,117,1,116,1,101,3,44,4,1,111,1,109,1,112,1,111,1,110,1,101,1,110,1,116,3,62,4,1,121,1,109,1,98,1,111,1,108,3,60 ,4,2,97,362,2,111,353,4,1,97,1,116,1,97,3,28,4,1,105,1,110,1,116,3,42,4,2,100,380,2,116,369,4,1,116,1,101,1,114,1,110,3,26,4,1,83,1,116,1,97,1,99,1,107,3,14,4,1,110,1,116,2,67,429,2,72,416,2,87,448,4,1,108,1,115,1,101,3,8,4,1,101,1,105,1,103,1,104,1,116,3,12,4,1,104,1,97,1,114,1,87,1,105,1,100,1,116,1,104,3,11,4,1,105,1,100,1,116,1,104,3,10,4,1,105,1,103,1,104,1,116,3,22,4,1,108,1,101,1,68,1,105,1,97,1,109,3,17,4,1,108,1,105,1,112,1,112,1,101,1,100,3,53,4,1,105,1,115,1,105,1,98,1,108,1,101,3,52,4 Index: bxl_lex.h =================================================================== --- bxl_lex.h (revision 33615) +++ bxl_lex.h (revision 33616) @@ -49,6 +49,8 @@ #define ULX_TAGP(n) (ctx->state[ruleid].bopat[(n)]) #define ULX_TAGL(n) (ctx->state[ruleid].eopat[(n)] - ctx->state[ruleid].bopat[(n)]) #define ULX_IGNORE goto ureglex_ignore; +void ureglex_exec_init(ureglex_t *r, const char *lp, int buff_used); +int ureglex_exec(ureglex_t *r); #endif /* exec.h END } */ @@ -73,5 +75,5 @@ #ifndef URELGLEX_EXEC_pcb_bxl_H #define URELGLEX_EXEC_pcb_bxl_H extern ureglex_precomp_t pcb_bxl_rules[]; -#define URELGLEX_EXEC_pcb_bxl_HAS_COMMON 1 +#define URELGLEX_EXEC_pcb_bxl_HAS_COMMON 0 #endif