char *zBuf, /* The buffer to which text is added */
int *pnUsed, /* Slots of the buffer used so far */
const char *zIn, /* Text to add */
- int nIn /* Bytes of text to add. -1 to use strlen() */
+ int nIn, /* Bytes of text to add. -1 to use strlen() */
+ int iWidth /* Field width. Negative to left justify */
){
if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){}
+ while( iWidth>nIn ){ zBuf[*(pnUsed++)] = ' '; iWidth--; }
if( nIn==0 ) return;
memcpy(&zBuf[*pnUsed], zIn, nIn);
*pnUsed += nIn;
+ while( (-iWidth)>nIn ){ zBuf[*(pnUsed++)] = ' '; iWidth++; }
zBuf[*pnUsed] = 0;
}
static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){
str[0] = 0;
for(i=j=0; (c = zFormat[i])!=0; i++){
if( c=='%' ){
- lemon_addtext(str, &nUsed, &zFormat[j], i-j);
+ int iWidth = 0;
+ lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
c = zFormat[++i];
+ if( isdigit(c) || (c=='-' && isdigit(zFormat[i+1])) ){
+ if( c=='-' ) i++;
+ while( isdigit(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0';
+ if( c=='-' ) iWidth = -iWidth;
+ c = zFormat[i];
+ }
if( c=='d' ){
int v = va_arg(ap, int);
if( v<0 ){
- lemon_addtext(str, &nUsed, "-", 1);
+ lemon_addtext(str, &nUsed, "-", 1, iWidth);
v = -v;
}else if( v==0 ){
- lemon_addtext(str, &nUsed, "0", 1);
+ lemon_addtext(str, &nUsed, "0", 1, iWidth);
}
k = 0;
while( v>0 ){
zTemp[sizeof(zTemp)-k] = (v%10) + '0';
v /= 10;
}
- lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k);
+ lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth);
}else if( c=='s' ){
z = va_arg(ap, const char*);
- lemon_addtext(str, &nUsed, z, -1);
+ lemon_addtext(str, &nUsed, z, -1, iWidth);
}else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){
i += 2;
k = va_arg(ap, int);
z = va_arg(ap, const char*);
- lemon_addtext(str, &nUsed, z, k);
+ lemon_addtext(str, &nUsed, z, k, iWidth);
}else if( c=='%' ){
- lemon_addtext(str, &nUsed, "%", 1);
+ lemon_addtext(str, &nUsed, "%", 1, 0);
}else{
fprintf(stderr, "illegal format\n");
exit(1);
j = i+1;
}
}
- lemon_addtext(str, &nUsed, &zFormat[j], i-j);
+ lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
return nUsed;
}
static int lemon_sprintf(char *str, const char *format, ...){
}
/* Count and index the symbols of the grammar */
- lem.nsymbol = Symbol_count();
Symbol_new("{default}");
+ lem.nsymbol = Symbol_count();
lem.symbols = Symbol_arrayof();
- for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
- qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp);
- for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
+ for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
+ qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp);
+ for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
+ while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; }
+ assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 );
+ lem.nsymbol = i - 1;
for(i=1; isupper(lem.symbols[i]->name[0]); i++);
lem.nterminal = i;
WAITING_FOR_DESTRUCTOR_SYMBOL,
WAITING_FOR_DATATYPE_SYMBOL,
WAITING_FOR_FALLBACK_ID,
- WAITING_FOR_WILDCARD_ID
+ WAITING_FOR_WILDCARD_ID,
+ WAITING_FOR_CLASS_ID,
+ WAITING_FOR_CLASS_TOKEN
};
struct pstate {
char *filename; /* Name of the input file */
struct lemon *gp; /* Global state vector */
enum e_state state; /* The state of the parser */
struct symbol *fallback; /* The fallback token */
+ struct symbol *tkclass; /* Token class symbol */
struct symbol *lhs; /* Left-hand side of current rule */
const char *lhsalias; /* Alias for the LHS */
int nrhs; /* Number of right-hand side symbols seen */
psp->state = WAITING_FOR_FALLBACK_ID;
}else if( strcmp(x,"wildcard")==0 ){
psp->state = WAITING_FOR_WILDCARD_ID;
+ }else if( strcmp(x,"token_class")==0 ){
+ psp->state = WAITING_FOR_CLASS_ID;
}else{
ErrorMsg(psp->filename,psp->tokenlineno,
"Unknown declaration keyword: \"%%%s\".",x);
}
}
break;
+ case WAITING_FOR_CLASS_ID:
+ if( !islower(x[0]) ){
+ ErrorMsg(psp->filename, psp->tokenlineno,
+ "%%token_class must be followed by an identifier: ", x);
+ psp->errorcnt++;
+ psp->state = RESYNC_AFTER_DECL_ERROR;
+ }else if( Symbol_find(x) ){
+ ErrorMsg(psp->filename, psp->tokenlineno,
+ "Symbol \"%s\" already used", x);
+ psp->errorcnt++;
+ psp->state = RESYNC_AFTER_DECL_ERROR;
+ }else{
+ psp->tkclass = Symbol_new(x);
+ psp->tkclass->type = MULTITERMINAL;
+ psp->state = WAITING_FOR_CLASS_TOKEN;
+ }
+ break;
+ case WAITING_FOR_CLASS_TOKEN:
+ if( x[0]=='.' ){
+ psp->state = WAITING_FOR_DECL_OR_RULE;
+ }else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){
+ struct symbol *msp = psp->tkclass;
+ msp->nsubsym++;
+ msp->subsym = (struct symbol **) realloc(msp->subsym,
+ sizeof(struct symbol*)*msp->nsubsym);
+ if( !isupper(x[0]) ) x++;
+ msp->subsym[msp->nsubsym-1] = Symbol_new(x);
+ }else{
+ ErrorMsg(psp->filename, psp->tokenlineno,
+ "%%token_class argument \"%s\" should be a token", x);
+ psp->errorcnt++;
+ psp->state = RESYNC_AFTER_DECL_ERROR;
+ }
+ break;
case RESYNC_AFTER_RULE_ERROR:
/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
** break; */
printf(" ::=");
for(i=0; i<rp->nrhs; i++){
sp = rp->rhs[i];
- printf(" %s", sp->name);
if( sp->type==MULTITERMINAL ){
+ printf(" %s", sp->subsym[0]->name);
for(j=1; j<sp->nsubsym; j++){
printf("|%s", sp->subsym[j]->name);
}
+ }else{
+ printf(" %s", sp->name);
}
/* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
}
if( i==cfp->dot ) fprintf(fp," *");
if( i==rp->nrhs ) break;
sp = rp->rhs[i];
- fprintf(fp," %s", sp->name);
if( sp->type==MULTITERMINAL ){
+ fprintf(fp," %s", sp->subsym[0]->name);
for(j=1; j<sp->nsubsym; j++){
fprintf(fp,"|%s",sp->subsym[j]->name);
}
+ }else{
+ fprintf(fp," %s", sp->name);
}
}
}
fprintf(out,"%s ::=", rp->lhs->name);
for(j=0; j<rp->nrhs; j++){
struct symbol *sp = rp->rhs[j];
- fprintf(out," %s", sp->name);
- if( sp->type==MULTITERMINAL ){
+ if( sp->type!=MULTITERMINAL ){
+ fprintf(out," %s", sp->name);
+ }else{
int k;
+ fprintf(out," %s", sp->subsym[0]->name);
for(k=1; k<sp->nsubsym; k++){
fprintf(out,"|%s",sp->subsym[k]->name);
}
if( in ){
int nextChar;
for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
- lemon_sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
+ lemon_sprintf(pattern,"#define %s%-30s %3d\n",
+ prefix,lemp->symbols[i]->name,i);
if( strcmp(line,pattern) ) break;
}
nextChar = fgetc(in);
out = file_open(lemp,".h","wb");
if( out ){
for(i=1; i<lemp->nterminal; i++){
- fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
+ fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i);
}
fclose(out);
}
return sp;
}
-/* Compare two symbols for working purposes
+/* Compare two symbols for sorting purposes. Return negative,
+** zero, or positive if a is less then, equal to, or greater
+** than b.
**
** Symbols that begin with upper case letters (terminals or tokens)
** must sort before symbols that begin with lower case letters
-** (non-terminals). Other than that, the order does not matter.
+** (non-terminals). And MULTITERMINAL symbols (created using the
+** %token_class directive) must sort at the very end. Other than
+** that, the order does not matter.
**
** We find experimentally that leaving the symbols in their original
** order (the order they appeared in the grammar file) gives the
*/
int Symbolcmpp(const void *_a, const void *_b)
{
- const struct symbol **a = (const struct symbol **) _a;
- const struct symbol **b = (const struct symbol **) _b;
- int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
- int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
- assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 );
- return i1-i2;
+ const struct symbol *a = *(const struct symbol **) _a;
+ const struct symbol *b = *(const struct symbol **) _b;
+ int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1;
+ int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1;
+ return i1==i2 ? a->index - b->index : i1 - i2;
}
/* There is one instance of the following structure for each