_C PROGRAMMING COLUMN_ by Al Stevens Listing One /* --- scanner.c - Quincy's lexical scanner --- */ #include #include #include "qnc.h" #undef isxdigit static int uncesc(char **); static void fltnum(char **, char **); static void intnum(char **, char **); /* --- Convert C in srcbuf to tokens in tknbuf --- */ int tokenize(char *tknbuf, char *srcbuf) { char *start, *laststring = NULL, *cp, c, c2, c3, op; char buf[8]; int i; int BraceCount = 0; char *tknptr = tknbuf; int sawCond = 0; int sawCase = 0; while (*srcbuf) { /* --- search for 2-char C operators --- */ if ((i = FindOperator(srcbuf)) != 0) { srcbuf+=2; if ((i == T_SHL || i == T_SHR) && *srcbuf == '=') { srcbuf++; i |= 0x80; /* encode op= operator */ } *tknptr++ = i; continue; } c = *srcbuf++; /* next src code char */ c &= 0x7f; op = 0; c2 = *srcbuf; /* lookahead 1 */ c3 = *(srcbuf+1); /* lookahead 2 */ if (c != '"' && c != '\n') laststring = NULL; switch (c) { case '\n': /* File/Line */ /* _____________ * | T_LINENO | * |_____________| * |fileno (byte)| * |_____________| * |lineno (word)| * |_____________| */ handshake(); /* keep D-Flat clock ticking */ *tknptr++ = T_LINENO; Ctx.CurrFileno = atoi(srcbuf+2); *tknptr++ = (unsigned char) Ctx.CurrFileno; srcbuf = strchr(srcbuf, ':'); Assert(srcbuf != NULL); srcbuf++; Ctx.CurrLineno = atoi(srcbuf); *(int*)tknptr = Ctx.CurrLineno; tknptr += sizeof(int); srcbuf = strchr(srcbuf, '/'); Assert(srcbuf != NULL); srcbuf++; break; case '"': /* string constant */ /* ___________ * | T_STRCONST| * |___________| * | length | * |___________| * | char(s) | * |___________| * | 0 | * |___________| */ if (laststring != NULL) /* ---- concatenated string ---- */ tknptr = laststring+strlen(laststring); else { *tknptr++ = T_STRCONST; laststring = tknptr++; } while ((c = *srcbuf) != '"' && c) *tknptr++ = uncesc(&srcbuf); *tknptr++ = '\0'; *laststring = tknptr - laststring; if (c) ++srcbuf; break; case '\'': /* character constant */ /* ___________ * | T_CHRCONST| * |___________| * | value | * |___________| */ *tknptr++ = T_CHRCONST; *tknptr++ = uncesc(&srcbuf); /* --- Skip to delimiting apostrophe --- */ while ((c = *srcbuf++) != '\'' && c) ; if (!c) --srcbuf; break; /* --- operators --- */ /* ___________ * | op token | * |___________| */ case '*': case '^': case '%': case '&': case '|': case '+': case '-': case '/': op = c; case '=': case '!': case '<': case '>': case '[': case ']': case '(': case ')': case ',': case '~': case ' ': case ';': /* --- single character operator --- */ *tknptr++ = c; break; case '?': sawCond++; *tknptr++ = c; break; case ':': if (sawCond) --sawCond; sawCase = 0; *tknptr++ = c; break; case '{': BraceCount++; *tknptr++ = c; break; case '}': --BraceCount; *tknptr++ = c; break; case '.': if (c2 == '.' && c3 == '.') { *tknptr++ = T_ELLIPSE; srcbuf += 2; } else if (isdigit(c2)) { /* * floating pointer number. */ --srcbuf; fltnum(&srcbuf, &tknptr); } else *tknptr++ = c; break; default: if (isdigit(c)) { /* --- constant --- */ /* ___________ * | T_INTCONST| (or T_LNGCONST, * |___________| T_FLTCONST, etc.) * | value | <- binary value of the * |___________| number. Number of * | . | bytes depends on type * |___________| */ --srcbuf; intnum(&srcbuf, &tknptr); } else if (alphanum(c)) { /* --- identifier --- */ start = cp = tknptr+2; --srcbuf; while (alphanum(*srcbuf)) *cp++ = *srcbuf++; *cp++ = 0; if ((i = FindKeyword(start)) != 0) { /* --- keyword --- */ /* ___________ * | key token | * |___________| */ *tknptr++ = i; if (i == T_CASE) sawCase = 1; } else if (!sawCond && !sawCase && *srcbuf == ':') { /* --- label for gotos --- */ VARIABLE var, *lvar; NullVariable(&var); var.vkind = LABEL; var.vsymbolid = AddSymbol(start); var.vclass = BraceCount; lvar = InstallVariable(&var, &Ctx.Curfunction->locals, 0,0,1,0); lvar->voffset = tknptr - tknbuf; srcbuf++; } else { /* symbol, function declaration, prototype, or call? */ FUNCTION *funcp; int fsymbol = AddSymbol(start); if ((funcp = FindFunction(fsymbol)) != NULL) { /* decl, func call, or addr */ /* ____________ * | T_FUNCTREF | * |____________| * | Function | * | Number | * |____________| */ *tknptr++ = T_FUNCTREF; *(unsigned *)tknptr = (funcp - FunctionMemory); tknptr += sizeof(unsigned); } else if (*srcbuf == '(' && BraceCount == 0) { FUNCTION func; NullFunction(&func); /* declaration or prototype */ /* _____________ * | T_FUNCTION | * |_____________| * |symbol offset| * |_____________| */ /* --- install the function --- */ func.symbol = fsymbol; func.libcode = SearchLibrary(start); func.ismain = (strcmp(start, "main") == 0); func.fileno = Ctx.CurrFileno; func.lineno = Ctx.CurrLineno; Ctx.Curfunction = NextFunction; InstallFunction(&func); *tknptr++ = T_FUNCTION; *(int *)tknptr = func.symbol; tknptr += sizeof(int); } else { /* variable reference */ /* _____________ * | T_SYMBOL | * |_____________| * |symbol offset| * |_____________| */ *tknptr++ = T_SYMBOL; *(int *)tknptr = fsymbol; tknptr += sizeof(int); } } } else /* --- Bad character in input line --- */ error(LEXERR); } if (*srcbuf == '=' && op) { tknptr[-1] |= 128; ++srcbuf; } } *tknptr++ = T_EOF; *tknptr = '\0'; return tknptr - tknbuf; } static int uncesc(char **bufp) { /* Unescape character escapes */ char *buf, c; buf = *bufp; if ((c = *buf++) == '\\') { int i; char n[4]; switch (c = *buf++) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case '\\': c = '\\'; break; case '\'': c = '\''; break; case '"': c = '"'; break; case 'x': sscanf(buf, "%x", &i); c = i; while (isxdigit(*buf)) buf++; break; default: if (isdigit(c)) { --buf; for (i=0; i<3 && isdigit(*buf); ++i) n[i] = *buf++; n[i] = 0; sscanf(n, "%o", &i); c = i; } break; } } *bufp = buf; return c; } static void fltnum(char **srcstr, char **tknstr) { /* Parse a floating point number */ char *srcp, *cp; char numbuf[64]; char c, n, dot, e, sign; double f; n = dot = e = sign = 0; srcp = *srcstr; **tknstr = T_FLTCONST; ++(*tknstr); while (*srcp) { if ((c = *srcp++) == '.') { if (dot) { /* Already saw a dot */ --srcp; break; } ++dot; } else if (c=='e' || c=='E') { if (!(dot || n) || e) { /* 'E' does not immediately follow dot or number */ --srcp; break; } ++e; } else if (c=='+' || c=='-') { if (e!=1 || sign) { /* Sign does not immediately follow an 'E' */ --srcp; break; } ++sign; } else if (isdigit(c)) { ++n; if (e) { /* number follows an 'E' - don't allow the sign anymore */ ++e; } } else { --srcp; break; } } /* copy number into local buffer and null terminate it */ n = 0; cp = *srcstr; while (cp < srcp) numbuf[n++] = *cp++; numbuf[n] = 0; f = atof(numbuf); *((double*)*tknstr) = f; *srcstr = srcp; *tknstr += sizeof(double); } /* --- Parse a decimal, octal or hexadecimal number --- */ static void intnum(char **srcstr, char **tknstr) { char *srcp, *cp, c; int i; long j; int isDecimal = 1; /* ---- test for float number ---- */ srcp = *srcstr; while (isdigit(*srcp)) ++srcp; if (*srcp == '.' || *srcp == 'e' || *srcp == 'E') { fltnum(srcstr, tknstr); return; } /* ----- not a float ----- */ c = T_INTCONST; srcp = *srcstr; if (*srcp++ == '0') { if (isdigit(*srcp)) { /* --- octal constant --- */ sscanf(srcp, "%o", &i); while (isdigit(*srcp)) ++srcp; isDecimal = 0; } else if (tolower(*srcp) == 'x') { /* --- hexadecimal constant --- */ sscanf(++srcp, "%x", &i); while (isxdigit(*srcp)) ++srcp; isDecimal = 0; } } if (isDecimal) { cp = --srcp; while (isdigit(*cp)) ++cp; /* --- decimal integer number --- */ i = atoi(srcp); j = atol(srcp); if (*cp == 'U') cp++; if (*cp == 'l' || *cp == 'L') { c = T_LNGCONST; ++cp; } else if (j != (long)i) c = T_LNGCONST; srcp = cp; } *srcstr = srcp; **tknstr = c; ++(*tknstr); if (c == T_LNGCONST) { *((long *)*tknstr) = j; *tknstr += sizeof(long); } else { *((int *)*tknstr) = i; *tknstr += sizeof(int); } } Listing Two /* --------- symbols.c --------- */ #include #include #include "qnc.h" #include "sys.h" SYMBOLTABLE LibraryFunctions[] = { /* --- These have to be maintained in alphabetic order --- */ { "_Errno", SYSERRNO }, { "_filename", SYSFILENAME }, { "_lineno", SYSLINENO }, { "abs", SYSABS }, { "acos", SYSACOS }, { "asctime", SYSASCTIME }, { "asin", SYSASIN }, { "atan", SYSATAN }, { "atan2", SYSATAN }, { "atof", SYSATOF }, { "atoi", SYSATOI }, { "atol", SYSATOL }, { "ceil", SYSCEIL }, { "clrscr", SYSCLRSCRN }, { "cos", SYSCOS }, { "cosh", SYSCOSH }, { "cprintf", SYSCPRINTF }, { "cursor", SYSCURSOR }, { "exit", SYSEXIT }, { "exp", SYSEXP }, { "fabs", SYSFABS }, { "fclose", SYSFCLOSE }, { "fflush", SYSFFLUSH }, { "fgetc", SYSFGETC }, { "fgets", SYSFGETS }, { "findfirst", SYSFINDFIRST }, { "findnext", SYSFINDNEXT }, { "floor", SYSFLOOR }, { "fopen", SYSFOPEN }, { "fprintf", SYSFPRINTF }, { "fputc", SYSFPUTC }, { "fputs", SYSFPUTS }, { "fread", SYSFREAD }, { "free", SYSFREE }, { "fscanf", SYSFSCANF }, { "fseek", SYSFSEEK }, { "ftell", SYSFTELL }, { "fwrite", SYSFWRITE }, { "getch", SYSGETCH }, { "getchar", SYSGETCHAR }, { "gets", SYSGETS }, { "gmtime", SYSGMTIME }, { "localtime", SYSLOCALTIME }, { "log", SYSLOG }, { "log10", SYSLOG10 }, { "longjmp", SYSLONGJMP }, { "malloc", SYSMALLOC }, { "mktime", SYSMKTIME }, { "pow", SYSPOW }, { "printf", SYSPRINTF }, { "putch", SYSPUTCH }, { "putchar", SYSPUTCHAR }, { "puts", SYSPUTS }, { "remove", SYSREMOVE }, { "rename", SYSRENAME }, { "rewind", SYSREWIND }, { "scanf", SYSSCANF }, { "setjmp", SYSSETJMP }, { "sin", SYSSIN }, { "sinh", SYSSINH }, { "sprintf", SYSSPRINTF }, { "sqrt", SYSSQRT }, { "sscanf", SYSSSCANF }, { "strcat", SYSSTRCAT }, { "strcmp", SYSSTRCMP }, { "strcpy", SYSSTRCPY }, { "strlen", SYSSTRLEN }, { "strncat", SYSSTRNCAT }, { "strncmp", SYSSTRNCMP }, { "strncpy", SYSSTRNCPY }, { "system", SYSSYSTEM }, { "tan", SYSTAN }, { "tanh", SYSTANH }, { "time", SYSTIME }, { "tmpfile", SYSTMPFILE }, { "tmpnam", SYSTMPNAM }, { "ungetc", SYSUNGETC } }; #define MAXLIBFUNCTIONS (sizeof(LibraryFunctions)/sizeof(SYMBOLTABLE)) /* --------- keyword lookup table ------------ */ static SYMBOLTABLE Keywords[] = { /* --- These have to be maintained in alphabetic order --- */ { "auto", T_AUTO }, { "break", T_BREAK }, { "case", T_CASE }, { "char", T_CHAR }, { "const", T_CONST }, { "continue", T_CONTINUE }, { "default", T_DEFAULT }, { "do", T_DO }, { "double", T_DOUBLE }, { "else", T_ELSE }, { "enum", T_ENUM }, { "extern", T_EXTERN }, { "float", T_FLOAT }, { "for", T_FOR }, { "goto", T_GOTO }, { "if", T_IF }, { "int", T_INT }, { "long", T_LONG }, { "register", T_REGISTER }, { "return", T_RETURN }, { "short", T_SHORT }, { "sizeof", T_SIZEOF }, { "static", T_STATIC }, { "struct", T_STRUCT }, { "switch", T_SWITCH }, { "typedef", T_TYPEDEF }, { "union", T_UNION }, { "unsigned", T_UNSIGNED }, { "void", T_VOID }, { "volatile", T_VOLATILE }, { "while", T_WHILE } }; #define MAXKEYWORDS (sizeof(Keywords)/sizeof(SYMBOLTABLE)) /* -------- multi-character operator lookup tbl ------------ */ static SYMBOLTABLE Operators[] = { /* --- These have to be maintained in collating order --- */ { "!=", T_NE }, { "&&", T_LAND }, { "++", T_INCR }, { "--", T_DECR }, { "->", T_ARROW }, { "<<", T_SHL }, { "<=", T_LE }, { "==", T_EQ }, { ">=", T_GE }, { ">>", T_SHR }, { "||", T_LIOR } }; #define MAXOPERATORS (sizeof(Operators)/sizeof(SYMBOLTABLE)) static SYMBOLTABLE PreProcessors[] = { /* --- These have to be maintained in collating order --- */ { "define", P_DEFINE }, { "elif", P_ELIF }, { "else", P_ELSE }, { "endif", P_ENDIF }, { "error", P_ERROR }, { "if", P_IF }, { "ifdef", P_IFDEF }, { "ifndef", P_IFNDEF }, { "include", P_INCLUDE }, { "undef", P_UNDEF } }; #define MAXPREPROCESSORS (sizeof(PreProcessors)/sizeof(SYMBOLTABLE)) /* --- search a symbol table for matching entry --- */ int SearchSymbols(char *arg, SYMBOLTABLE *tbl, int siz, int wd) { int i, mid, lo, hi; lo = 0; hi = siz-1; while (lo <= hi) { mid = (lo + hi) / 2; i = wd ? strncmp(arg, tbl[mid].symbol, wd) : strcmp(arg, tbl[mid].symbol); if (i < 0) hi = mid-1; else if (i) lo = mid + 1; else return tbl[mid].ident; } return 0; } /* --- search for library function identifier --- */ int SearchLibrary(char *fname) { return SearchSymbols(fname,LibraryFunctions,MAXLIBFUNCTIONS,0); } /* --- search for keyword --- */ int FindKeyword(char *keyword) { return SearchSymbols(keyword, Keywords, MAXKEYWORDS, 0); } /* --- search for two-character operator --- */ int FindOperator(char *oper) { return SearchSymbols(oper, Operators, MAXOPERATORS, 2); } /* --- search for preprocessing directive --- */ int FindPreProcessor(char *preproc) { return SearchSymbols(preproc,PreProcessors,MAXPREPROCESSORS,0); } /* --- search for user-declared identifier --- */ int FindSymbol(char *sym) { if (SymbolTable != NULL) return SearchSymbols(sym, SymbolTable, SymbolCount, 0); return 0; } /* --- find identifier given code --- */ char *FindSymbolName(int id) { int i; for (i = 0; i < SymbolCount; i++) if (SymbolTable[i].ident == id) return SymbolTable[i].symbol; return NULL; } /* --- add identifier to symbol table --- */ int AddSymbol(char *sym) { int symbolid = 0; if (SymbolTable != NULL) { symbolid = FindSymbol(sym); if (symbolid == 0) { if (SymbolCount < qCfg.MaxSymbolTable) { int i, j; int len = strlen(sym)+1; char *s = getmem(len); strcpy(s, sym); for (i = 0; i < SymbolCount; i++) if (strcmp(sym, SymbolTable[i].symbol) < 0) break; for (j = SymbolCount; j > i; --j) SymbolTable[j] = SymbolTable[j-1]; SymbolTable[i].symbol = s; SymbolTable[i].ident = ++SymbolCount; symbolid = SymbolCount; } else error(SYMBOLTABLERR); } } return symbolid; } /* --- delete the symbol table entries --- */ void DeleteSymbols(void) { int i; for (i = 0; i < SymbolCount; i++) free(SymbolTable[i].symbol); }