/*
 * Public domain / CC0. Use freely for any purpose. RoyR 2026
 * common.c — Compiler for the "Common" language
 * Outputs NASM x86_32 assembly (cdecl, ELF32)
 *
 * Build: gcc -o common common.c
 * Usage: ./common source.cm > output.asm
 *        nasm -f elf32 output.asm -o output.o
 *        gcc -m32 output.o -o output
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdarg.h>
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
TOKENS
|
|
══════════════════════════════════════════════════════════ */
|
|
/* Token kinds produced by the lexer.
   Enumerator values are implicit, so the order below must not change. */
typedef enum {
    /* literals */
    TK_NUM,
    TK_STR,
    TK_ID,

    /* type keywords */
    TK_UINT8, TK_UINT16, TK_UINT32, TK_UINT64,
    TK_VOID,
    TK_INT8, TK_INT16, TK_INT32, TK_INT64,

    /* statement keywords */
    TK_IF, TK_ELSE,
    TK_WHILE, TK_FOR,
    TK_SWITCH, TK_CASE, TK_DEFAULT,
    TK_BREAK, TK_CONTINUE, TK_RETURN,

    /* multi-character operators */
    TK_EQ, TK_NEQ, TK_LEQ, TK_GEQ,
    TK_AND, TK_OR,
    TK_SHL, TK_SHR,
    TK_INC, TK_DEC,
    TK_ADDEQ, TK_SUBEQ, TK_MULEQ, TK_DIVEQ, TK_MODEQ,
    TK_ANDEQ, TK_OREQ, TK_XOREQ, TK_SHLEQ, TK_SHREQ,

    /* single-character operators */
    TK_PLUS, TK_MINUS, TK_STAR, TK_SLASH, TK_MOD,
    TK_AMP, TK_PIPE, TK_CARET, TK_TILDE, TK_BANG,
    TK_LT, TK_GT, TK_ASSIGN,

    /* punctuation */
    TK_LPAREN, TK_RPAREN,
    TK_LBRACE, TK_RBRACE,
    TK_LBRACK, TK_RBRACK,
    TK_SEMI, TK_COMMA, TK_COLON, TK_QUESTION,

    /* end of input; also used as a list terminator in operator tables */
    TK_EOF
} TKind;
|
|
|
|
typedef struct {
|
|
TKind kind;
|
|
char str[512]; /* identifier / string value — 512 to match str_val */
|
|
int slen; /* FIX(bug2+3): explicit byte count for str, excl. terminator */
|
|
long num; /* numeric value */
|
|
int line; /* source line where token starts */
|
|
} Token;
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
LEXER
|
|
══════════════════════════════════════════════════════════ */
|
|
static const char *src;
|
|
static int src_pos;
|
|
static int src_line = 1; /* current line number (1-based) */
|
|
static Token tok; /* current token */
|
|
|
|
static void die(const char *fmt, ...) {
|
|
fprintf(stderr, "line %d: ", src_line);
|
|
va_list ap; va_start(ap, fmt);
|
|
vfprintf(stderr, fmt, ap);
|
|
fputc('\n', stderr);
|
|
exit(1);
|
|
}
|
|
|
|
static void skip_ws(void) {
|
|
for (;;) {
|
|
while (isspace((unsigned char)src[src_pos])) {
|
|
if (src[src_pos] == '\n') src_line++;
|
|
src_pos++;
|
|
}
|
|
if (src[src_pos]=='/' && src[src_pos+1]=='/') {
|
|
while (src[src_pos] && src[src_pos]!='\n') src_pos++;
|
|
} else if (src[src_pos]=='/' && src[src_pos+1]=='*') {
|
|
src_pos += 2;
|
|
while (src[src_pos] && !(src[src_pos-1]=='*' && src[src_pos]=='/')) {
|
|
if (src[src_pos] == '\n') src_line++;
|
|
src_pos++;
|
|
}
|
|
if (src[src_pos]) src_pos++;
|
|
} else break;
|
|
}
|
|
}
|
|
|
|
static struct { const char *kw; TKind tk; } kw_table[] = {
|
|
{"if",TK_IF},{"else",TK_ELSE},{"while",TK_WHILE},{"for",TK_FOR},
|
|
{"switch",TK_SWITCH},{"case",TK_CASE},{"default",TK_DEFAULT},
|
|
{"break",TK_BREAK},{"continue",TK_CONTINUE},{"return",TK_RETURN},
|
|
{"void",TK_VOID},{"uint8",TK_UINT8},{"uint16",TK_UINT16},
|
|
{"uint32",TK_UINT32},{"uint64",TK_UINT64},
|
|
{"int8",TK_INT8},{"int16",TK_INT16},{"int32",TK_INT32},
|
|
{"int64", TK_INT64},{NULL,TK_EOF}
|
|
};
|
|
|
|
static void next(void) {
|
|
skip_ws();
|
|
tok.line = src_line;
|
|
char c = src[src_pos];
|
|
if (!c) { tok.kind = TK_EOF; return; }
|
|
|
|
/* number */
|
|
if (isdigit((unsigned char)c)) {
|
|
char *end;
|
|
tok.num = (long)strtoul(src+src_pos, &end, 0);
|
|
src_pos = (int)(end - src);
|
|
tok.kind = TK_NUM; return;
|
|
}
|
|
|
|
/* string literal */
|
|
if (c == '"') {
|
|
src_pos++; int i=0;
|
|
while (src[src_pos] && src[src_pos]!='"') {
|
|
/* FIX(bug3): bounds check before writing into tok.str */
|
|
if (i >= 511) die("string literal too long");
|
|
if (src[src_pos]=='\\') {
|
|
src_pos++;
|
|
switch(src[src_pos]) {
|
|
case 'n': tok.str[i++]='\n'; break;
|
|
case 't': tok.str[i++]='\t'; break;
|
|
case 'r': tok.str[i++]='\r'; break;
|
|
case '0': tok.str[i++]='\0'; break;
|
|
default: tok.str[i++]=src[src_pos]; break;
|
|
}
|
|
} else tok.str[i++]=src[src_pos];
|
|
src_pos++;
|
|
}
|
|
if (src[src_pos]=='"') src_pos++;
|
|
tok.str[i]=0;
|
|
tok.slen=i; /* FIX(bug2): record true byte count */
|
|
tok.kind=TK_STR;
|
|
return;
|
|
}
|
|
|
|
/* identifier / keyword */
|
|
if (isalpha((unsigned char)c) || c=='_') {
|
|
int i=0;
|
|
while (isalnum((unsigned char)src[src_pos]) || src[src_pos]=='_') {
|
|
/* FIX(bug3): bounds check for identifiers too */
|
|
if (i >= 511) die("identifier too long");
|
|
tok.str[i++]=src[src_pos++];
|
|
}
|
|
tok.str[i]=0;
|
|
tok.slen=i;
|
|
tok.kind=TK_ID;
|
|
for (int k=0; kw_table[k].kw; k++)
|
|
if (!strcmp(tok.str, kw_table[k].kw)) { tok.kind=kw_table[k].tk; return; }
|
|
return;
|
|
}
|
|
|
|
/* multi/single char operators */
|
|
src_pos++;
|
|
#define PEEK src[src_pos]
|
|
#define EAT src_pos++
|
|
switch(c) {
|
|
case '+': if(PEEK=='+'){ EAT; tok.kind=TK_INC; }
|
|
else if(PEEK=='='){ EAT; tok.kind=TK_ADDEQ; }
|
|
else tok.kind=TK_PLUS; break;
|
|
case '-': if(PEEK=='-'){ EAT; tok.kind=TK_DEC; }
|
|
else if(PEEK=='='){ EAT; tok.kind=TK_SUBEQ; }
|
|
else tok.kind=TK_MINUS; break;
|
|
case '*': if(PEEK=='='){ EAT; tok.kind=TK_MULEQ; } else tok.kind=TK_STAR; break;
|
|
case '/': if(PEEK=='='){ EAT; tok.kind=TK_DIVEQ; } else tok.kind=TK_SLASH; break;
|
|
case '%': if(PEEK=='='){ EAT; tok.kind=TK_MODEQ; } else tok.kind=TK_MOD; break;
|
|
case '&': if(PEEK=='&'){ EAT; tok.kind=TK_AND; }
|
|
else if(PEEK=='='){ EAT; tok.kind=TK_ANDEQ; }
|
|
else tok.kind=TK_AMP; break;
|
|
case '|': if(PEEK=='|'){ EAT; tok.kind=TK_OR; }
|
|
else if(PEEK=='='){ EAT; tok.kind=TK_OREQ; }
|
|
else tok.kind=TK_PIPE; break;
|
|
case '^': if(PEEK=='='){ EAT; tok.kind=TK_XOREQ; } else tok.kind=TK_CARET; break;
|
|
case '<': if(PEEK=='<'){ EAT; if(PEEK=='='){ EAT; tok.kind=TK_SHLEQ; } else tok.kind=TK_SHL; }
|
|
else if(PEEK=='='){ EAT; tok.kind=TK_LEQ; } else tok.kind=TK_LT; break;
|
|
case '>': if(PEEK=='>'){ EAT; if(PEEK=='='){ EAT; tok.kind=TK_SHREQ; } else tok.kind=TK_SHR; }
|
|
else if(PEEK=='='){ EAT; tok.kind=TK_GEQ; } else tok.kind=TK_GT; break;
|
|
case '=': if(PEEK=='='){ EAT; tok.kind=TK_EQ; } else tok.kind=TK_ASSIGN; break;
|
|
case '!': if(PEEK=='='){ EAT; tok.kind=TK_NEQ; } else tok.kind=TK_BANG; break;
|
|
case '~': tok.kind=TK_TILDE; break;
|
|
case '(': tok.kind=TK_LPAREN; break;
|
|
case ')': tok.kind=TK_RPAREN; break;
|
|
case '{': tok.kind=TK_LBRACE; break;
|
|
case '}': tok.kind=TK_RBRACE; break;
|
|
case '[': tok.kind=TK_LBRACK; break;
|
|
case ']': tok.kind=TK_RBRACK; break;
|
|
case ';': tok.kind=TK_SEMI; break;
|
|
case ',': tok.kind=TK_COMMA; break;
|
|
case ':': tok.kind=TK_COLON; break;
|
|
case '?': tok.kind=TK_QUESTION;break;
|
|
default: die("Unknown char '%c'", c);
|
|
}
|
|
#undef PEEK
|
|
#undef EAT
|
|
}
|
|
|
|
/* Require the current token to be of kind `k` and advance past it.
   On mismatch, print a syntax error citing the token's line and text
   to stderr and exit with status 1. */
static void expect(TKind k) {
    if (tok.kind == k) {
        next();
        return;
    }
    fprintf(stderr, "line %d: syntax error near '%s'\n", tok.line, tok.str);
    exit(1);
}
|
|
/* If the current token is of kind `k`, consume it and return 1;
   otherwise leave the token stream untouched and return 0. */
static int accept(TKind k) {
    if (tok.kind != k)
        return 0;
    next();
    return 1;
}
|
|
static int is_type(void) {
|
|
return tok.kind==TK_UINT8||tok.kind==TK_UINT16||
|
|
tok.kind==TK_UINT32||tok.kind==TK_UINT64||tok.kind==TK_VOID||
|
|
tok.kind==TK_INT8||tok.kind==TK_INT16||tok.kind==TK_INT32||
|
|
tok.kind==TK_INT64;
|
|
}
|
|
/* Return 1 when `t` is a signed integer type keyword, 0 otherwise. */
static int is_signed(TKind t) {
    switch (t) {
        case TK_INT8: case TK_INT16: case TK_INT32: case TK_INT64:
            return 1;
        default:
            return 0;
    }
}
|
|
/* Return 1 when `t` is a 64-bit integer type keyword, 0 otherwise. */
static int is_64bit(TKind t) {
    if (t == TK_UINT64) return 1;
    if (t == TK_INT64)  return 1;
    return 0;
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
AST NODES
|
|
══════════════════════════════════════════════════════════ */
|
|
/* AST node kinds.  Values are implicit, so the order must not change. */
typedef enum {
    /* top level */
    N_PROG, N_FUNC, N_FDECL, N_GVAR, N_GARR,

    /* statements */
    N_BLK, N_LVAR, N_LARR, N_ES,
    N_RET, N_BRK, N_CONT,
    N_IF, N_WHILE, N_FOR,
    N_SW, N_CASE, N_DEF,

    /* leaf expressions */
    N_NUM, N_STR, N_ID,

    /* operators */
    N_BOPN, N_UOPN, N_ASGN, N_TERN,

    /* postfix / memory / conversion */
    N_CALL, N_IDX, N_ADDR, N_DEREF, N_CAST,

    /* increment / decrement */
    N_POSTINC, N_POSTDEC, N_PREINC, N_PREDEC,

    /* function parameter */
    N_PARAM
} NKind;
|
|
|
|
typedef struct Node Node;
|
|
struct Node {
|
|
NKind kind;
|
|
TKind op; /* operator token */
|
|
long num;
|
|
char str[512]; /* FIX(bug3): was 256, now 512 to match str_val */
|
|
int slen; /* FIX(bug2): explicit byte count for str literals */
|
|
/* type */
|
|
TKind tbase; /* TK_UINT8 … TK_VOID */
|
|
int ptrs; /* pointer depth */
|
|
/* children */
|
|
Node *ch[4]; /* left/right/body/else */
|
|
/* lists */
|
|
Node **list; /* params, args, stmts, cases */
|
|
int nlist;
|
|
int cap;
|
|
};
|
|
|
|
/* Allocate a zero-initialised AST node of kind `k`.
   Never returns NULL: allocation failure is fatal (die). */
static Node *alloc_node(NKind k) {
    Node *n = calloc(1, sizeof *n);
    if (!n) die("out of memory");
    n->kind = k;
    return n;
}
|
|
/* Append `child` to n->list, doubling the capacity (initially 4) whenever
   the list is full.  Allocation failure is fatal (die). */
static void list_push(Node *n, Node *child) {
    if (n->nlist == n->cap) {
        int new_cap = n->cap ? n->cap * 2 : 4;
        /* Use a temporary so the old list is not lost if realloc fails. */
        Node **grown = realloc(n->list, (size_t)new_cap * sizeof *grown);
        if (!grown) die("out of memory");
        n->list = grown;
        n->cap  = new_cap;
    }
    n->list[n->nlist++] = child;
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
PARSER
|
|
══════════════════════════════════════════════════════════ */
|
|
/* Parse a type: a type keyword followed by zero or more '*'.
   On return *base holds the keyword's token kind and *ptrs the number of
   stars consumed.  It is a fatal error if the current token is not a type
   keyword; previously any token was accepted as the base type. */
static void parse_type(TKind *base, int *ptrs) {
    if (!is_type()) {
        src_line = tok.line;    /* report at the offending token */
        die("expected type name");
    }
    *base = tok.kind;
    next();

    *ptrs = 0;
    while (tok.kind == TK_STAR) {
        (*ptrs)++;
        next();
    }
}
|
|
|
|
static Node *expr(void);
|
|
|
|
/* forward decls */
|
|
static Node *stmt(void);
|
|
static Node *block(void);
|
|
|
|
static Node *primary(void) {
|
|
Node *n;
|
|
if (tok.kind==TK_NUM) {
|
|
n=alloc_node(N_NUM); n->num=tok.num; next(); return n;
|
|
}
|
|
if (tok.kind==TK_STR) {
|
|
n=alloc_node(N_STR);
|
|
/* FIX(bug2): use memcpy + slen instead of strcpy so embedded nulls survive */
|
|
memcpy(n->str, tok.str, tok.slen+1);
|
|
n->slen=tok.slen;
|
|
next(); return n;
|
|
}
|
|
if (tok.kind==TK_ID) {
|
|
n=alloc_node(N_ID); strcpy(n->str, tok.str); next(); return n;
|
|
}
|
|
if (tok.kind==TK_LPAREN) {
|
|
next(); n=expr(); expect(TK_RPAREN); return n;
|
|
}
|
|
die("expected expression");
|
|
return NULL;
|
|
}
|
|
|
|
static Node *postfix(void) {
|
|
Node *e = primary(), *t;
|
|
for(;;) {
|
|
if (tok.kind==TK_LPAREN) {
|
|
next(); t=alloc_node(N_CALL); t->ch[0]=e;
|
|
if (tok.kind!=TK_RPAREN)
|
|
for(;;) {
|
|
list_push(t, expr());
|
|
if (!accept(TK_COMMA)) break;
|
|
}
|
|
expect(TK_RPAREN); e=t;
|
|
} else if (tok.kind==TK_LBRACK) {
|
|
next(); t=alloc_node(N_IDX); t->ch[0]=e; t->ch[1]=expr();
|
|
expect(TK_RBRACK); e=t;
|
|
} else if (tok.kind==TK_INC) {
|
|
next(); t=alloc_node(N_POSTINC); t->ch[0]=e; e=t;
|
|
} else if (tok.kind==TK_DEC) {
|
|
next(); t=alloc_node(N_POSTDEC); t->ch[0]=e; e=t;
|
|
} else break;
|
|
}
|
|
return e;
|
|
}
|
|
|
|
static Node *unary(void) {
|
|
Node *t;
|
|
if (tok.kind==TK_MINUS||tok.kind==TK_BANG||tok.kind==TK_TILDE) {
|
|
TKind op=tok.kind; next();
|
|
t=alloc_node(N_UOPN); t->op=op; t->ch[0]=unary(); return t;
|
|
}
|
|
if (tok.kind==TK_AMP) {
|
|
next(); t=alloc_node(N_ADDR); t->ch[0]=unary(); return t;
|
|
}
|
|
if (tok.kind==TK_STAR) {
|
|
next(); t=alloc_node(N_DEREF); t->ch[0]=unary(); return t;
|
|
}
|
|
if (tok.kind==TK_INC) {
|
|
next(); t=alloc_node(N_PREINC); t->ch[0]=unary(); return t;
|
|
}
|
|
if (tok.kind==TK_DEC) {
|
|
next(); t=alloc_node(N_PREDEC); t->ch[0]=unary(); return t;
|
|
}
|
|
/* cast: (type) expr */
|
|
if (tok.kind==TK_LPAREN && (src[src_pos]==' '||1)) {
|
|
int saved=src_pos; Token saved_tok=tok; int saved_line=src_line;
|
|
next();
|
|
if (is_type()) {
|
|
TKind tb; int pt;
|
|
parse_type(&tb,&pt);
|
|
if (tok.kind==TK_RPAREN) {
|
|
next(); t=alloc_node(N_CAST);
|
|
t->tbase=tb; t->ptrs=pt; t->ch[0]=unary(); return t;
|
|
}
|
|
}
|
|
src_pos=saved; tok=saved_tok; src_line=saved_line;
|
|
}
|
|
return postfix();
|
|
}
|
|
|
|
#define BINOP(name, next_fn, ...) \
|
|
static Node *name(void) { \
|
|
Node *l=next_fn(), *t; TKind ops[]={__VA_ARGS__, TK_EOF}; \
|
|
for(;;) { \
|
|
int found=0; \
|
|
for(int i=0;ops[i]!=TK_EOF;i++) if(tok.kind==ops[i]){found=1;break;} \
|
|
if(!found) break; \
|
|
TKind op=tok.kind; next(); \
|
|
t=alloc_node(N_BOPN); t->op=op; t->ch[0]=l; t->ch[1]=next_fn(); l=t; \
|
|
} return l; \
|
|
}
|
|
|
|
BINOP(mul_expr, unary, TK_STAR, TK_SLASH, TK_MOD)
|
|
BINOP(add_expr, mul_expr, TK_PLUS, TK_MINUS)
|
|
BINOP(shf_expr, add_expr, TK_SHL, TK_SHR)
|
|
BINOP(cmp_expr, shf_expr, TK_LT, TK_LEQ, TK_GT, TK_GEQ)
|
|
BINOP(eq_expr, cmp_expr, TK_EQ, TK_NEQ)
|
|
BINOP(band_expr, eq_expr, TK_AMP)
|
|
BINOP(bxor_expr, band_expr,TK_CARET)
|
|
BINOP(bor_expr, bxor_expr,TK_PIPE)
|
|
|
|
static Node *land_expr(void) {
|
|
Node *l=bor_expr(), *t;
|
|
while (tok.kind==TK_AND) {
|
|
next(); t=alloc_node(N_BOPN); t->op=TK_AND;
|
|
t->ch[0]=l; t->ch[1]=bor_expr(); l=t;
|
|
} return l;
|
|
}
|
|
static Node *lor_expr(void) {
|
|
Node *l=land_expr(), *t;
|
|
while (tok.kind==TK_OR) {
|
|
next(); t=alloc_node(N_BOPN); t->op=TK_OR;
|
|
t->ch[0]=l; t->ch[1]=land_expr(); l=t;
|
|
} return l;
|
|
}
|
|
|
|
static Node *ternary(void);
|
|
static Node *ternary(void) {
|
|
Node *c=lor_expr(), *t;
|
|
if (tok.kind==TK_QUESTION) {
|
|
next(); t=alloc_node(N_TERN); t->ch[0]=c;
|
|
t->ch[1]=expr(); expect(TK_COLON); t->ch[2]=ternary(); return t;
|
|
}
|
|
return c;
|
|
}
|
|
|
|
static TKind asgn_ops[] = {
|
|
TK_ASSIGN, TK_ADDEQ, TK_SUBEQ, TK_MULEQ, TK_DIVEQ, TK_MODEQ,
|
|
TK_ANDEQ, TK_OREQ, TK_XOREQ, TK_SHLEQ, TK_SHREQ, TK_EOF
|
|
};
|
|
static Node *expr(void) {
|
|
Node *l=ternary(), *t;
|
|
for(int i=0; asgn_ops[i]!=TK_EOF; i++)
|
|
if (tok.kind==asgn_ops[i]) {
|
|
TKind op=tok.kind; next();
|
|
t=alloc_node(N_ASGN); t->op=op; t->ch[0]=l; t->ch[1]=expr();
|
|
return t;
|
|
}
|
|
return l;
|
|
}
|
|
|
|
static Node *decl_local(void) {
|
|
TKind tb; int pt;
|
|
parse_type(&tb,&pt);
|
|
char nm[512]; strcpy(nm, tok.str); expect(TK_ID);
|
|
Node *n;
|
|
if (tok.kind==TK_LBRACK) {
|
|
next(); n=alloc_node(N_LARR);
|
|
n->tbase=tb; n->ptrs=pt; strcpy(n->str,nm);
|
|
n->ch[0]=expr(); expect(TK_RBRACK);
|
|
if (accept(TK_ASSIGN)) {
|
|
expect(TK_LBRACE);
|
|
while (tok.kind!=TK_RBRACE) {
|
|
list_push(n, expr()); accept(TK_COMMA);
|
|
}
|
|
expect(TK_RBRACE);
|
|
}
|
|
} else {
|
|
n=alloc_node(N_LVAR);
|
|
n->tbase=tb; n->ptrs=pt; strcpy(n->str,nm);
|
|
if (accept(TK_ASSIGN)) n->ch[0]=expr();
|
|
}
|
|
expect(TK_SEMI); return n;
|
|
}
|
|
|
|
static Node *stmt(void) {
|
|
Node *n, *t;
|
|
if (is_type()) return decl_local();
|
|
switch(tok.kind) {
|
|
case TK_LBRACE: return block();
|
|
case TK_IF:
|
|
n=alloc_node(N_IF); next();
|
|
expect(TK_LPAREN); n->ch[0]=expr(); expect(TK_RPAREN);
|
|
n->ch[1]=stmt();
|
|
if (tok.kind==TK_ELSE) {
|
|
next(); n->ch[2]=stmt();
|
|
}
|
|
return n;
|
|
case TK_WHILE:
|
|
n=alloc_node(N_WHILE); next();
|
|
expect(TK_LPAREN); n->ch[0]=expr(); expect(TK_RPAREN);
|
|
n->ch[1]=stmt(); return n;
|
|
case TK_FOR:
|
|
n=alloc_node(N_FOR); next(); expect(TK_LPAREN);
|
|
if (is_type()) n->ch[0]=decl_local(); /* consumes ; */
|
|
else if (tok.kind!=TK_SEMI) { Node *es=alloc_node(N_ES); es->ch[0]=expr(); expect(TK_SEMI); n->ch[0]=es; }
|
|
else { next(); }
|
|
if (tok.kind!=TK_SEMI) n->ch[1]=expr();
|
|
expect(TK_SEMI);
|
|
if (tok.kind!=TK_RPAREN) n->ch[2]=expr();
|
|
expect(TK_RPAREN); n->ch[3]=stmt(); return n;
|
|
case TK_SWITCH:
|
|
n=alloc_node(N_SW); next();
|
|
expect(TK_LPAREN); n->ch[0]=expr(); expect(TK_RPAREN);
|
|
expect(TK_LBRACE);
|
|
while (tok.kind!=TK_RBRACE) {
|
|
if (tok.kind==TK_CASE) {
|
|
next(); t=alloc_node(N_CASE); t->ch[0]=expr(); expect(TK_COLON);
|
|
while(tok.kind!=TK_CASE&&tok.kind!=TK_DEFAULT&&tok.kind!=TK_RBRACE)
|
|
list_push(t, stmt());
|
|
list_push(n, t);
|
|
} else if (tok.kind==TK_DEFAULT) {
|
|
next(); expect(TK_COLON);
|
|
t=alloc_node(N_DEF);
|
|
while(tok.kind!=TK_CASE&&tok.kind!=TK_DEFAULT&&tok.kind!=TK_RBRACE)
|
|
list_push(t, stmt());
|
|
list_push(n, t);
|
|
} else break;
|
|
}
|
|
expect(TK_RBRACE); return n;
|
|
case TK_RETURN:
|
|
n=alloc_node(N_RET); next();
|
|
if (tok.kind!=TK_SEMI) n->ch[0]=expr();
|
|
expect(TK_SEMI); return n;
|
|
case TK_BREAK:
|
|
next(); expect(TK_SEMI); return alloc_node(N_BRK);
|
|
case TK_CONTINUE:
|
|
next(); expect(TK_SEMI); return alloc_node(N_CONT);
|
|
default:
|
|
n=alloc_node(N_ES); n->ch[0]=expr(); expect(TK_SEMI); return n;
|
|
}
|
|
}
|
|
|
|
static Node *block(void) {
|
|
Node *n=alloc_node(N_BLK); expect(TK_LBRACE);
|
|
while (tok.kind!=TK_RBRACE) list_push(n, stmt());
|
|
expect(TK_RBRACE); return n;
|
|
}
|
|
|
|
static Node *parse_prog(void) {
|
|
Node *prog=alloc_node(N_PROG);
|
|
next();
|
|
while (tok.kind!=TK_EOF) {
|
|
TKind tb; int pt;
|
|
parse_type(&tb,&pt);
|
|
char nm[512]; strcpy(nm,tok.str); expect(TK_ID);
|
|
Node *d;
|
|
if (tok.kind==TK_LPAREN) {
|
|
next(); d=alloc_node(N_FUNC);
|
|
d->tbase=tb; d->ptrs=pt; strcpy(d->str,nm);
|
|
/* params */
|
|
if (tok.kind!=TK_RPAREN)
|
|
for(;;) {
|
|
if (tok.kind==TK_VOID) { next(); break; } /* accept (void) as empty param list */
|
|
TKind ptb; int ppt;
|
|
parse_type(&ptb,&ppt);
|
|
Node *p=alloc_node(N_PARAM);
|
|
p->tbase=ptb; p->ptrs=ppt; strcpy(p->str,tok.str);
|
|
expect(TK_ID); list_push(d, p);
|
|
if (!accept(TK_COMMA)) break;
|
|
}
|
|
expect(TK_RPAREN);
|
|
if (tok.kind==TK_SEMI) { next(); d->kind=N_FDECL; }
|
|
else d->ch[0]=block();
|
|
} else if (tok.kind==TK_LBRACK) {
|
|
/* global array: type name[size];
|
|
or type name[size] = { v, … }; */
|
|
next();
|
|
d=alloc_node(N_GARR);
|
|
d->tbase=tb; d->ptrs=pt; strcpy(d->str,nm);
|
|
d->ch[0]=expr(); /* size expression */
|
|
expect(TK_RBRACK);
|
|
if (accept(TK_ASSIGN)) {
|
|
expect(TK_LBRACE);
|
|
while (tok.kind!=TK_RBRACE) {
|
|
list_push(d, expr()); accept(TK_COMMA);
|
|
}
|
|
expect(TK_RBRACE);
|
|
}
|
|
expect(TK_SEMI);
|
|
} else {
|
|
d=alloc_node(N_GVAR);
|
|
d->tbase=tb; d->ptrs=pt; strcpy(d->str,nm);
|
|
if (accept(TK_ASSIGN)) d->ch[0]=expr();
|
|
expect(TK_SEMI);
|
|
}
|
|
list_push(prog, d);
|
|
}
|
|
return prog;
|
|
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
CODE GENERATOR (NASM x86_32, cdecl)
|
|
══════════════════════════════════════════════════════════ */
|
|
|
|
/* String literal pool */
|
|
#define MAX_STRS 512
static char str_val[MAX_STRS][512];     /* literal bytes, NUL-terminated */
static int  str_val_len[MAX_STRS];      /* explicit byte length of each entry */
static int  str_cnt = 0;                /* entries in use */

/* Return the pool index of the `len`-byte string `s`, adding it if it is
   not already present.  Comparison is by length plus memcmp, so literals
   containing embedded NULs are told apart.  `s` must have a terminator
   byte at s[len], which is copied too.  Dies with "too many strings" when
   the pool is full. */
static int intern_str_n(const char *s, int len) {
    int i = 0;
    while (i < str_cnt) {
        if (str_val_len[i] == len && !memcmp(str_val[i], s, len))
            return i;
        i++;
    }

    if (str_cnt == MAX_STRS) die("too many strings");

    int id = str_cnt++;
    memcpy(str_val[id], s, len+1);
    str_val_len[id] = len;
    return id;
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
SYMBOL TABLES (with type info for pointer arithmetic)
|
|
══════════════════════════════════════════════════════════ */
|
|
|
|
/* Local variable table */
|
|
#define MAX_LOCALS 256
|
|
static struct {
|
|
char name[64];
|
|
int off;
|
|
TKind tbase;
|
|
int ptrs;
|
|
int is_array; /* 1 = array on stack (name decays to address via lea) */
|
|
} locals[MAX_LOCALS];
|
|
static int nlocals=0, frame_size=0;
|
|
|
|
static int find_local(const char *nm) {
|
|
for(int i=0;i<nlocals;i++) if(!strcmp(locals[i].name,nm)) return locals[i].off;
|
|
return 0x7fffffff; /* not found */
|
|
}
|
|
static int local_is_array(const char *nm) {
|
|
for(int i=0;i<nlocals;i++) if(!strcmp(locals[i].name,nm)) return locals[i].is_array;
|
|
return 0;
|
|
}
|
|
static void def_local(const char *nm, int off, TKind tbase, int ptrs) {
|
|
if (nlocals==MAX_LOCALS) die("too many locals");
|
|
strncpy(locals[nlocals].name, nm, 63);
|
|
locals[nlocals].off = off;
|
|
locals[nlocals].tbase = tbase;
|
|
locals[nlocals].ptrs = ptrs;
|
|
locals[nlocals].is_array = 0;
|
|
nlocals++;
|
|
}
|
|
static void def_local_array(const char *nm, int off, TKind tbase, int ptrs) {
|
|
def_local(nm, off, tbase, ptrs);
|
|
locals[nlocals-1].is_array = 1;
|
|
}
|
|
|
|
/* Global variable table */
|
|
#define MAX_GLOBALS 256
|
|
static struct {
|
|
char name[64];
|
|
TKind tbase;
|
|
int ptrs;
|
|
int is_array;
|
|
} gvars[MAX_GLOBALS];
|
|
static int ngvars=0;
|
|
|
|
static int global_is_array(const char *nm) {
|
|
for(int i=0;i<ngvars;i++) if(!strcmp(gvars[i].name,nm)) return gvars[i].is_array;
|
|
return 0;
|
|
}
|
|
static void def_global(const char *nm, TKind tbase, int ptrs) {
|
|
if (ngvars==MAX_GLOBALS) die("too many globals");
|
|
strncpy(gvars[ngvars].name, nm, 63);
|
|
gvars[ngvars].tbase = tbase;
|
|
gvars[ngvars].ptrs = ptrs;
|
|
gvars[ngvars].is_array = 0;
|
|
ngvars++;
|
|
}
|
|
static void def_global_array(const char *nm, TKind tbase, int ptrs) {
|
|
def_global(nm, tbase, ptrs);
|
|
gvars[ngvars-1].is_array = 1;
|
|
}
|
|
|
|
/* Label counter */
|
|
/* Counter behind new_lbl(); labels are numbered from 1. */
static int lbl_cnt = 0;

/* Return a fresh label number. */
static int new_lbl(void) {
    lbl_cnt += 1;
    return lbl_cnt;
}

/* break/continue/return label stacks */
static int brk_stk[64];
static int cont_stk[64];
static int stk_top = 0;
static int ret_lbl = 0;
|
|
|
|
/* Output helpers */
|
|
/* Destination stream for the generated assembly. */
static FILE *out;

/* E:  printf-style write to `out`, verbatim.
   EL: same, preceded by the indent used for instruction lines. */
#define E(...)  fprintf(out, __VA_ARGS__)
#define EL(...) do { fprintf(out," "); fprintf(out,__VA_ARGS__); } while(0)
|
|
|
|
/* Emit the memory operand for variable `nm` (no newline):
   "[name]" for a non-local, "[ebp+N]" / "[ebp-N]" for a local. */
static void emit_ref(const char *nm) {
    int off = find_local(nm);

    if (off == 0x7fffffff) {    /* sentinel: not a local */
        E("[%s]", nm);
        return;
    }
    if (off > 0) {
        E("[ebp+%d]", off);
    } else {
        E("[ebp%d]", off);      /* %d supplies the minus sign */
    }
}
|
|
|
|
/* Emit address of a local/global (for array decay: lea not mov) */
|
|
/* Load the address of variable `nm` into eax: `lea` from the frame for a
   local, `mov` of the label for a non-local (the label is the address). */
static void emit_addr(const char *nm) {
    int off = find_local(nm);

    if (off == 0x7fffffff) {
        E(" mov eax, %s\n", nm);    /* global: label IS address */
    } else if (off > 0) {
        EL("lea eax, [ebp+%d]\n", off);
    } else {
        EL("lea eax, [ebp%d]\n", off);
    }
}
|
|
|
|
/* Load identifier `nm` into eax.  Arrays decay to a pointer, so for them
   the address is produced; for anything else the stored value is loaded. */
static void load_nm(const char *nm) {
    if (local_is_array(nm)) {
        emit_addr(nm);
    } else if (global_is_array(nm)) {
        E(" mov eax, %s\n", nm);
    } else {
        E(" mov eax, ");
        emit_ref(nm);
        E("\n");
    }
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
TYPE SYSTEM (for pointer arithmetic)
|
|
══════════════════════════════════════════════════════════ */
|
|
typedef struct { TKind tbase; int ptrs; } Type;
|
|
static const Type T_INT = { TK_UINT32, 0 };
|
|
|
|
/* Size in bytes of a base type (non-pointer) */
|
|
/* Size in bytes of a non-pointer base type: 1, 2 or 8 for the 8-, 16- and
   64-bit integer types, 4 for everything else (uint32, int32, void, ...). */
static int base_size(TKind tbase) {
    if (tbase == TK_UINT8  || tbase == TK_INT8)  return 1;
    if (tbase == TK_UINT16 || tbase == TK_INT16) return 2;
    if (tbase == TK_UINT64 || tbase == TK_INT64) return 8;
    return 4;
}
|
|
|
|
/* Stride for pointer arithmetic: the size of what the pointer points at.
|
|
pointer-to-pointer always has stride 4 (one address). */
|
|
/* Stride for pointer arithmetic on a value of type (tbase, ptrs): 4 when
   the pointee is itself a pointer (ptrs > 1), otherwise the size of the
   base type. */
static int pointee_size(TKind tbase, int ptrs) {
    return (ptrs > 1) ? 4 : base_size(tbase);
}
|
|
|
|
/* forward */
|
|
static void gen_expr(Node *n);
|
|
static void gen_stmt(Node *n);
|
|
static Type fun_ret_type(const char *nm); /* defined in codegen section */
|
|
|
|
/* Infer the type of an expression node.
|
|
Walks the AST without emitting any code. */
|
|
/* Infer the type of an expression node without emitting any code.
   Unknown identifiers, NULL nodes and unhandled node kinds yield T_INT.

   Rules for operators:
     - comparisons and && || yield T_INT, never a pointer;
     - pointer - pointer yields a signed integer (an element count);
     - otherwise a pointer operand makes the result that pointer type;
     - otherwise the result is int32 if either operand is signed
       (the same rule gen_expr uses to pick signed instructions), else T_INT;
     - an assignment has the type of its left-hand side;
     - a ternary follows the arithmetic rules applied to its two arms. */
static Type get_type(Node *n) {
    if (!n) return T_INT;

    switch (n->kind) {
        case N_NUM:
            return T_INT;

        case N_STR:
            return (Type){ TK_UINT8, 1 };   /* char* */

        case N_ID: {
            /* Check locals first, then globals */
            for (int i = 0; i < nlocals; i++)
                if (!strcmp(locals[i].name, n->str))
                    return (Type){ locals[i].tbase, locals[i].ptrs };
            for (int i = 0; i < ngvars; i++)
                if (!strcmp(gvars[i].name, n->str))
                    return (Type){ gvars[i].tbase, gvars[i].ptrs };
            return T_INT;
        }

        case N_ADDR: {
            Type inner = get_type(n->ch[0]);
            return (Type){ inner.tbase, inner.ptrs + 1 };
        }

        case N_DEREF:
        case N_IDX: {
            /* Both strip one level of indirection from child 0. */
            Type inner = get_type(n->ch[0]);
            if (inner.ptrs > 0) return (Type){ inner.tbase, inner.ptrs - 1 };
            return T_INT;
        }

        case N_CAST:
            return (Type){ n->tbase, n->ptrs };

        case N_CALL:
            if (n->ch[0] && n->ch[0]->kind == N_ID)
                return fun_ret_type(n->ch[0]->str);
            return T_INT;

        case N_ASGN:
            /* The value of an assignment is the value stored in the target. */
            return get_type(n->ch[0]);

        case N_BOPN: {
            switch (n->op) {
                case TK_EQ: case TK_NEQ:
                case TK_LT: case TK_LEQ: case TK_GT: case TK_GEQ:
                case TK_AND: case TK_OR:
                    return T_INT;           /* truth value 0 or 1 */
                default:
                    break;
            }
            Type l = get_type(n->ch[0]);
            Type r = get_type(n->ch[1]);
            if (n->op == TK_MINUS && l.ptrs > 0 && r.ptrs > 0)
                return (Type){ TK_INT32, 0 };   /* element difference */
            if (l.ptrs > 0) return l;
            if (r.ptrs > 0) return r;
            if (is_signed(l.tbase) || is_signed(r.tbase))
                return (Type){ TK_INT32, 0 };
            return T_INT;
        }

        case N_TERN: {
            Type a = get_type(n->ch[1]);
            Type b = get_type(n->ch[2]);
            if (a.ptrs > 0) return a;
            if (b.ptrs > 0) return b;
            if (is_signed(a.tbase) || is_signed(b.tbase))
                return (Type){ TK_INT32, 0 };
            return T_INT;
        }

        case N_POSTINC: case N_POSTDEC:
        case N_PREINC:  case N_PREDEC:
            return get_type(n->ch[0]);

        case N_UOPN:
            /* Negation and bitwise-not always yield signed int32,
               regardless of the operand; logical-not yields T_INT. */
            if (n->op == TK_MINUS || n->op == TK_TILDE)
                return (Type){ TK_INT32, 0 };
            return T_INT;

        default:
            return T_INT;
    }
}
|
|
|
|
/* Emit an integer scale instruction sequence.
|
|
On entry eax holds the integer to scale; on exit eax = eax * scale. */
|
|
/* Emit code that multiplies eax by `scale`.
   Scales 2, 4 and 8 become a left shift, 1 emits nothing, anything else
   becomes an imul by the constant. */
static void emit_scale(int scale) {
    switch (scale) {
        case 1:                         break;
        case 2:  EL("shl eax, 1\n");    break;
        case 4:  EL("shl eax, 2\n");    break;
        case 8:  EL("shl eax, 3\n");    break;
        default: EL("imul eax, %d\n", scale); break;
    }
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
LVALUE / STORE HELPERS
|
|
══════════════════════════════════════════════════════════ */
|
|
/* Emit code leaving the address of lvalue `n` in eax.
   Supported lvalues: identifiers, index expressions and dereferences.
   Any other node kind is a fatal error; previously it emitted nothing,
   leaving whatever happened to be in eax as the "address". */
static void gen_lval_addr(Node *n) {
    switch (n->kind) {
        case N_ID: {
            int off = find_local(n->str);
            if (off == 0x7fffffff) EL("mov eax, %s\n", n->str);   /* global label */
            else if (off > 0)      EL("lea eax, [ebp+%d]\n", off);
            else                   EL("lea eax, [ebp%d]\n", off);
            break;
        }

        case N_IDX: {
            /* addr of arr[ix]: base + ix * element_size.
               The base goes through gen_expr, not gen_lval_addr: for a
               pointer identifier we need the pointer's *value* (mov), not
               the address of its stack slot (lea).  gen_expr handles both:
               array identifiers decay to their base address, pointer
               identifiers load their value. */
            Type arr = get_type(n->ch[0]);
            int scale = pointee_size(arr.tbase, arr.ptrs);
            gen_expr(n->ch[0]);
            EL("push eax\n");
            gen_expr(n->ch[1]);
            emit_scale(scale);
            EL("pop ecx\n");
            EL("add eax, ecx\n");
            break;
        }

        case N_DEREF:
            /* address of *p is simply the value of p */
            gen_expr(n->ch[0]);
            break;

        default:
            die("expression is not an lvalue");
    }
}
|
|
|
|
/* Emit code storing eax into lvalue `n`.
   Identifiers are written with a 32-bit mov to their memory operand.
   Index and dereference targets store 1, 2 or 4 bytes according to the
   element type (8-byte elements store only the low 4 bytes).  eax is not
   preserved on the index/dereference path (it ends up holding the target
   address; the stored value is in ecx).
   Any other node kind is a fatal error; previously the store was silently
   dropped. */
static void store_lval(Node *n) {
    if (n->kind == N_ID) {
        E(" mov "); emit_ref(n->str); E(", eax\n");
        return;
    }
    if (n->kind != N_IDX && n->kind != N_DEREF)
        die("assignment target is not an lvalue");

    EL("push eax\n");           /* keep the value while computing the address */
    gen_lval_addr(n);
    EL("pop ecx\n");            /* ecx = value, eax = address */

    /* Store only as many bytes as the element type requires */
    Type t = get_type(n);
    int sz = (t.ptrs > 0) ? 4 : base_size(t.tbase);
    switch (sz) {
        case 1:  EL("mov byte [eax], cl\n");   break;
        case 2:  EL("mov word [eax], cx\n");   break;
        default: EL("mov dword [eax], ecx\n"); break;
    }
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
ARITHMETIC HELPERS
|
|
══════════════════════════════════════════════════════════ */
|
|
/* Emit the divide shared by '/' and '%': swap so eax = left and
   ecx = right, then divide edx:eax by ecx, signed or unsigned. */
static void arith_divide(int sgn) {
    EL("xchg eax, ecx\n");
    if (sgn) {
        EL("cdq\n");
        EL("idiv ecx\n");
    } else {
        EL("xor edx, edx\n");
        EL("div ecx\n");
    }
}

/* Emit the instruction(s) for binary operator `op`.
   On entry ecx = left operand, eax = right operand; on exit eax = result.
   `sgn` selects signed division/remainder and arithmetic right shift.
   Operators not listed emit nothing. */
static void arith(TKind op, int sgn) {
    switch (op) {
        case TK_PLUS:
            EL("add eax, ecx\n");
            break;
        case TK_MINUS:
            EL("sub ecx, eax\n");
            EL("mov eax, ecx\n");
            break;
        case TK_STAR:
            EL("imul eax, ecx\n");
            break;
        case TK_SLASH:
            arith_divide(sgn);          /* quotient is already in eax */
            break;
        case TK_MOD:
            arith_divide(sgn);
            EL("mov eax, edx\n");       /* remainder is left in edx */
            break;
        case TK_AMP:
            EL("and eax, ecx\n");
            break;
        case TK_PIPE:
            EL("or eax, ecx\n");
            break;
        case TK_CARET:
            EL("xor eax, ecx\n");
            break;
        case TK_SHL:
            EL("xchg eax, ecx\n");      /* shift count must be in cl */
            EL("shl eax, cl\n");
            break;
        case TK_SHR:
            EL("xchg eax, ecx\n");
            if (sgn) EL("sar eax, cl\n");
            else     EL("shr eax, cl\n");
            break;
        default:
            break;
    }
}
|
|
|
|
/* Pointer-aware add/subtract.
|
|
On entry: ecx = left operand, eax = right operand.
|
|
On exit: eax = result. */
|
|
/* Pointer-aware add/subtract.
   On entry: ecx = left operand, eax = right operand; `lt` / `rt` are their
   types.  On exit: eax = result.  Only TK_PLUS and TK_MINUS are handled;
   other operators emit nothing.

     ptr + int, int + ptr : the integer is scaled by the pointee size
     ptr - int            : the integer is scaled by the pointee size
     ptr - ptr            : byte difference divided by the pointee size,
                            using a signed divide so that a negative
                            difference (left below right) gives a negative
                            element count
     anything else        : plain add / subtract */
static void ptr_arith(TKind op, Type lt, Type rt) {
    if (op == TK_PLUS) {
        if (lt.ptrs > 0) {
            /* ptr + int: scale the integer (eax) by pointee size */
            int scale = pointee_size(lt.tbase, lt.ptrs);
            emit_scale(scale);
            EL("add eax, ecx\n");
        } else if (rt.ptrs > 0) {
            /* int + ptr: scale the integer (ecx) by pointee size */
            int scale = pointee_size(rt.tbase, rt.ptrs);
            if (scale == 1) { /* nothing */ }
            else if (scale == 2) EL("shl ecx, 1\n");
            else if (scale == 4) EL("shl ecx, 2\n");
            else if (scale == 8) EL("shl ecx, 3\n");
            else EL("imul ecx, %d\n", scale);
            EL("add eax, ecx\n");
        } else {
            EL("add eax, ecx\n");
        }
    } else if (op == TK_MINUS) {
        if (lt.ptrs > 0 && rt.ptrs == 0) {
            /* ptr - int: scale the integer (eax) by pointee size */
            int scale = pointee_size(lt.tbase, lt.ptrs);
            emit_scale(scale);
            EL("sub ecx, eax\n");
            EL("mov eax, ecx\n");
        } else if (lt.ptrs > 0 && rt.ptrs > 0) {
            /* ptr - ptr: raw byte difference divided by pointee size */
            EL("sub ecx, eax\n");
            EL("mov eax, ecx\n");
            int scale = pointee_size(lt.tbase, lt.ptrs);
            if (scale > 1) {
                EL("mov ecx, %d\n", scale);
                EL("cdq\n");            /* sign-extend eax into edx:eax */
                EL("idiv ecx\n");
            }
        } else {
            EL("sub ecx, eax\n");
            EL("mov eax, ecx\n");
        }
    }
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
EXPRESSION CODE GENERATOR
|
|
══════════════════════════════════════════════════════════ */
|
|
static void gen_expr(Node *n) {
|
|
int la, lb;
|
|
switch(n->kind) {
|
|
case N_NUM:
|
|
EL("mov eax, %ld\n", n->num); break;
|
|
case N_STR: {
|
|
/* FIX(bug2): use length-aware intern */
|
|
int id=intern_str_n(n->str, n->slen);
|
|
EL("mov eax, _s%d\n", id); break; }
|
|
case N_ID:
|
|
load_nm(n->str); break;
|
|
case N_ADDR:
|
|
gen_lval_addr(n->ch[0]); break;
|
|
|
|
case N_DEREF: {
|
|
/* Load from pointer; respect the pointee width and signedness */
|
|
gen_expr(n->ch[0]);
|
|
Type inner = get_type(n->ch[0]);
|
|
int psz = (inner.ptrs > 1) ? 4 : base_size(inner.tbase);
|
|
int sgn = (inner.ptrs == 1) && is_signed(inner.tbase);
|
|
switch (psz) {
|
|
case 1: EL(sgn ? "movsx eax, byte [eax]\n" : "movzx eax, byte [eax]\n"); break;
|
|
case 2: EL(sgn ? "movsx eax, word [eax]\n" : "movzx eax, word [eax]\n"); break;
|
|
case 8: EL("mov eax, [eax]\n"); break; /* truncate 64→32; full 64-bit NYI */
|
|
default: EL("mov eax, [eax]\n"); break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case N_CAST:
|
|
gen_expr(n->ch[0]);
|
|
if (n->ptrs == 0) {
|
|
switch (n->tbase) {
|
|
case TK_INT8: EL("movsx eax, al\n"); break; /* sign-extend low byte */
|
|
case TK_INT16: EL("movsx eax, ax\n"); break; /* sign-extend low word */
|
|
case TK_UINT8: EL("and eax, 0xFF\n"); break;
|
|
case TK_UINT16: EL("and eax, 0xFFFF\n"); break;
|
|
default: break; /* int32/uint32/int64/uint64: no truncation needed */
|
|
}
|
|
}
|
|
break;
|
|
|
|
case N_POSTINC: {
|
|
Type t = get_type(n->ch[0]);
|
|
int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1;
|
|
gen_expr(n->ch[0]); EL("push eax\n");
|
|
EL("add eax, %d\n", stride); store_lval(n->ch[0]); EL("pop eax\n");
|
|
break;
|
|
}
|
|
case N_POSTDEC: {
|
|
Type t = get_type(n->ch[0]);
|
|
int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1;
|
|
gen_expr(n->ch[0]); EL("push eax\n");
|
|
EL("sub eax, %d\n", stride); store_lval(n->ch[0]); EL("pop eax\n");
|
|
break;
|
|
}
|
|
case N_PREINC: {
|
|
Type t = get_type(n->ch[0]);
|
|
int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1;
|
|
gen_expr(n->ch[0]); EL("add eax, %d\n", stride); store_lval(n->ch[0]);
|
|
break;
|
|
}
|
|
case N_PREDEC: {
|
|
Type t = get_type(n->ch[0]);
|
|
int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1;
|
|
gen_expr(n->ch[0]); EL("sub eax, %d\n", stride); store_lval(n->ch[0]);
|
|
break;
|
|
}
|
|
|
|
case N_UOPN:
|
|
gen_expr(n->ch[0]);
|
|
if (n->op==TK_MINUS) EL("neg eax\n");
|
|
else if (n->op==TK_TILDE) EL("not eax\n");
|
|
else { EL("test eax, eax\n"); EL("setz al\n"); EL("movzx eax, al\n"); }
|
|
break;
|
|
|
|
case N_BOPN:
|
|
if (n->op==TK_OR) {
|
|
la=new_lbl(); lb=new_lbl();
|
|
gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jnz .L%d\n",la);
|
|
gen_expr(n->ch[1]); EL("test eax, eax\n"); EL("jnz .L%d\n",la);
|
|
EL("xor eax, eax\n"); EL("jmp .L%d\n",lb);
|
|
E(".L%d:\n",la); EL("mov eax, 1\n"); E(".L%d:\n",lb);
|
|
} else if (n->op==TK_AND) {
|
|
la=new_lbl(); lb=new_lbl();
|
|
gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .L%d\n",la);
|
|
gen_expr(n->ch[1]); EL("test eax, eax\n"); EL("jz .L%d\n",la);
|
|
EL("mov eax, 1\n"); EL("jmp .L%d\n",lb);
|
|
E(".L%d:\n",la); EL("xor eax, eax\n"); E(".L%d:\n",lb);
|
|
} else {
|
|
/* Evaluate both sides; ecx=left, eax=right */
|
|
gen_expr(n->ch[0]); EL("push eax\n");
|
|
gen_expr(n->ch[1]); EL("pop ecx\n");
|
|
{
|
|
int sgn = is_signed(get_type(n->ch[0]).tbase) ||
|
|
is_signed(get_type(n->ch[1]).tbase);
|
|
switch(n->op) {
|
|
case TK_EQ: EL("cmp ecx, eax\n"); EL("sete al\n"); EL("movzx eax, al\n"); break;
|
|
case TK_NEQ: EL("cmp ecx, eax\n"); EL("setne al\n"); EL("movzx eax, al\n"); break;
|
|
case TK_LT: EL("cmp ecx, eax\n"); EL(sgn?"setl al\n" :"setb al\n"); EL("movzx eax, al\n"); break;
|
|
case TK_LEQ: EL("cmp ecx, eax\n"); EL(sgn?"setle al\n":"setbe al\n"); EL("movzx eax, al\n"); break;
|
|
case TK_GT: EL("cmp ecx, eax\n"); EL(sgn?"setg al\n" :"seta al\n"); EL("movzx eax, al\n"); break;
|
|
case TK_GEQ: EL("cmp ecx, eax\n"); EL(sgn?"setge al\n":"setae al\n"); EL("movzx eax, al\n"); break;
|
|
case TK_PLUS:
|
|
case TK_MINUS: {
|
|
Type lt = get_type(n->ch[0]);
|
|
Type rt = get_type(n->ch[1]);
|
|
if (lt.ptrs > 0 || rt.ptrs > 0)
|
|
ptr_arith(n->op, lt, rt);
|
|
else
|
|
arith(n->op, sgn);
|
|
break;
|
|
}
|
|
default: arith(n->op, sgn); break;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case N_ASGN:
|
|
if (n->op==TK_ASSIGN) {
|
|
gen_expr(n->ch[1]); store_lval(n->ch[0]);
|
|
} else {
|
|
/* Compound assignment */
|
|
TKind base;
|
|
switch(n->op){
|
|
case TK_ADDEQ: base=TK_PLUS; break; case TK_SUBEQ: base=TK_MINUS; break;
|
|
case TK_MULEQ: base=TK_STAR; break; case TK_DIVEQ: base=TK_SLASH; break;
|
|
case TK_MODEQ: base=TK_MOD; break; case TK_ANDEQ: base=TK_AMP; break;
|
|
case TK_OREQ: base=TK_PIPE; break; case TK_XOREQ: base=TK_CARET; break;
|
|
case TK_SHLEQ: base=TK_SHL; break; case TK_SHREQ: base=TK_SHR; break;
|
|
default: base=TK_EOF;
|
|
}
|
|
gen_expr(n->ch[0]); EL("push eax\n");
|
|
gen_expr(n->ch[1]); EL("pop ecx\n");
|
|
{
|
|
int sgn = is_signed(get_type(n->ch[0]).tbase) ||
|
|
is_signed(get_type(n->ch[1]).tbase);
|
|
if (base==TK_PLUS || base==TK_MINUS) {
|
|
Type lt = get_type(n->ch[0]);
|
|
Type rt = get_type(n->ch[1]);
|
|
if (lt.ptrs > 0 || rt.ptrs > 0)
|
|
ptr_arith(base, lt, rt);
|
|
else
|
|
arith(base, sgn);
|
|
} else {
|
|
arith(base, sgn);
|
|
}
|
|
}
|
|
store_lval(n->ch[0]);
|
|
}
|
|
break;
|
|
|
|
case N_TERN:
|
|
la=new_lbl(); lb=new_lbl();
|
|
gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .L%d\n",la);
|
|
gen_expr(n->ch[1]); EL("jmp .L%d\n",lb);
|
|
E(".L%d:\n",la); gen_expr(n->ch[2]); E(".L%d:\n",lb);
|
|
break;
|
|
|
|
case N_CALL: {
|
|
int argc=n->nlist;
|
|
for (int i=argc-1;i>=0;i--) { gen_expr(n->list[i]); EL("push eax\n"); }
|
|
if (n->ch[0]->kind==N_ID) EL("call %s\n", n->ch[0]->str);
|
|
else { gen_expr(n->ch[0]); EL("call eax\n"); }
|
|
if (argc) EL("add esp, %d\n", argc*4);
|
|
break; }
|
|
|
|
case N_IDX: {
|
|
/* arr[i] — address then dereference with correct width and signedness */
|
|
Type arr = get_type(n->ch[0]);
|
|
int psz = (arr.ptrs > 1) ? 4 : base_size(arr.tbase);
|
|
int sgn = (arr.ptrs == 1) && is_signed(arr.tbase);
|
|
gen_lval_addr(n);
|
|
switch (psz) {
|
|
case 1: EL(sgn ? "movsx eax, byte [eax]\n" : "movzx eax, byte [eax]\n"); break;
|
|
case 2: EL(sgn ? "movsx eax, word [eax]\n" : "movzx eax, word [eax]\n"); break;
|
|
default: EL("mov eax, [eax]\n"); break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
default: break;
|
|
}
|
|
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
STATEMENT CODE GENERATOR
|
|
══════════════════════════════════════════════════════════ */
|
|
/*
 * gen_stmt — emit assembly for one statement node, recursing into children.
 *
 * Nesting of loops and switches is tracked in brk_stk/cont_stk, indexed by
 * stk_top: `break` jumps to .Lbrk<id> of the innermost entry and `continue`
 * to .Lcont<id>.  A switch pushes an entry as well; its continue slot is
 * inherited from the enclosing entry (or -1 when there is none) so that a
 * `continue` inside a switch reaches the surrounding loop.
 *
 * A NULL node is accepted and generates nothing.
 */
static void gen_stmt(Node *n) {
    int la, lb, lc;

    if (!n) return;

    switch (n->kind) {
    case N_BLK:
        for (int i = 0; i < n->nlist; i++) gen_stmt(n->list[i]);
        break;

    case N_ES:
        gen_expr(n->ch[0]);
        break;

    case N_LVAR:
        /* Optional initialiser: evaluate into eax, then store to the local. */
        if (n->ch[0]) { gen_expr(n->ch[0]); E(" mov "); emit_ref(n->str); E(", eax\n"); }
        break;

    case N_LARR:
        if (n->nlist) {
            int base = find_local(n->str);
            /* element size for initialiser stride */
            int esz = (n->ptrs > 0) ? 4 : base_size(n->tbase);
            for (int i = 0; i < n->nlist; i++) {
                gen_expr(n->list[i]);
                int off = base + i * esz;
                /* %+d always prints the sign, so offset 0 yields "ebp+0"
                   rather than the invalid "ebp0". */
                switch (esz) {
                case 1:  EL("mov byte [ebp%+d], al\n", off);   break;
                case 2:  EL("mov word [ebp%+d], ax\n", off);   break;
                default: EL("mov dword [ebp%+d], eax\n", off); break;
                }
            }
        }
        break;

    case N_RET:
        if (n->ch[0]) gen_expr(n->ch[0]);
        EL("jmp .Lret%d\n", ret_lbl);
        break;

    case N_BRK:
        /* Guard against reading brk_stk[-1]. */
        if (stk_top <= 0) die("'break' outside of a loop or switch");
        EL("jmp .Lbrk%d\n", brk_stk[stk_top-1]);
        break;

    case N_CONT:
        /* A negative slot marks a switch with no enclosing loop. */
        if (stk_top <= 0 || cont_stk[stk_top-1] < 0)
            die("'continue' outside of a loop");
        EL("jmp .Lcont%d\n", cont_stk[stk_top-1]);
        break;

    case N_IF:
        la = new_lbl(); lb = new_lbl();
        gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .L%d\n", la);
        gen_stmt(n->ch[1]); EL("jmp .L%d\n", lb);
        E(".L%d:\n", la);
        if (n->ch[2]) gen_stmt(n->ch[2]);
        E(".L%d:\n", lb);
        break;

    case N_WHILE:
        la = new_lbl(); lb = new_lbl();
        brk_stk[stk_top] = lb; cont_stk[stk_top] = la; stk_top++;
        E(".Lcont%d:\n", la);
        gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .Lbrk%d\n", lb);
        gen_stmt(n->ch[1]); EL("jmp .Lcont%d\n", la);
        E(".Lbrk%d:\n", lb);
        stk_top--;
        break;

    case N_FOR:
        /* la = loop top, lb = break target, lc = continue target (step). */
        la = new_lbl(); lb = new_lbl(); lc = new_lbl();
        brk_stk[stk_top] = lb; cont_stk[stk_top] = lc; stk_top++;
        if (n->ch[0]) gen_stmt(n->ch[0]);
        E(".L%d:\n", la);
        if (n->ch[1]) { gen_expr(n->ch[1]); EL("test eax, eax\n"); EL("jz .Lbrk%d\n", lb); }
        gen_stmt(n->ch[3]);
        E(".Lcont%d:\n", lc);
        if (n->ch[2]) gen_expr(n->ch[2]);
        EL("jmp .L%d\n", la);
        E(".Lbrk%d:\n", lb);
        stk_top--;
        break;

    case N_SW: {
        lb = new_lbl();
        int ncases = n->nlist;
        int deflbl = -1;              /* label of the first non-case arm, if any */
        int *clbls = malloc((size_t)(ncases > 0 ? ncases : 1) * sizeof *clbls);
        if (!clbls) die("out of memory");
        for (int i = 0; i < ncases; i++) clbls[i] = new_lbl();

        brk_stk[stk_top] = lb;
        /* Inherit the enclosing continue target; -1 means "none".
           NOTE(review): assumes new_lbl() never returns a negative id. */
        cont_stk[stk_top] = (stk_top > 0) ? cont_stk[stk_top-1] : -1;
        stk_top++;

        /* The selector stays in eax for the whole dispatch: only cmp/je are
           emitted here and neither modifies eax.  Nothing is left on the
           stack, so jumping out of the switch cannot unbalance esp. */
        gen_expr(n->ch[0]);
        for (int i = 0; i < ncases; i++) {
            Node *c = n->list[i];
            if (c->kind == N_CASE) {
                EL("cmp eax, %ld\n", c->ch[0]->num);
                EL("je .L%d\n", clbls[i]);
            } else if (deflbl < 0) {
                /* Remember the default arm; jump to it only after every
                   case has been tested, wherever it appears in the source. */
                deflbl = clbls[i];
            }
        }
        if (deflbl >= 0) { EL("jmp .L%d\n", deflbl); }
        else             { EL("jmp .Lbrk%d\n", lb); }

        /* Arm bodies in source order, so fall-through works. */
        for (int i = 0; i < ncases; i++) {
            E(".L%d:\n", clbls[i]);
            Node *c = n->list[i];
            int ns = (c->kind == N_CASE || c->kind == N_DEF) ? c->nlist : 0;
            for (int j = 0; j < ns; j++) gen_stmt(c->list[j]);
        }
        E(".Lbrk%d:\n", lb);
        stk_top--;
        free(clbls);
        break;
    }

    default:
        break;
    }
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
LOCAL VARIABLE PRE-SCAN
|
|
══════════════════════════════════════════════════════════ */
|
|
/*
 * scan_locals — walk a function body and assign a frame slot to every local
 * declaration before any code is generated.
 *
 * frame_size grows downwards from ebp; each declaration is registered at
 * offset -frame_size.  Scalars take 4 bytes.  Arrays take
 * element_size * count bytes, rounded up to a multiple of 4 so that locals
 * declared after a byte/word array stay dword-aligned.
 *
 * Declaration nodes themselves are not descended into; every other node has
 * its four child slots and its list scanned.
 */
static void scan_locals(Node *n) {
    if (!n) return;

    if (n->kind == N_LVAR) {
        frame_size += 4;
        def_local(n->str, -frame_size, n->tbase, n->ptrs);
        return;
    }

    if (n->kind == N_LARR) {
        long cnt = n->ch[0]->num;
        /* size per element */
        int esz = (n->ptrs > 0) ? 4 : base_size(n->tbase);
        int bytes = esz * (int)cnt;
        /* Pad at the high end; the array still starts at -frame_size. */
        bytes = (bytes + 3) & ~3;
        frame_size += bytes;
        /* Array name decays to a pointer to its element type */
        def_local_array(n->str, -frame_size, n->tbase, n->ptrs + 1);
        return;
    }

    for (int i = 0; i < 4; i++) scan_locals(n->ch[i]);
    for (int i = 0; i < n->nlist; i++) scan_locals(n->list[i]);
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
FUNCTION GENERATOR
|
|
══════════════════════════════════════════════════════════ */
|
|
static char called[256][64];
|
|
static int ncalled=0;
|
|
static void collect_calls(Node *n) {
|
|
if (!n) return;
|
|
if (n->kind==N_CALL && n->ch[0]->kind==N_ID) {
|
|
char *nm=n->ch[0]->str;
|
|
int found=0;
|
|
for(int i=0;i<ncalled;i++) if(!strcmp(called[i],nm)){found=1;break;}
|
|
if(!found && ncalled<256) strcpy(called[ncalled++],nm);
|
|
}
|
|
for(int i=0;i<4;i++) collect_calls(n->ch[i]);
|
|
for(int i=0;i<n->nlist;i++) collect_calls(n->list[i]);
|
|
}
|
|
|
|
/*
 * gen_func — emit one function definition.
 *
 * Resets the per-function local table, pre-scans the body (fn->ch[0]) for
 * local declarations, emits the label and prologue, binds the parameters in
 * fn->list to [ebp+8], [ebp+12], ... and generates the body.  All returns
 * jump to a shared .Lret<N> label placed before the epilogue.
 */
static void gen_func(Node *fn) {
    Node *body = fn->ch[0];

    frame_size = 0;
    nlocals = 0;
    scan_locals(body);

    /* Round the local area up to a multiple of 16 bytes. */
    int reserve = (frame_size + 15) & ~15;

    ret_lbl = new_lbl();

    E("\n%s:\n", fn->str);
    EL("push ebp\n");
    EL("mov ebp, esp\n");
    if (reserve != 0) {
        EL("sub esp, %d\n", reserve);
    }

    /* Parameters start at ebp+8, one dword slot each. */
    for (int k = 0; k < fn->nlist; k++) {
        Node *param = fn->list[k];
        def_local(param->str, 8 + 4 * k, param->tbase, param->ptrs);
    }

    gen_stmt(body);

    E(".Lret%d:\n", ret_lbl);
    EL("mov esp, ebp\n");
    EL("pop ebp\n");
    EL("ret\n");
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
STRING DATA EMITTER
|
|
══════════════════════════════════════════════════════════ */
|
|
/* FIX(bug2): takes explicit length; walks to p<=end to include null terminator */
|
|
static void emit_str_data(const char *s, int len) {
|
|
E("db ");
|
|
int first=1;
|
|
const char *end = s + len; /* points at the null terminator */
|
|
for(const char *p=s; p<=end; p++) {
|
|
unsigned char c=(unsigned char)*p;
|
|
if (c>=32 && c<127 && c!='"' && c!='\\') {
|
|
if (!first) E(",");
|
|
E("\"");
|
|
while (p<=end && (unsigned char)*p>=32 && (unsigned char)*p<127
|
|
&& *p!='"' && *p!='\\') { fputc(*p,out); p++; }
|
|
p--;
|
|
E("\"");
|
|
} else {
|
|
if (!first) E(",");
|
|
E("%d", c);
|
|
}
|
|
first=0;
|
|
}
|
|
E("\n");
|
|
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
TOP-LEVEL CODE GENERATOR
|
|
══════════════════════════════════════════════════════════ */
|
|
|
|
/* Function return-type table — populated before codegen so get_type(N_CALL) works */
|
|
#define MAX_FUNS 256
|
|
static struct { char name[64]; TKind tbase; int ptrs; } funs[MAX_FUNS];
|
|
static int nfuns=0;
|
|
static void def_fun(const char *nm, TKind tbase, int ptrs) {
|
|
if (nfuns==MAX_FUNS) return;
|
|
strncpy(funs[nfuns].name, nm, 63);
|
|
funs[nfuns].tbase = tbase;
|
|
funs[nfuns].ptrs = ptrs;
|
|
nfuns++;
|
|
}
|
|
static Type fun_ret_type(const char *nm) {
|
|
for (int i=0;i<nfuns;i++)
|
|
if (!strcmp(funs[i].name, nm)) return (Type){funs[i].tbase, funs[i].ptrs};
|
|
return T_INT;
|
|
}
|
|
|
|
static void codegen(Node *prog) {
|
|
/* Register all global variable types before codegen so get_type() works */
|
|
for(int i=0;i<prog->nlist;i++) {
|
|
Node *d=prog->list[i];
|
|
if(d->kind==N_GVAR)
|
|
def_global(d->str, d->tbase, d->ptrs);
|
|
else if(d->kind==N_GARR)
|
|
def_global_array(d->str, d->tbase, d->ptrs + 1); /* array decays to pointer */
|
|
else if(d->kind==N_FUNC || d->kind==N_FDECL)
|
|
def_fun(d->str, d->tbase, d->ptrs); /* register return type */
|
|
}
|
|
|
|
/* Collect defined function names */
|
|
char defined[256][64]; int ndef=0;
|
|
for(int i=0;i<prog->nlist;i++)
|
|
if(prog->list[i]->kind==N_FUNC)
|
|
strcpy(defined[ndef++], prog->list[i]->str);
|
|
|
|
collect_calls(prog);
|
|
|
|
E("BITS 32\n");
|
|
E("section .text\n");
|
|
for(int i=0;i<ncalled;i++) {
|
|
int found=0;
|
|
for(int j=0;j<ndef;j++) if(!strcmp(called[i],defined[j])){found=1;break;}
|
|
if(!found) E("extern %s\n", called[i]);
|
|
}
|
|
for(int i=0;i<ndef;i++) E("global %s\n", defined[i]);
|
|
|
|
for(int i=0;i<prog->nlist;i++)
|
|
if(prog->list[i]->kind==N_FUNC)
|
|
gen_func(prog->list[i]);
|
|
|
|
/* ── .data section: string literals + explicitly initialised globals ── */
|
|
int has_data = (str_cnt > 0);
|
|
for(int i=0;i<prog->nlist;i++) {
|
|
Node *d=prog->list[i];
|
|
/* FIX(bug1): guard with kind==N_NUM before checking num, so N_STR/N_ID
|
|
initializers don't accidentally satisfy num==0 and leak into .bss too */
|
|
if (d->kind==N_GVAR && d->ch[0] != NULL &&
|
|
!(d->ch[0]->kind==N_NUM && d->ch[0]->num==0)) has_data=1;
|
|
if (d->kind==N_GARR && d->nlist > 0) has_data=1;
|
|
}
|
|
if (has_data) {
|
|
E("\nsection .data\n");
|
|
for(int i=0;i<prog->nlist;i++) {
|
|
Node *d=prog->list[i];
|
|
/* scalar global with non-zero initialiser */
|
|
if(d->kind==N_GVAR && d->ch[0] != NULL &&
|
|
!(d->ch[0]->kind==N_NUM && d->ch[0]->num==0)) {
|
|
const char *dw = d->ptrs ? "dd" :
|
|
(base_size(d->tbase)==1?"db":base_size(d->tbase)==2?"dw":
|
|
base_size(d->tbase)==8?"dq":"dd");
|
|
if (d->ch[0]->kind == N_NUM) {
|
|
long v = d->ch[0]->num;
|
|
E("%s: %s %ld\n", d->str, dw, v);
|
|
} else if (d->ch[0]->kind == N_STR) {
|
|
/* FIX(bug2): use length-aware intern for global string inits */
|
|
int id = intern_str_n(d->ch[0]->str, d->ch[0]->slen);
|
|
E("%s: %s _s%d\n", d->str, dw, id);
|
|
} else if (d->ch[0]->kind == N_ID) {
|
|
E("%s: %s %s\n", d->str, dw, d->ch[0]->str);
|
|
}
|
|
}
|
|
/* global array with explicit initialiser */
|
|
if(d->kind==N_GARR && d->nlist > 0) {
|
|
long cnt = d->ch[0]->num;
|
|
const char *dw = d->ptrs ? "dd" :
|
|
(base_size(d->tbase)==1?"db":base_size(d->tbase)==2?"dw":
|
|
base_size(d->tbase)==8?"dq":"dd");
|
|
E("%s: %s", d->str, dw);
|
|
for(int j=0; j<d->nlist; j++)
|
|
E("%s%ld", j ? "," : " ", d->list[j]->num);
|
|
for(long j=d->nlist; j<cnt; j++)
|
|
E("%s0", (j > 0 || d->nlist > 0) ? "," : " ");
|
|
E("\n");
|
|
}
|
|
}
|
|
/* FIX(bug2): pass length to emit_str_data */
|
|
for(int i=0;i<str_cnt;i++) {
|
|
E("_s%d: ", i);
|
|
emit_str_data(str_val[i], str_val_len[i]);
|
|
}
|
|
}
|
|
|
|
/* ── .bss section: zero-init scalars and uninitialised arrays ── */
|
|
int has_bss = 0;
|
|
for(int i=0;i<prog->nlist;i++) {
|
|
Node *d=prog->list[i];
|
|
/* FIX(bug1): check kind==N_NUM before num==0, so string/id inits
|
|
don't produce a duplicate symbol in both .data and .bss */
|
|
if (d->kind==N_GVAR && (!d->ch[0] || (d->ch[0]->kind==N_NUM && d->ch[0]->num==0))) has_bss=1;
|
|
if (d->kind==N_GARR && d->nlist == 0) has_bss=1;
|
|
}
|
|
if (has_bss) {
|
|
E("\nsection .bss\n");
|
|
for(int i=0;i<prog->nlist;i++) {
|
|
Node *d=prog->list[i];
|
|
/* FIX(bug1): same guard as has_bss check above */
|
|
if(d->kind==N_GVAR && (!d->ch[0] || (d->ch[0]->kind==N_NUM && d->ch[0]->num==0))) {
|
|
const char *rs = d->ptrs ? "resd" :
|
|
(base_size(d->tbase)==1?"resb":base_size(d->tbase)==2?"resw":
|
|
base_size(d->tbase)==8?"resq":"resd");
|
|
E("%s: %s 1\n", d->str, rs);
|
|
}
|
|
if(d->kind==N_GARR && d->nlist == 0) {
|
|
long cnt = d->ch[0]->num;
|
|
const char *rs = d->ptrs ? "resd" :
|
|
(base_size(d->tbase)==1?"resb":base_size(d->tbase)==2?"resw":
|
|
base_size(d->tbase)==8?"resq":"resd");
|
|
E("%s: %s %ld\n", d->str, rs, cnt);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ══════════════════════════════════════════════════════════
|
|
MAIN
|
|
══════════════════════════════════════════════════════════ */
|
|
/*
 * read_file — load the whole file at path into a freshly malloc'd,
 * NUL-terminated buffer that the caller owns.
 *
 * Any failure (open, seek, size query, allocation, read) is reported through
 * die().  The buffer is terminated at the number of bytes actually read,
 * which can be smaller than the size reported by ftell for a text-mode
 * stream.
 */
static char *read_file(const char *path) {
    FILE *f = fopen(path, "r");
    if (!f) die("cannot open: %s", path);

    if (fseek(f, 0, SEEK_END) != 0) die("cannot seek: %s", path);
    long sz = ftell(f);
    if (sz < 0) die("cannot determine size of: %s", path);
    rewind(f);

    char *buf = malloc((size_t)sz + 1);
    if (!buf) die("out of memory reading: %s", path);

    size_t got = fread(buf, 1, (size_t)sz, f);
    if (ferror(f)) die("read error: %s", path);
    buf[got] = 0;

    fclose(f);
    return buf;
}
|
|
|
|
int main(int argc, char **argv) {
|
|
if (argc<2) { fprintf(stderr,"usage: %s <source.cm> [out.asm]\n",argv[0]); return 1; }
|
|
char *source = read_file(argv[1]);
|
|
src=source; src_pos=0;
|
|
Node *prog = parse_prog();
|
|
out = (argc>=3) ? fopen(argv[2],"w") : stdout;
|
|
if (!out) die("cannot open output: %s", argv[2]);
|
|
codegen(prog);
|
|
if (argc>=3) { fclose(out); fprintf(stderr,"wrote %s\n",argv[2]); }
|
|
return 0;
|
|
}
|