From da2fcb62605f9dbae028e9c0d365678f0033e7e1 Mon Sep 17 00:00:00 2001 From: RoyR Date: Sat, 14 Mar 2026 14:14:37 -0400 Subject: [PATCH] Initial commit --- MANUAL.md | 1257 +++++++++++++++++++++++++++++++++++ Makefile | 101 +++ QUICKREF.md | 332 ++++++++++ README.md | 349 ++++++++++ README_TESTS.md | 286 ++++++++ TROUBLESHOOTING.md | 623 +++++++++++++++++ common.c | 1433 ++++++++++++++++++++++++++++++++++++++++ examples/arrays.cm | 48 ++ examples/bitwise.cm | 60 ++ examples/bubblesort.cm | 40 ++ examples/calculator.cm | 101 +++ examples/fibonacci.cm | 23 + examples/hello.cm | 11 + examples/linkedlist.cm | 159 +++++ examples/pointers.cm | 38 ++ examples/primes.cm | 68 ++ examples/strings.cm | 70 ++ examples/switch.cm | 65 ++ examples/types.cm | 50 ++ run_tests.sh | 18 + test_runner.c | 619 +++++++++++++++++ test_suite.cm | 1024 ++++++++++++++++++++++++++++ 22 files changed, 6775 insertions(+) create mode 100644 MANUAL.md create mode 100644 Makefile create mode 100644 QUICKREF.md create mode 100644 README.md create mode 100644 README_TESTS.md create mode 100644 TROUBLESHOOTING.md create mode 100644 common.c create mode 100644 examples/arrays.cm create mode 100644 examples/bitwise.cm create mode 100644 examples/bubblesort.cm create mode 100644 examples/calculator.cm create mode 100644 examples/fibonacci.cm create mode 100644 examples/hello.cm create mode 100644 examples/linkedlist.cm create mode 100644 examples/pointers.cm create mode 100644 examples/primes.cm create mode 100644 examples/strings.cm create mode 100644 examples/switch.cm create mode 100644 examples/types.cm create mode 100644 run_tests.sh create mode 100644 test_runner.c create mode 100644 test_suite.cm diff --git a/MANUAL.md b/MANUAL.md new file mode 100644 index 0000000..9ac4059 --- /dev/null +++ b/MANUAL.md @@ -0,0 +1,1257 @@ +# Common Language Reference Manual + +**Version 1.0** +**Target**: x86-32 (IA-32) Linux ELF +**Calling Convention**: cdecl +**Author**: Common Compiler Project + +--- + +## 
Table of Contents + +1. [Introduction](#1-introduction) +2. [Compiler Usage](#2-compiler-usage) +3. [Lexical Elements](#3-lexical-elements) +4. [Type System](#4-type-system) +5. [Declarations](#5-declarations) +6. [Expressions](#6-expressions) +7. [Statements](#7-statements) +8. [Functions](#8-functions) +9. [Scope and Linkage](#9-scope-and-linkage) +10. [Memory Model](#10-memory-model) +11. [Assembly Interface](#11-assembly-interface) +12. [Limitations](#12-limitations) +13. [Examples](#13-examples) + +--- + +## 1. Introduction + +Common is a statically-typed, imperative programming language that compiles to x86-32 assembly (NASM syntax). It provides a minimal yet complete set of features for systems programming: + +- Integer types from 8 to 64 bits +- Pointers and arrays +- Functions with parameters +- Control flow (if, while, for, switch) +- Full operator set (arithmetic, logical, bitwise) +- Direct C library interoperability + +### Design Philosophy + +- **No runtime dependencies**: Compiled programs link only against libc +- **Explicit control**: No hidden allocations; implicit conversions are limited to array decay and integer promotion +- **Predictable output**: Direct mapping to assembly +- **C compatibility**: Can call and be called by C code + +--- + +## 2.
Compiler Usage + +### Building the Compiler + +```bash +gcc -o common common.c +``` + +### Compiling Programs + +```bash +# Compile Common source to NASM assembly +./common source.cm output.asm + +# Assemble to object file +nasm -f elf32 output.asm -o output.o + +# Link (requires 32-bit support) +gcc -m32 output.o -o executable +``` + +### One-Line Compilation + +```bash +./common source.cm output.asm && nasm -f elf32 output.asm && gcc -m32 output.o -o program +``` + +### Compiler Output + +The compiler writes NASM x86-32 assembly to stdout (or to the specified output file) using: +- **ELF32** object format +- **cdecl** calling convention +- **Sections**: `.text`, `.data`, `.bss` + +### Error Reporting + +Errors are reported to stderr with line numbers: + +``` +line 42: syntax error near 'token' +line 15: Unknown char '@' +``` + +--- + +## 3. Lexical Elements + +### Comments + +```c +// Single-line comment (C++ style) + +/* Multi-line comment + spanning multiple lines */ +``` + +Comments are stripped during lexical analysis. + +### Keywords + +``` +if else while for switch case default +break continue return +void uint8 uint16 uint32 uint64 + int8 int16 int32 int64 +``` + +### Identifiers + +``` +[a-zA-Z_][a-zA-Z0-9_]* +``` + +- Must start with letter or underscore +- Case-sensitive +- No language-defined length limit (the compiler's internal buffer holds 256 chars) + +### Integer Literals + +```c +42 // Decimal +0x2A // Hexadecimal +052 // Octal +0b101010 // Binary (if supported by strtoul) +``` + +Literals are parsed by `strtoul()` with base 0 (auto-detect). + +### String Literals + +```c +"Hello, World!" +"Line 1\nLine 2" +"Tab\there" +``` + +Supported escape sequences: +- `\n` - newline +- `\t` - tab +- `\r` - carriage return +- `\0` - null character +- `\\` - backslash +- `\"` - quote +- Any other `\x` - literal `x` + +String literals are null-terminated and stored in `.data` section.
+ +### Operators and Punctuation + +**Multi-character operators**: +``` +== != <= >= && || << >> ++ -- ++= -= *= /= %= &= |= ^= <<= >>= +``` + +**Single-character operators**: +``` ++ - * / % & | ^ ~ ! < > = +``` + +**Punctuation**: +``` +( ) { } [ ] ; , : ? +``` + +--- + +## 4. Type System + +### Integer Types + +| Type | Size | Range (Unsigned) | Range (Signed) | +|---------|-------|----------------------|-----------------------------| +| uint8 | 1 byte| 0 to 255 | - | +| int8 | 1 byte| - | -128 to 127 | +| uint16 | 2 bytes| 0 to 65,535 | - | +| int16 | 2 bytes| - | -32,768 to 32,767 | +| uint32 | 4 bytes| 0 to 4,294,967,295 | - | +| int32 | 4 bytes| - | -2,147,483,648 to 2,147,483,647 | +| uint64 | 8 bytes| 0 to 2^64-1 | - | +| int64 | 8 bytes| - | -2^63 to 2^63-1 | + +**Note**: 64-bit types are partially supported. They occupy 8 bytes in memory but arithmetic operations truncate to 32 bits on x86-32. + +### Void Type + +```c +void +``` + +- Used as a function return type, or as the pointee of a generic pointer (`void *`) +- Cannot declare variables of type void +- `void` in parameter list means "no parameters" + +### Pointer Types + +```c +int32 *ptr; // Pointer to int32 +uint8 **pptr; // Pointer to pointer to uint8 +void *generic; // Generic pointer (4 bytes) +``` + +- All pointers are 4 bytes (32-bit addresses) +- Pointer arithmetic scales by pointee size +- Can be cast between types + +### Array Types + +```c +int32 arr[10]; // Array of 10 int32 +uint8 matrix[5][5]; // Not supported (single dimension only) +``` + +Arrays: +- Decay to pointers when used in expressions +- Cannot be returned from functions +- Cannot be assigned (use element-wise copy) + +### Type Qualifiers + +Common has no type qualifiers (no `const`, `volatile`, `restrict`). + +--- + +## 5.
Declarations + +### Variable Declarations + +**Local variables**: +```c +int32 x; // Uninitialized +int32 y = 42; // Initialized +uint8 c = 'A'; // Character (just an int) +``` + +**Global variables**: +```c +int32 global_var; // Zero-initialized (.bss) +int32 initialized = 100; // Explicitly initialized (.data) +``` + +### Array Declarations + +**Local arrays**: +```c +int32 arr[10]; // Uninitialized +int32 nums[5] = { 1, 2, 3, 4, 5 }; // Initialized +uint8 partial[10] = { 1, 2 }; // Rest zero-filled +``` + +**Global arrays**: +```c +int32 global_arr[100]; // Zero-initialized (.bss) +int32 data[3] = { 10, 20, 30 }; // Initialized (.data) +``` + +### Pointer Declarations + +```c +int32 *ptr; // Pointer to int32 +uint8 *str; // Pointer to uint8 (common for strings) +void *generic; // Generic pointer +int32 **pptr; // Pointer to pointer +``` + +### Type Syntax + +``` +type_specifier ::= base_type pointer_suffix +base_type ::= "int8" | "int16" | "int32" | "int64" + | "uint8" | "uint16" | "uint32" | "uint64" + | "void" +pointer_suffix ::= ("*")* +``` + +Examples: +```c +int32 x; // Base type: int32, no pointers +uint8 *s; // Base type: uint8, 1 pointer level +void **pp; // Base type: void, 2 pointer levels +``` + +--- + +## 6. 
Expressions + +### Primary Expressions + +```c +42 // Integer literal +"string" // String literal +variable // Identifier +(expression) // Parenthesized expression +``` + +### Postfix Expressions + +```c +array[index] // Array subscript +function(args) // Function call +expr++ // Post-increment +expr-- // Post-decrement +``` + +### Unary Expressions + +```c +++expr // Pre-increment +--expr // Pre-decrement +-expr // Negation +!expr // Logical NOT +~expr // Bitwise NOT +&expr // Address-of +*expr // Dereference +(type)expr // Type cast +``` + +### Binary Expressions + +**Arithmetic**: +```c +a + b // Addition +a - b // Subtraction +a * b // Multiplication +a / b // Division +a % b // Modulo +``` + +**Bitwise**: +```c +a & b // Bitwise AND +a | b // Bitwise OR +a ^ b // Bitwise XOR +a << b // Left shift +a >> b // Right shift (arithmetic for signed, logical for unsigned) +``` + +**Comparison**: +```c +a == b // Equal +a != b // Not equal +a < b // Less than +a <= b // Less than or equal +a > b // Greater than +a >= b // Greater than or equal +``` + +**Logical**: +```c +a && b // Logical AND (short-circuit) +a || b // Logical OR (short-circuit) +``` + +### Assignment Expressions + +```c +a = b // Assignment +a += b // Add and assign +a -= b // Subtract and assign +a *= b // Multiply and assign +a /= b // Divide and assign +a %= b // Modulo and assign +a &= b // AND and assign +a |= b // OR and assign +a ^= b // XOR and assign +a <<= b // Left shift and assign +a >>= b // Right shift and assign +``` + +### Ternary Expression + +```c +condition ? true_expr : false_expr +``` + +Example: +```c +max = (a > b) ? 
a : b; +``` + +### Operator Precedence + +From highest to lowest: + +| Level | Operators | Associativity | +|-------|----------------------------|---------------| +| 1 | `()` `[]` `++` `--` (post) | Left to right | +| 2 | `++` `--` (pre) `+` `-` `!` `~` `&` `*` `(cast)` | Right to left | +| 3 | `*` `/` `%` | Left to right | +| 4 | `+` `-` | Left to right | +| 5 | `<<` `>>` | Left to right | +| 6 | `<` `<=` `>` `>=` | Left to right | +| 7 | `==` `!=` | Left to right | +| 8 | `&` | Left to right | +| 9 | `^` | Left to right | +| 10 | `\|` | Left to right | +| 11 | `&&` | Left to right | +| 12 | `\|\|` | Left to right | +| 13 | `?:` | Right to left | +| 14 | `=` `+=` `-=` etc. | Right to left | + +### Pointer Arithmetic + +```c +int32 *p = arr; +p + 1 // Points to next int32 (address + 4) +p - 1 // Points to previous int32 (address - 4) +p[i] // Equivalent to *(p + i) +``` + +Pointer arithmetic automatically scales by the size of the pointed-to type: +- `uint8*` increments by 1 +- `uint16*` increments by 2 +- `int32*` increments by 4 +- Any pointer-to-pointer increments by 4 + +### Type Conversions + +**Explicit casting**: +```c +(uint8)value // Truncate to 8 bits +(int32)byte_value // Sign-extend or zero-extend +(uint32*)ptr // Pointer type conversion +``` + +**Implicit conversions**: +- Arrays decay to pointers +- Smaller integers promote to int32 in expressions + +--- + +## 7. Statements + +### Expression Statement + +```c +expression; +``` + +Examples: +```c +x = 42; +function_call(); +x++; +``` + +### Compound Statement (Block) + +```c +{ + statement1; + statement2; + ... +} +``` + +Blocks create new scopes for local variables.
+ +### If Statement + +```c +if (condition) + statement + +if (condition) + statement +else + statement +``` + +Examples: +```c +if (x > 0) + printf("positive\n"); + +if (x > 0) { + printf("positive\n"); +} else if (x < 0) { + printf("negative\n"); +} else { + printf("zero\n"); +} +``` + +### While Statement + +```c +while (condition) + statement +``` + +Example: +```c +while (x < 100) { + x = x * 2; +} +``` + +### For Statement + +```c +for (init; condition; increment) + statement +``` + +The `init` can be: +- Empty: `for (; condition; increment)` +- Expression: `for (x = 0; x < 10; x++)` +- Declaration: `for (int32 i = 0; i < 10; i++)` + +Example: +```c +for (int32 i = 0; i < 10; i = i + 1) { + sum = sum + i; +} +``` + +### Switch Statement + +```c +switch (expression) { + case value1: + statements + break; + case value2: + statements + break; + default: + statements +} +``` + +- Cases must be integer constants +- Fall-through is allowed (no automatic break) +- `default` is optional + +Example: +```c +switch (day) { + case 0: + printf("Sunday\n"); + break; + case 6: + printf("Saturday\n"); + break; + default: + printf("Weekday\n"); +} +``` + +### Break Statement + +```c +break; +``` + +Exits the innermost `while`, `for`, or `switch` statement. + +### Continue Statement + +```c +continue; +``` + +Skips to the next iteration of the innermost `while` or `for` loop. + +### Return Statement + +```c +return; // Return from void function +return expression; // Return value +``` + +Example: +```c +return 42; +return x + y; +return; +``` + +--- + +## 8. 
Functions + +### Function Declarations + +```c +return_type function_name(parameter_list); +``` + +Forward declaration (prototype): +```c +int32 add(int32 a, int32 b); +``` + +### Function Definitions + +```c +return_type function_name(parameter_list) { + statements +} +``` + +Example: +```c +int32 add(int32 a, int32 b) { + return a + b; +} +``` + +### Parameters + +```c +void no_params(void) { } // No parameters +int32 one_param(int32 x) { } // One parameter +int32 two_params(int32 x, uint8 *s) { } // Multiple parameters +``` + +Parameters are passed by value. To modify caller's data, use pointers: + +```c +void swap(int32 *a, int32 *b) { + int32 temp = *a; + *a = *b; + *b = temp; +} +``` + +### Return Values + +```c +int32 get_value(void) { + return 42; +} + +void no_return(void) { + // No return statement needed + return; // Optional +} +``` + +Return value is passed in `eax` register (32-bit). + +### Recursion + +Recursion is fully supported: + +```c +int32 factorial(int32 n) { + if (n <= 1) + return 1; + return n * factorial(n - 1); +} +``` + +### Calling Convention + +Functions use **cdecl** convention: +- Arguments pushed right-to-left on stack +- Caller cleans up stack +- Return value in `eax` +- `eax`, `ecx`, `edx` are caller-saved +- `ebx`, `esi`, `edi`, `ebp` are callee-saved + +### Calling C Functions + +Common can call C library functions: + +```c +// Declare C functions +void printf(uint8 *format, ...); +void *malloc(uint32 size); +void free(void *ptr); + +int32 main(void) { + printf("Hello from Common\n"); + void *mem = malloc(100); + free(mem); + return 0; +} +``` + +**Note**: Variadic functions (`...`) can be declared but not defined in Common. + +--- + +## 9. 
Scope and Linkage + +### Scope Rules + +**Global scope**: +- Variables and functions declared outside any function +- Visible to all functions in the file + +**Local scope**: +- Variables declared inside a function or block +- Visible only within that function/block +- Shadows global variables with the same name + +**Block scope**: +```c +{ + int32 x = 1; + { + int32 x = 2; // Different variable, shadows outer x + printf("%d\n", x); // Prints 2 + } + printf("%d\n", x); // Prints 1 +} +``` + +### Linkage + +**External linkage** (default for functions): +```c +int32 global_function(void) { ... } +``` +Symbol is exported (`global` directive in assembly). + +**No linkage** (local variables): +```c +void func(void) { + int32 local; // No linkage +} +``` + +**Static linkage**: Not supported. All functions have external linkage. + +### Name Resolution + +1. Check local scope (function parameters and locals) +2. Check global scope +3. If not found, assumed to be external symbol + +--- + +## 10. Memory Model + +### Stack Layout + +``` +High Address ++------------------+ +| Return address | ++------------------+ +| Saved EBP | <-- EBP ++------------------+ +| Local variable 1 | EBP - 4 ++------------------+ +| Local variable 2 | EBP - 8 ++------------------+ +| ... | ++------------------+ +| Array data | (grows down) ++------------------+ <-- ESP +Low Address +``` + +### Function Call Stack + +```c +caller(): + push arg2 + push arg1 + call callee + add esp, 8 // Clean up arguments + +callee(arg1, arg2): + push ebp // Save old frame pointer + mov ebp, esp // Set up new frame + sub esp, N // Allocate locals + ... + mov esp, ebp // Restore stack + pop ebp + ret +``` + +Arguments accessed via `[ebp+8]`, `[ebp+12]`, etc. +Locals accessed via `[ebp-4]`, `[ebp-8]`, etc. 
+ +### Data Sections + +**.text**: Read-only code +```nasm +section .text +function_name: + ; assembly code +``` + +**.data**: Initialized data +```nasm +section .data +global_var: dd 42 +string: db "Hello", 0 +``` + +**.bss**: Zero-initialized data +```nasm +section .bss +uninit_var: resd 1 +array: resb 100 +``` + +### Size Directives + +| Directive | Size | Common Type | +|-----------|-------|----------------| +| `resb`/`db` | 1 byte | uint8/int8 | +| `resw`/`dw` | 2 bytes| uint16/int16 | +| `resd`/`dd` | 4 bytes| uint32/int32/pointers | +| `resq`/`dq` | 8 bytes| uint64/int64 | + +### Alignment + +- Stack is 16-byte aligned (per System V ABI) +- Local variables are 4-byte aligned +- Arrays follow element alignment + +--- + +## 11. Assembly Interface + +### Generated Assembly Structure + +```nasm +BITS 32 +section .text + +; External function declarations +extern printf +extern malloc + +; Exported functions +global main +global my_function + +main: + push ebp + mov ebp, esp + sub esp, 16 ; Allocate locals + ; ... function body ... + mov esp, ebp + pop ebp + ret + +section .data +_s0: db "Hello", 0 ; String literal + +section .bss +global_var: resd 1 ; Global variable +``` + +### Register Usage + +**Caller-saved** (may be modified by called function): +- `eax` - Return value, scratch +- `ecx` - Scratch, left operand +- `edx` - Scratch, division remainder + +**Callee-saved** (preserved across calls): +- `ebx` - Base register +- `esi` - Source index +- `edi` - Destination index +- `ebp` - Frame pointer +- `esp` - Stack pointer + +**Common usage**: +- `eax` - Expression results, return values +- `ecx` - Left operand in binary operations +- `[ebp+N]` - Function parameters +- `[ebp-N]` - Local variables + +### Calling C from Assembly + +```nasm +; Call: printf("Value: %d\n", x); +push dword [ebp-4] ; Push x +push _s0 ; Push format string +call printf +add esp, 8 ; Clean up (2 args × 4 bytes) +``` + +### Inline Assembly + +Not supported. 
Use C library functions or write separate assembly files. + +--- + +## 12. Limitations + +### Language Limitations + +1. **Single-file compilation only** + - No `#include` or import mechanism + - All code must be in one source file + - Use forward declarations for ordering + +2. **No structures or unions** + - Can simulate with arrays: `node[0]` = data, `node[1]` = next + - Manual offset calculation required + +3. **No floating point** + - Integer arithmetic only + - No `float`, `double`, or `long double` + +4. **No preprocessor** + - No `#define`, `#ifdef`, etc. + - No macro expansion + - No file inclusion + +5. **No enums** + - Use integer constants instead + +6. **Limited 64-bit support** + - 64-bit types exist but operations truncate to 32-bit + - Full 64-bit arithmetic not implemented + +7. **No static/extern keywords** + - All functions are global + - No static local variables + - No explicit extern declarations + +8. **Single-dimensional arrays only** + - Multidimensional arrays not supported + - Can use pointer arithmetic for 2D: `arr[i * width + j]` + +9. **No goto** + - Use loops and breaks instead + +10. **No comma operator** + - Cannot use `a = (b, c)` + +### Implementation Limitations + +1. **Fixed buffer sizes** + - 256 identifiers/strings + - 256 local variables per function + - 256 global variables total + - 512 string literals + +2. **No optimization** + - Generated code is unoptimized + - Expressions fully evaluated (no constant folding) + +3. **Limited error messages** + - Basic syntax errors reported + - No semantic analysis warnings + - No type mismatch warnings + +4. 
**x86-32 only** + - Not portable to other architectures + - Requires 32-bit toolchain + +### Workarounds + +**Structures**: Use arrays +```c +// Instead of: struct { int x; int y; } point; +int32 point[2]; // point[0] = x, point[1] = y +``` + +**Multidimensional arrays**: Manual indexing +```c +// Instead of: int matrix[10][10]; +int32 matrix[100]; +int32 value = matrix[row * 10 + col]; +``` + +**Enums**: Integer constants +```c +// Instead of: enum { RED, GREEN, BLUE }; +int32 RED = 0; +int32 GREEN = 1; +int32 BLUE = 2; +``` + +--- + +## 13. Examples + +### Hello World + +```c +void puts(uint8 *s); + +int32 main(void) { + puts("Hello, World!"); + return 0; +} +``` + +### Factorial (Iterative) + +```c +int32 factorial(int32 n) { + int32 result = 1; + for (int32 i = 2; i <= n; i = i + 1) { + result = result * i; + } + return result; +} +``` + +### Fibonacci (Recursive) + +```c +int32 fib(int32 n) { + if (n <= 1) + return n; + return fib(n - 1) + fib(n - 2); +} +``` + +### String Length + +```c +int32 strlen(uint8 *s) { + int32 len = 0; + while (s[len]) + len = len + 1; + return len; +} +``` + +### Array Sum + +```c +int32 sum_array(int32 *arr, int32 len) { + int32 total = 0; + for (int32 i = 0; i < len; i = i + 1) { + total = total + arr[i]; + } + return total; +} +``` + +### Pointer Swap + +```c +void swap(int32 *a, int32 *b) { + int32 temp = *a; + *a = *b; + *b = temp; +} +``` + +### Bubble Sort + +```c +void bubble_sort(int32 *arr, int32 n) { + for (int32 i = 0; i < n - 1; i = i + 1) { + for (int32 j = 0; j < n - i - 1; j = j + 1) { + if (arr[j] > arr[j + 1]) { + int32 temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } + } + } +} +``` + +### Binary Search + +```c +int32 binary_search(int32 *arr, int32 n, int32 target) { + int32 left = 0; + int32 right = n - 1; + + while (left <= right) { + int32 mid = left + (right - left) / 2; + + if (arr[mid] == target) + return mid; + + if (arr[mid] < target) + left = mid + 1; + else + right = mid - 1; + } + + 
return -1; // Not found +} +``` + +### Linked List (Simulated) + +```c +void *malloc(uint32 size); +void free(void *ptr); + +// Node: [0] = data, [1] = next pointer +int32 *create_node(int32 value) { + int32 *node = (int32*)malloc(8); + node[0] = value; + node[1] = 0; + return node; +} + +void insert_front(int32 **head, int32 value) { + int32 *new_node = create_node(value); + new_node[1] = (int32)(*head); + *head = new_node; +} +``` + +### Bitwise Operations + +```c +// Check if bit N is set +int32 is_bit_set(uint32 value, int32 n) { + return (value >> n) & 1; +} + +// Set bit N +uint32 set_bit(uint32 value, int32 n) { + return value | (1 << n); +} + +// Clear bit N +uint32 clear_bit(uint32 value, int32 n) { + return value & ~(1 << n); +} + +// Toggle bit N +uint32 toggle_bit(uint32 value, int32 n) { + return value ^ (1 << n); +} +``` + +--- + +## Appendix A: Grammar Summary + +``` +program ::= declaration* + +declaration ::= + | type_spec identifier "(" param_list ")" ( ";" | block ) + | type_spec identifier ";" + | type_spec identifier "=" expr ";" + | type_spec identifier "[" expr "]" ";" + | type_spec identifier "[" expr "]" "=" "{" expr_list "}" ";" + +type_spec ::= base_type "*"* + +base_type ::= "void" | "int8" | "int16" | "int32" | "int64" + | "uint8" | "uint16" | "uint32" | "uint64" + +param_list ::= "void" | ( param ( "," param )* )? + +param ::= type_spec identifier + +block ::= "{" statement* "}" + +statement ::= + | block + | type_spec identifier ";" + | type_spec identifier "=" expr ";" + | type_spec identifier "[" expr "]" ( "=" "{" expr_list "}" )? ";" + | expr ";" + | "if" "(" expr ")" statement ( "else" statement )? + | "while" "(" expr ")" statement + | "for" "(" (decl | expr)? ";" expr? ";" expr? ")" statement + | "switch" "(" expr ")" "{" case_clause* "}" + | "return" expr? 
";" + | "break" ";" + | "continue" ";" + +case_clause ::= + | "case" expr ":" statement* + | "default" ":" statement* + +expr ::= assignment + +assignment ::= ternary ( assign_op ternary )? + +assign_op ::= "=" | "+=" | "-=" | "*=" | "/=" | "%=" + | "&=" | "|=" | "^=" | "<<=" | ">>=" + +ternary ::= logical_or ( "?" expr ":" ternary )? + +logical_or ::= logical_and ( "||" logical_and )* + +logical_and ::= bit_or ( "&&" bit_or )* + +bit_or ::= bit_xor ( "|" bit_xor )* + +bit_xor ::= bit_and ( "^" bit_and )* + +bit_and ::= equality ( "&" equality )* + +equality ::= relational ( ("==" | "!=") relational )* + +relational ::= shift ( ("<" | "<=" | ">" | ">=") shift )* + +shift ::= additive ( ("<<" | ">>") additive )* + +additive ::= multiplicative ( ("+" | "-") multiplicative )* + +multiplicative ::= unary ( ("*" | "/" | "%") unary )* + +unary ::= + | postfix + | "++" unary + | "--" unary + | "-" unary + | "!" unary + | "~" unary + | "&" unary + | "*" unary + | "(" type_spec ")" unary + +postfix ::= + | primary + | postfix "[" expr "]" + | postfix "(" expr_list? ")" + | postfix "++" + | postfix "--" + +primary ::= + | integer_literal + | string_literal + | identifier + | "(" expr ")" +``` + +--- + +## Appendix B: Quick Reference Card + +**Types**: void, int8, int16, int32, int64, uint8, uint16, uint32, uint64 + +**Operators**: + - * / % & | ^ ~ ! 
< > <= >= == != << >> && || ?: = += -= *= /= %= &= |= ^= <<= >>= ++ -- & * [] + +**Keywords**: if else while for switch case default break continue return + +**Control**: if/else, while, for, switch/case, break, continue, return + +**Functions**: type name(params) { body } + +**Arrays**: type name[size], type name[size] = { values } + +**Pointers**: type *name, &var, *ptr, ptr[index] + +**Comments**: // line, /* block */ + +--- + +*End of Common Language Reference Manual* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..48c6654 --- /dev/null +++ b/Makefile @@ -0,0 +1,101 @@ +# Makefile for Common compiler test suite and examples + +CC = gcc +CFLAGS = -std=c99 -Wall -O2 +NASM = nasm +NASMFLAGS = -f elf32 +LD = gcc +LDFLAGS = -m32 + +# Compiler +COMPILER = common +COMPILER_SRC = common.c + +# Test runner +TEST_RUNNER = test_runner +TEST_RUNNER_SRC = test_runner.c + +# Example programs +EXAMPLES_DIR = examples +EXAMPLES = hello fibonacci arrays pointers bubblesort bitwise types switch primes strings calculator + +# Default target +.PHONY: all +all: $(COMPILER) $(TEST_RUNNER) + +# Build the Common compiler +$(COMPILER): $(COMPILER_SRC) + $(CC) $(CFLAGS) -o $@ $< + @echo "Built Common compiler" + +# Build the test runner +$(TEST_RUNNER): $(TEST_RUNNER_SRC) + $(CC) $(CFLAGS) -o $@ $< + @echo "Built test runner" + +# Run tests +.PHONY: test +test: $(COMPILER) $(TEST_RUNNER) + @echo "Running test suite..." + ./$(TEST_RUNNER) + +# Build all examples +.PHONY: examples +examples: $(COMPILER) $(EXAMPLES) + +# Pattern rule for building examples +# Usage: make hello, make fibonacci, etc. +$(EXAMPLES): %: $(EXAMPLES_DIR)/%.cm $(COMPILER) + @echo "Building $@..." 
+ ./$(COMPILER) $(EXAMPLES_DIR)/$@.cm $@.asm + $(NASM) $(NASMFLAGS) $@.asm -o $@.o + $(LD) $(LDFLAGS) $@.o -o $@ + @echo "Built $@ successfully" + +# Run all examples +.PHONY: run-examples +run-examples: examples + @echo "=== Running Examples ===" + @for prog in $(EXAMPLES); do \ + echo ""; \ + echo ">>> Running $$prog..."; \ + ./$$prog || true; \ + done + +# Clean all generated files +.PHONY: clean +clean: + rm -f $(COMPILER) $(TEST_RUNNER) + rm -f *.asm *.o $(EXAMPLES) + rm -f /tmp/test.cm /tmp/test.asm /tmp/test.o /tmp/test /tmp/test.out /tmp/test.err + @echo "Cleaned all generated files" + +# Help +.PHONY: help +help: + @echo "Common Compiler Makefile" + @echo "" + @echo "Targets:" + @echo " all - Build compiler and test runner (default)" + @echo " test - Run the test suite" + @echo " examples - Build all example programs" + @echo " run-examples - Build and run all examples" + @echo " clean - Remove all generated files" + @echo "" + @echo "Build individual examples:" + @echo " make hello" + @echo " make fibonacci" + @echo " make arrays" + @echo " make pointers" + @echo " make bubblesort" + @echo " make bitwise" + @echo " make types" + @echo " make switch" + @echo " make primes" + @echo " make strings" + @echo " make calculator" + @echo "" + @echo "Run individual examples:" + @echo " make hello && ./hello" + @echo " make fibonacci && ./fibonacci" + @echo " etc." 
diff --git a/QUICKREF.md b/QUICKREF.md new file mode 100644 index 0000000..7d80914 --- /dev/null +++ b/QUICKREF.md @@ -0,0 +1,332 @@ +# Common Language Quick Reference + +## Compilation + +```bash +./common source.cm output.asm +nasm -f elf32 output.asm -o output.o +gcc -m32 output.o -o program +``` + +## Types + +```c +void // No return value +int8 int16 int32 int64 // Signed integers +uint8 uint16 uint32 uint64 // Unsigned integers +type* // Pointer to type +type[N] // Array of N elements +``` + +## Variables + +```c +int32 x; // Declaration +int32 y = 42; // Initialization +int32 arr[10]; // Array +int32 nums[3] = {1, 2, 3}; // Array with initializer +int32 *ptr; // Pointer +``` + +## Operators + +### Arithmetic +```c ++ - * / % // Add, Sub, Mul, Div, Mod +``` + +### Comparison +```c +== != < <= > >= // Equal, Not-equal, Less, etc. +``` + +### Logical +```c +&& || ! // AND, OR, NOT (short-circuit) +``` + +### Bitwise +```c +& | ^ ~ // AND, OR, XOR, NOT +<< >> // Left shift, Right shift +``` + +### Assignment +```c += += -= *= /= %= // Assign, Add-assign, etc. +&= |= ^= <<= >>= // Bitwise assign ops +``` + +### Increment/Decrement +```c +++ -- // Increment, Decrement (pre/post) +``` + +### Pointer/Array +```c +&x // Address of x +*ptr // Dereference ptr +arr[i] // Array index (same as *(arr+i)) +``` + +### Ternary +```c +cond ? 
true_val : false_val // Conditional expression +``` + +## Control Flow + +### If-Else +```c +if (condition) + statement; + +if (condition) { + statements; +} else { + statements; +} +``` + +### While Loop +```c +while (condition) { + statements; +} +``` + +### For Loop +```c +for (int32 i = 0; i < n; i++) { + statements; +} +``` + +### Switch +```c +switch (expr) { + case 1: + statements; + break; + case 2: + statements; + break; + default: + statements; +} +``` + +### Break/Continue/Return +```c +break; // Exit loop or switch +continue; // Next loop iteration +return; // Return from void function +return expr; // Return value +``` + +## Functions + +### Declaration +```c +int32 add(int32 a, int32 b); // Forward declaration +``` + +### Definition +```c +int32 add(int32 a, int32 b) { + return a + b; +} +``` + +### No Parameters +```c +void func(void) { + // ... +} +``` + +### Main Function +```c +int32 main(void) { + // Entry point + return 0; +} +``` + +## Pointers + +```c +int32 x = 42; +int32 *p = &x; // p points to x +*p = 100; // Set x to 100 via pointer +int32 y = *p; // Read through pointer +``` + +## Arrays + +```c +int32 arr[5]; // Declare +arr[0] = 10; // Set element +int32 x = arr[2]; // Get element + +// Array initialization +int32 nums[5] = {1, 2, 3, 4, 5}; + +// Arrays decay to pointers +int32 *p = arr; // p points to arr[0] +``` + +## Strings + +```c +uint8 *str = "Hello"; // String literal +printf("%s\n", str); // Print string + +// String as array +uint8 msg[] = "Hello"; +msg[0] = 'h'; // Modify +``` + +## Comments + +```c +// Single-line comment + +/* Multi-line + comment */ +``` + +## Type Casting + +```c +(type)expression // Cast to type + +int32 x = 1000; +uint8 y = (uint8)x; // Truncate to 8 bits + +uint8 *s = (uint8*)"string"; // Pointer cast +``` + +## Common Patterns + +### Swap +```c +void swap(int32 *a, int32 *b) { + int32 temp = *a; + *a = *b; + *b = temp; +} +``` + +### String Length +```c +int32 strlen(uint8 *s) { + int32 len = 0; + 
while (s[len]) len++; + return len; +} +``` + +### Array Sum +```c +int32 sum(int32 *arr, int32 n) { + int32 total = 0; + for (int32 i = 0; i < n; i++) + total += arr[i]; + return total; +} +``` + +### Min/Max +```c +int32 min(int32 a, int32 b) { + return (a < b) ? a : b; +} + +int32 max(int32 a, int32 b) { + return (a > b) ? a : b; +} +``` + +## Calling C Functions + +```c +// Declare before use +void printf(uint8 *fmt, ...); +void *malloc(uint32 size); +void free(void *ptr); + +// Use +printf("Value: %d\n", x); +void *mem = malloc(100); +free(mem); +``` + +## Operator Precedence (High to Low) + +1. `()` `[]` `++` `--` (postfix) +2. `!` `~` `++` `--` `+` `-` `*` `&` (unary) `(cast)` +3. `*` `/` `%` +4. `+` `-` +5. `<<` `>>` +6. `<` `<=` `>` `>=` +7. `==` `!=` +8. `&` +9. `^` +10. `|` +11. `&&` +12. `||` +13. `?:` +14. `=` `+=` `-=` etc. + +## Limitations + +- No structs/unions (use arrays) +- No enums (use int32 constants) +- No floats (integers only) +- No preprocessor (#define, #include) +- Single file compilation only +- 1D arrays only (simulate 2D: `arr[row*width+col]`) +- No goto +- No static/extern keywords +- 64-bit types partially supported + +## Common Gotchas + +```c +// Assignment vs.
Equality +if (x = 5) // WRONG: assigns 5 to x +if (x == 5) // RIGHT: compares x to 5 + +// Array indexing +int32 arr[10]; +arr[10] = 0; // WRONG: out of bounds +arr[9] = 0; // RIGHT: last element + +// Pointer arithmetic scales by type size +int32 *p = arr; +p + 1; // Points 4 bytes ahead (size of int32) + +// Semicolons required +if (x > 0) + y = 1 // WRONG: missing semicolon + +if (x > 0) + y = 1; // RIGHT +``` + +## Error Messages + +``` +line N: syntax error near 'token' +line N: Unknown char 'X' +line N: expected expression +line N: too many locals/globals/strings +``` + +Check: +- Missing semicolons +- Mismatched braces/parentheses +- Undeclared variables +- Type mismatches +- Buffer limits exceeded diff --git a/README.md b/README.md new file mode 100644 index 0000000..41dde98 --- /dev/null +++ b/README.md @@ -0,0 +1,349 @@ +# Common Programming Language + +A minimalist, dependency-free compiler for a C-like language that targets x86-32 assembly. + +## Overview + +Common is a statically-typed systems programming language with: +- **No runtime dependencies** - compiles to standalone executables +- **Direct C interoperability** - call and be called by C code +- **Predictable codegen** - straightforward mapping to assembly +- **Complete type system** - 8 integer types, pointers, arrays +- **Full control flow** - if/else, loops, switch, functions +- **Zero external dependencies** - just libc, gcc, and nasm + +## Quick Start + +### Build the Compiler + +```bash +gcc -o common common.c +``` + +### Hello World + +Create `hello.cm`: +```c +void puts(uint8 *s); + +int32 main(void) { + puts("Hello, World!"); + return 0; +} +``` + +Compile and run: +```bash +./common hello.cm hello.asm +nasm -f elf32 hello.asm -o hello.o +gcc -m32 hello.o -o hello +./hello +``` + +Or use the Makefile: +```bash +make # Build compiler and test suite +make test # Run all tests +make hello # Build hello example +make examples # Build all examples +make run-examples # Build and run all 
examples +``` + +## Documentation + +### For Users + +- **[Quick Reference](QUICKREF.md)** - One-page cheat sheet for syntax and operators +- **[Reference Manual](MANUAL.md)** - Complete language specification (80+ pages) +- **[Troubleshooting Guide](TROUBLESHOOTING.md)** - Solutions to common problems + +### For Developers + +- **[Test Suite README](README_TESTS.md)** - How to run and write tests +- **[Source Code](common.c)** - Well-commented compiler implementation + +## Language Features + +### Types + +```c +// Integers +int8 int16 int32 int64 // Signed +uint8 uint16 uint32 uint64 // Unsigned + +// Pointers and arrays +int32 *ptr; // Pointer +int32 arr[10]; // Array +uint8 *str = "text"; // String +``` + +### Control Flow + +```c +if (x > 0) { ... } +while (x < 100) { ... } +for (int32 i = 0; i < n; i++) { ... } +switch (x) { case 1: ... break; } +``` + +### Operators + +```c +// Arithmetic: + - * / % +// Comparison: == != < <= > >= +// Logical: && || ! +// Bitwise: & | ^ ~ << >> +// Pointers: & * +// Increment: ++ -- +``` + +### Functions + +```c +int32 add(int32 a, int32 b) { + return a + b; +} + +int32 factorial(int32 n) { + if (n <= 1) return 1; + return n * factorial(n - 1); +} +``` + +## Example Programs + +All examples are in the `examples/` directory: + +| Program | Description | +|---------|-------------| +| **hello.cm** | Hello World | +| **fibonacci.cm** | Recursive Fibonacci | +| **arrays.cm** | Array operations | +| **pointers.cm** | Pointer manipulation | +| **bubblesort.cm** | Bubble sort algorithm | +| **bitwise.cm** | Bitwise operations | +| **types.cm** | Type casting examples | +| **switch.cm** | Switch statements | +| **primes.cm** | Prime number calculator | +| **strings.cm** | String functions | +| **calculator.cm** | Expression evaluator | +| **linkedlist.cm** | Linked list (simulated) | + +Build any example: +```bash +make fibonacci && ./fibonacci +make bubblesort && ./bubblesort +``` + +## Test Suite + +The test suite includes 60+ 
automated tests covering: +- Arithmetic and operators +- Variables and arrays +- Control flow +- Functions and recursion +- Pointers and type casting +- All integer types + +Run tests: +```bash +make test +# or +./run_tests.sh +``` + +## Compilation Pipeline + +``` +source.cm → [common compiler] → output.asm → [nasm] → output.o → [gcc] → executable +``` + +1. **Common compiler**: Parses source, generates NASM assembly +2. **NASM**: Assembles to ELF32 object file +3. **GCC**: Links with C runtime library + +## Requirements + +- **GCC** with 32-bit support (gcc-multilib) +- **NASM** assembler +- **Linux** or compatible environment (WSL works) + +Installation: +```bash +# Ubuntu/Debian +sudo apt-get install build-essential gcc-multilib nasm + +# Fedora/RHEL +sudo dnf install gcc glibc-devel.i686 nasm + +# Arch +sudo pacman -S gcc lib32-gcc-libs nasm +``` + +## Language Limitations + +- **Single file compilation** - no modules or includes +- **No structs/unions** - use arrays for structured data +- **No floating point** - integers only +- **No preprocessor** - no #define, #include +- **1D arrays only** - simulate 2D with manual indexing +- **Partial 64-bit support** - types exist but ops truncate to 32-bit + +See [MANUAL.md](MANUAL.md) for complete details and workarounds. 
+ +## Implementation Details + +**Target**: x86-32 (IA-32) ELF +**Calling convention**: cdecl +**Stack alignment**: 16-byte (System V ABI) +**Registers**: +- `eax`: return values, expressions +- `ecx`: left operand +- `edx`: scratch +- `ebp`: frame pointer +- `esp`: stack pointer + +**Code sections**: +- `.text`: executable code +- `.data`: initialized globals, strings +- `.bss`: zero-initialized globals + +## Architecture + +The compiler is a single-pass implementation in C99: + +``` +┌─────────────┐ +│ Lexer │ Tokenize source +├─────────────┤ +│ Parser │ Build AST +├─────────────┤ +│ Type Check │ Infer expression types +├─────────────┤ +│ Code Gen │ Emit NASM assembly +└─────────────┘ +``` + +Key components: +- **Lexer** (150 LOC): Tokenization with lookahead +- **Parser** (400 LOC): Recursive descent parser +- **Type System** (200 LOC): Type inference for pointer arithmetic +- **Code Generator** (800 LOC): Assembly emission + +Total: ~2000 lines of C99 + +## C Interoperability + +Common can call C functions: + +```c +// Declare C functions +void printf(uint8 *fmt, ...); +void *malloc(uint32 size); +void free(void *ptr); + +int32 main(void) { + printf("Allocated %d bytes\n", 100); + void *mem = malloc(100); + free(mem); + return 0; +} +``` + +C can call Common functions: +```c +// common.cm +int32 compute(int32 x) { + return x * x; +} + +// main.c +extern int compute(int); +int main() { + printf("%d\n", compute(10)); +} +``` + +Compile: +```bash +./common common.cm common.asm +nasm -f elf32 common.asm -o common.o +gcc -m32 main.c common.o -o program +``` + +## Comparison to C + +### Similar to C +- Syntax and semantics +- Type system (with fewer types) +- Pointer arithmetic +- Control flow +- Function calls (cdecl) + +### Different from C +- No preprocessor +- No structs/unions +- No enums +- No static/extern keywords +- No goto +- Single file only +- Simpler type system + +### Simpler than C +- No type qualifiers (const, volatile) +- No storage classes (auto, 
register) +- No function pointers (can cast to void*) +- No variadic function definitions +- No bitfields +- No flexible array members + +## Project Structure + +``` +. +├── common.c # Compiler source (2000 LOC) +├── Makefile # Build automation +├── run_tests.sh # Quick test script +│ +├── MANUAL.md # Complete language reference +├── QUICKREF.md # One-page cheat sheet +├── TROUBLESHOOTING.md # Problem solutions +├── README_TESTS.md # Test suite documentation +│ +├── test_runner.c # Automated test harness +│ +└── examples/ # Example programs + ├── hello.cm + ├── fibonacci.cm + ├── arrays.cm + ├── pointers.cm + ├── bubblesort.cm + ├── bitwise.cm + ├── types.cm + ├── switch.cm + ├── primes.cm + ├── strings.cm + ├── calculator.cm + └── linkedlist.cm +``` + +## License + +Public domain / CC0. Use freely for any purpose. + +## Credits + +Inspired by: +- **C** - Dennis Ritchie and Brian Kernighan +- **chibicc** - Rui Ueyama's educational C compiler +- **8cc** - Rui Ueyama's C compiler +- **tcc** - Fabrice Bellard's Tiny C Compiler + +Built for programmers who value: +- Simplicity over features +- Control over convenience +- Learning over abstraction + +--- + +Start with the [Quick Reference](QUICKREF.md) or dive into the [Manual](MANUAL.md). diff --git a/README_TESTS.md b/README_TESTS.md new file mode 100644 index 0000000..440d7ad --- /dev/null +++ b/README_TESTS.md @@ -0,0 +1,286 @@ +# Common Compiler - Test Suite and Examples + +This directory contains a comprehensive test suite and example programs for the Common programming language compiler. + +## Building the Compiler + +First, build the compiler: + +```bash +gcc -o common common.c +``` + +## Test Suite + +### Building and Running Tests + +The test suite is a dependency-free C99 program that automatically compiles, assembles, links, and runs test programs. + +```bash +# Build the test runner +gcc -std=c99 -o test_runner test_runner.c + +# Run all tests +./test_runner +``` + +The test runner will: +1. 
Compile each test program with the Common compiler +2. Assemble with NASM +3. Link with GCC +4. Execute and verify the results +5. Report pass/fail status + +### Test Coverage + +The test suite includes over 60 tests covering: + +- **Arithmetic**: add, subtract, multiply, divide, modulo +- **Variables**: local and global variables with initialization +- **Control Flow**: if/else, while, for, switch/case, break, continue +- **Operators**: + - Comparison: ==, !=, <, <=, >, >= + - Logical: &&, ||, ! + - Bitwise: &, |, ^, ~, <<, >> + - Increment/Decrement: ++, -- + - Compound Assignment: +=, -=, *=, /=, etc. +- **Functions**: calls, recursion, multiple parameters +- **Arrays**: declaration, initialization, indexing +- **Pointers**: address-of, dereference, pointer arithmetic +- **Type Casting**: explicit casts between integer types +- **Integer Types**: uint8, uint16, uint32, uint64, int8, int16, int32, int64 +- **Ternary Operator**: ? : + +## Example Programs + +The `examples/` directory contains practical programs demonstrating Common language features. + +### Compiling Examples + +All examples follow this pattern: + +```bash +./common examples/hello.cm hello.asm +nasm -f elf32 hello.asm -o hello.o +gcc -m32 hello.o -o hello +./hello +``` + +Or use this one-liner: + +```bash +./common examples/hello.cm hello.asm && nasm -f elf32 hello.asm && gcc -m32 hello.o -o hello && ./hello +``` + +### Available Examples + +#### hello.cm +Basic "Hello, World!" program. + +```bash +./common examples/hello.cm hello.asm && nasm -f elf32 hello.asm && gcc -m32 hello.o -o hello +./hello +``` + +#### fibonacci.cm +Recursive Fibonacci number calculator. Demonstrates: +- Function recursion +- Conditionals +- Loops + +```bash +./common examples/fibonacci.cm fib.asm && nasm -f elf32 fib.asm && gcc -m32 fib.o -o fib +./fib +``` + +#### arrays.cm +Array manipulation with sum and reverse operations. 
Demonstrates: +- Array initialization +- Array traversal +- Function parameters with arrays +- Array modification + +```bash +./common examples/arrays.cm arrays.asm && nasm -f elf32 arrays.asm && gcc -m32 arrays.o -o arrays +./arrays +``` + +#### pointers.cm +Pointer operations and pointer arithmetic. Demonstrates: +- Pointer declaration and dereferencing +- Address-of operator +- Pointer arithmetic +- Pointer to pointer +- Pass-by-reference with pointers + +```bash +./common examples/pointers.cm ptrs.asm && nasm -f elf32 ptrs.asm && gcc -m32 ptrs.o -o ptrs +./ptrs +``` + +#### bubblesort.cm +Bubble sort implementation. Demonstrates: +- Nested loops +- Array sorting +- Swap algorithm + +```bash +./common examples/bubblesort.cm sort.asm && nasm -f elf32 sort.asm && gcc -m32 sort.o -o sort +./sort +``` + +#### bitwise.cm +Comprehensive bitwise operations. Demonstrates: +- Bitwise AND, OR, XOR, NOT +- Bit shifts +- Bit counting +- Bit manipulation algorithms + +```bash +./common examples/bitwise.cm bits.asm && nasm -f elf32 bits.asm && gcc -m32 bits.o -o bits +./bits +``` + +#### types.cm +Different integer types and type casting. Demonstrates: +- uint8, uint16, uint32 +- int8, int16, int32 +- Type casting +- Sign extension +- Truncation behavior + +```bash +./common examples/types.cm types.asm && nasm -f elf32 types.asm && gcc -m32 types.o -o types +./types +``` + +#### switch.cm +Switch/case statement usage. Demonstrates: +- Switch statements +- Case labels +- Default case +- Fall-through behavior + +```bash +./common examples/switch.cm switch.asm && nasm -f elf32 switch.asm && gcc -m32 switch.o -o switch +./switch +``` + +#### primes.cm +Prime number calculator. Demonstrates: +- Mathematical algorithms +- Optimized loop conditions +- Complex conditionals + +```bash +./common examples/primes.cm primes.asm && nasm -f elf32 primes.asm && gcc -m32 primes.o -o primes +./primes +``` + +#### strings.cm +String manipulation functions. 
Demonstrates: +- String literals +- Character arrays +- String length, copy, compare +- String reversal + +```bash +./common examples/strings.cm strings.asm && nasm -f elf32 strings.asm && gcc -m32 strings.o -o strings +./strings +``` + +#### calculator.cm +Expression calculator with global state. Demonstrates: +- Global variables +- Multiple function definitions +- Function composition +- Error handling +- State tracking + +```bash +./common examples/calculator.cm calc.asm && nasm -f elf32 calc.asm && gcc -m32 calc.o -o calc +./calc +``` + +## Common Language Quick Reference + +### Types +```c +uint8, uint16, uint32, uint64 +int8, int16, int32, int64 +void +``` + +### Variables +```c +int32 x; // Declaration +int32 y = 42; // Declaration with initialization +int32 arr[10]; // Array declaration +int32 *ptr; // Pointer declaration +``` + +### Control Flow +```c +if (condition) { ... } +if (condition) { ... } else { ... } +while (condition) { ... } +for (init; condition; increment) { ... } +switch (expr) { + case value: ... break; + default: ... +} +``` + +### Operators +```c +// Arithmetic: +, -, *, /, % +// Comparison: ==, !=, <, <=, >, >= +// Logical: &&, ||, ! +// Bitwise: &, |, ^, ~, <<, >> +// Assignment: =, +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>= +// Increment/Decrement: ++, -- +// Ternary: ? 
: +// Address: &, * +``` + +### Functions +```c +int32 function_name(int32 param1, uint8 *param2) { + return value; +} +``` + +### Comments +```c +// Single line comment +/* Multi-line + comment */ +``` + +## Requirements + +- GCC with 32-bit support (gcc-multilib) +- NASM assembler +- Linux (or compatible environment) + +On Ubuntu/Debian: +```bash +sudo apt-get install gcc-multilib nasm +``` + +## Troubleshooting + +**Error: "cannot find -lgcc_s"** +- Install 32-bit libraries: `sudo apt-get install gcc-multilib` + +**Error: "nasm: command not found"** +- Install NASM: `sudo apt-get install nasm` + +**Test failures** +- Ensure the compiler binary is in the current directory: `./common` +- Check that you have write access to `/tmp/` +- Verify 32-bit support is installed + +## License + +The test suite and examples are provided as-is for testing and demonstration purposes. diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md new file mode 100644 index 0000000..f2aca50 --- /dev/null +++ b/TROUBLESHOOTING.md @@ -0,0 +1,623 @@ +# Common Compiler Troubleshooting Guide + +## Installation Issues + +### Problem: "gcc: command not found" + +**Solution**: Install GCC +```bash +# Ubuntu/Debian +sudo apt-get install build-essential + +# Fedora/RHEL +sudo dnf install gcc + +# Arch +sudo pacman -S gcc +``` + +### Problem: "nasm: command not found" + +**Solution**: Install NASM assembler +```bash +# Ubuntu/Debian +sudo apt-get install nasm + +# Fedora/RHEL +sudo dnf install nasm + +# Arch +sudo pacman -S nasm +``` + +### Problem: "fatal error: bits/libc-header-start.h: No such file" + +**Cause**: Missing 32-bit development libraries + +**Solution**: Install 32-bit support +```bash +# Ubuntu/Debian +sudo apt-get install gcc-multilib + +# Fedora/RHEL +sudo dnf install glibc-devel.i686 libgcc.i686 +``` + +--- + +## Compilation Issues + +### Problem: Compiler fails to build + +**Error**: +``` +gcc -o common common.c +common.c:15:10: fatal error: stdio.h: No such file or directory +``` 
+ +**Solution**: Install build essentials +```bash +sudo apt-get install build-essential +``` + +### Problem: "Permission denied" when running compiler + +**Solution**: Make compiler executable +```bash +chmod +x ./common +``` + +Or run directly: +```bash +gcc -o common common.c +./common source.cm output.asm +``` + +--- + +## Assembly Issues + +### Problem: "error: invalid combination of opcode and operands" + +**Cause**: NASM version incompatibility or corrupt assembly output + +**Debug Steps**: +1. Check assembly output: + ```bash + ./common source.cm output.asm + cat output.asm + ``` + +2. Verify NASM version: + ```bash + nasm -version # Should be 2.x or higher + ``` + +3. Try manual assembly: + ```bash + nasm -f elf32 output.asm -o output.o + ``` + +### Problem: "undefined reference to function_name" + +**Cause**: Function called but not defined or linked + +**Solutions**: + +1. **Missing function definition**: + ```c + // Declare AND define the function + int32 helper(int32 x) { + return x * 2; + } + ``` + +2. **C library function not linked**: + ```bash + # Make sure you're linking with gcc + gcc -m32 output.o -o program + # NOT: ld output.o -o program + ``` + +3. 
**External library needed**: + ```bash + gcc -m32 output.o -lm -o program # Link math library + ``` + +--- + +## Linker Issues + +### Problem: "cannot find -lgcc_s" + +**Cause**: Missing 32-bit GCC support libraries + +**Solution**: +```bash +sudo apt-get install gcc-multilib +``` + +### Problem: "/usr/bin/ld: i386 architecture of input file is incompatible with i386:x86-64" + +**Cause**: Mixing 32-bit and 64-bit object files + +**Solution**: Ensure consistent 32-bit compilation: +```bash +nasm -f elf32 output.asm -o output.o # Must be elf32 +gcc -m32 output.o -o program # Must use -m32 +``` + +### Problem: "undefined reference to main" + +**Cause**: No main function in source + +**Solution**: Add main function: +```c +int32 main(void) { + // Your code here + return 0; +} +``` + +--- + +## Runtime Issues + +### Problem: Segmentation fault + +**Common Causes**: + +1. **Null pointer dereference**: + ```c + int32 *ptr = 0; + *ptr = 42; // CRASH: dereferencing NULL + ``` + + **Fix**: Check pointers before dereferencing + ```c + if (ptr != 0) { + *ptr = 42; + } + ``` + +2. **Array out of bounds**: + ```c + int32 arr[10]; + arr[10] = 5; // CRASH: index 10 is out of bounds (0-9) + ``` + + **Fix**: Check array bounds + ```c + if (index < 10) { + arr[index] = 5; + } + ``` + +3. **Stack overflow (infinite recursion)**: + ```c + int32 recurse(int32 n) { + return recurse(n); // CRASH: no base case + } + ``` + + **Fix**: Add base case + ```c + int32 recurse(int32 n) { + if (n <= 0) return 0; + return recurse(n - 1); + } + ``` + +4. **Writing to read-only memory**: + ```c + uint8 *str = "constant"; + str[0] = 'C'; // CRASH: string literals are read-only + ``` + + **Fix**: Use array for modifiable strings + ```c + uint8 str[20]; + str[0] = 'C'; // OK + ``` + +### Problem: Wrong output values + +**Debug Steps**: + +1. **Check integer overflow**: + ```c + int8 x = 127; + x = x + 1; // Wraps to -128 + ``` + +2. 
**Check division by zero**: + ```c + int32 result = 10 / 0; // Undefined behavior + ``` + + **Fix**: + ```c + if (divisor != 0) { + result = dividend / divisor; + } + ``` + +3. **Check type truncation**: + ```c + int32 large = 1000; + uint8 small = (uint8)large; // Truncated to 232 (1000 % 256) + ``` + +### Problem: Program hangs / infinite loop + +**Common Causes**: + +1. **Loop condition never false**: + ```c + uint32 i = 10; + while (i >= 0) { // INFINITE: unsigned i never < 0 + i = i - 1; + } + ``` + + **Fix**: + ```c + int32 i = 10; + while (i >= 0) { + i = i - 1; + } + ``` + +2. **Missing loop increment**: + ```c + for (int32 i = 0; i < 10; ) { // Missing i++ + // ... + } + ``` + +--- + +## Compiler Error Messages + +### "line N: syntax error near 'token'" + +**Causes**: +- Missing semicolon +- Mismatched braces/parentheses +- Invalid expression syntax + +**Debug**: +1. Check line N and surrounding lines +2. Look for missing `;` on previous line +3. Count braces: `{` should match `}` +4. Check operator usage + +**Example**: +```c +int32 x = 10 // ERROR: missing semicolon +int32 y = 20; +``` + +### "line N: Unknown char 'X'" + +**Cause**: Invalid character in source + +**Common Examples**: +- Smart (curly) quotes: `“` `”` instead of the plain ASCII `"` +- Non-ASCII characters +- Tab characters in wrong places + +**Fix**: Use plain ASCII text editor + +### "line N: expected expression" + +**Cause**: Invalid or incomplete expression + +**Example**: +```c +int32 x = ; // ERROR: no expression after = +int32 y = + 5; // ERROR: + needs left operand +``` + +### "too many locals" + +**Cause**: More than 256 local variables in a function + +**Solution**: +1. Reduce number of variables +2. Use arrays instead of individual variables +3. Split into multiple functions + +### "too many strings" + +**Cause**: More than 512 string literals in program + +**Solution**: +1. Reuse string literals +2. Build strings programmatically +3. 
Use character arrays + +--- + +## Debugging Techniques + +### Print Debugging + +```c +void printf(uint8 *fmt, ...); + +int32 main(void) { + int32 x = 10; + printf("x = %d\n", x); // Print values + + int32 *ptr = &x; + printf("ptr = %p, *ptr = %d\n", ptr, *ptr); // Print pointers + + return 0; +} +``` + +### Check Assembly Output + +```bash +./common source.cm output.asm +less output.asm # Review generated assembly +``` + +Look for: +- Correct function labels +- Proper stack setup +- Expected instructions + +### Use GDB + +```bash +# Compile with debug info +gcc -m32 -g output.o -o program + +# Run in debugger +gdb ./program + +# GDB commands: +(gdb) break main # Set breakpoint at main +(gdb) run # Run program +(gdb) next # Step to next line +(gdb) print x # Print variable x +(gdb) backtrace # Show call stack +(gdb) quit # Exit gdb +``` + +### Valgrind (Memory Errors) + +```bash +# Install valgrind +sudo apt-get install valgrind + +# Run with valgrind +valgrind --leak-check=full ./program +``` + +--- + +## Common Mistakes + +### 1. Assignment in Condition + +**Wrong**: +```c +if (x = 5) { // Assigns 5 to x, always true + // ... +} +``` + +**Right**: +```c +if (x == 5) { // Compares x to 5 + // ... +} +``` + +### 2. Infinite Loop with Unsigned + +**Wrong**: +```c +for (uint32 i = 10; i >= 0; i--) { // Infinite: unsigned never < 0 + // ... +} +``` + +**Right**: +```c +for (int32 i = 10; i >= 0; i--) { + // ... +} +``` + +### 3. Pointer vs. Value + +**Wrong**: +```c +void increment(int32 x) { + x = x + 1; // Only modifies local copy +} + +int32 val = 5; +increment(val); +// val is still 5 +``` + +**Right**: +```c +void increment(int32 *x) { + *x = *x + 1; // Modifies through pointer +} + +int32 val = 5; +increment(&val); +// val is now 6 +``` + +### 4. 
Array Decay + +**Confusing**: +```c +int32 arr[10]; +int32 *ptr = arr; // arr decays to pointer + +// arr and &arr are different: +arr // Pointer to first element (type: int32*) +&arr // Pointer to entire array (type: int32(*)[10]) +``` + +### 5. String Modification + +**Wrong**: +```c +uint8 *str = "Hello"; +str[0] = 'h'; // CRASH: string literal is read-only +``` + +**Right**: +```c +uint8 str[20] = "Hello"; // Array, modifiable +str[0] = 'h'; // OK +``` + +--- + +## Performance Issues + +### Slow Compilation + +**Causes**: +- Very large source file +- Many string literals + +**Solutions**: +- None (no optimization flags) +- Split code into modules (not supported in single-file compiler) + +### Slow Execution + +**Common Causes**: + +1. **Inefficient algorithms**: + ```c + // O(n²) - slow + for (int32 i = 0; i < n; i++) + for (int32 j = 0; j < n; j++) + // ... + ``` + +2. **Excessive function calls**: + ```c + // Fibonacci - exponential time + int32 fib(int32 n) { + if (n <= 1) return n; + return fib(n-1) + fib(n-2); // Recomputes same values + } + ``` + + **Fix**: Use iterative version or memoization + +3. **No optimizations**: The compiler doesn't optimize. Write efficient code. + +--- + +## Getting Help + +### Information to Provide + +When asking for help, include: + +1. **Source code** (minimal example that reproduces issue) +2. **Compilation command** used +3. **Full error message** (copy-paste, not screenshot) +4. **System information**: + ```bash + uname -a + gcc --version + nasm -version + ``` + +### Minimal Example + +Reduce your code to the smallest program that shows the problem: + +```c +// Minimal example showing segfault +int32 main(void) { + int32 *ptr = 0; + *ptr = 42; // Crash here + return 0; +} +``` + +### Check Examples First + +Before reporting a bug, verify the example programs work: + +```bash +make test +make examples +``` + +If examples work but your code doesn't, the issue is likely in your code, not the compiler. 
+ +--- + +## Known Issues + +### 64-bit Types + +**Issue**: 64-bit arithmetic truncates to 32 bits + +```c +uint64 x = 5000000000; // Stored as 64-bit +uint64 y = x * 2; // Multiplied as 32-bit, overflow +``` + +**Workaround**: Use 32-bit types or implement 64-bit arithmetic manually + +### Single File Limitation + +**Issue**: Cannot split code across multiple files + +**Workaround**: Put all code in one file, use forward declarations + +### No Preprocessor + +**Issue**: No `#define`, `#include`, etc. + +**Workaround**: +- Use global variables instead of #define (the language has no `const` qualifier) +- Copy/paste shared code +- Write a wrapper script to concatenate files + +--- + +## Environment-Specific Issues + +### WSL (Windows Subsystem for Linux) + +Usually works fine. If you run into issues: +```bash +sudo apt update +sudo apt install gcc-multilib nasm +``` + +### macOS + +**Problem**: macOS doesn't support Linux ELF32 + +**Solution**: Use a Linux VM or Docker container + +### 64-bit Only Systems + +**Problem**: No 32-bit support installed + +**Solution**: Install multilib packages (see Installation Issues above) + +--- + +*For more help, see the full manual (MANUAL.md) or examples (examples/)* diff --git a/common.c b/common.c new file mode 100644 index 0000000..a9542bd --- /dev/null +++ b/common.c @@ -0,0 +1,1433 @@ +/* + * Public domain / CC0. Use freely for any purpose. 
RoyR 2026 + * common.c — Compiler for the "Common" language + * Outputs NASM x86_32 assembly (cdecl, ELF32) + * + * Build: gcc -o common common.c + * Usage: ./common source.cm > output.asm + * nasm -f elf32 output.asm -o output.o + * gcc -m32 output.o -o output + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> + +/* ══════════════════════════════════════════════════════════ + TOKENS + ══════════════════════════════════════════════════════════ */ +typedef enum { + /* literals */ + TK_NUM, TK_STR, TK_ID, + /* types */ + TK_UINT8, TK_UINT16, TK_UINT32, TK_UINT64, TK_VOID, + TK_INT8, TK_INT16, TK_INT32, TK_INT64, + /* keywords */ + TK_IF, TK_ELSE, TK_WHILE, TK_FOR, + TK_SWITCH, TK_CASE, TK_DEFAULT, + TK_BREAK, TK_CONTINUE, TK_RETURN, + /* operators (multi-char first) */ + TK_EQ, TK_NEQ, TK_LEQ, TK_GEQ, TK_AND, TK_OR, + TK_SHL, TK_SHR, TK_INC, TK_DEC, + TK_ADDEQ, TK_SUBEQ, TK_MULEQ, TK_DIVEQ, TK_MODEQ, + TK_ANDEQ, TK_OREQ, TK_XOREQ, TK_SHLEQ, TK_SHREQ, + /* single-char operators / punctuation */ + TK_PLUS, TK_MINUS, TK_STAR, TK_SLASH, TK_MOD, + TK_AMP, TK_PIPE, TK_CARET, TK_TILDE, TK_BANG, + TK_LT, TK_GT, TK_ASSIGN, + TK_LPAREN, TK_RPAREN, TK_LBRACE, TK_RBRACE, + TK_LBRACK, TK_RBRACK, + TK_SEMI, TK_COMMA, TK_COLON, TK_QUESTION, + TK_EOF +} TKind; + +typedef struct { + TKind kind; + char str[512]; /* identifier / string value — 512 to match str_val */ + int slen; /* FIX(bug2+3): explicit byte count for str, excl. terminator */ + long num; /* numeric value */ + int line; /* source line where token starts */ +} Token; + +/* ══════════════════════════════════════════════════════════ + LEXER + ══════════════════════════════════════════════════════════ */ +static const char *src; +static int src_pos; +static int src_line = 1; /* current line number (1-based) */ +static Token tok; /* current token */ + +static void die(const char *fmt, ...) 
{ + fprintf(stderr, "line %d: ", src_line); + va_list ap; va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + exit(1); +} + +static void skip_ws(void) { + for (;;) { + while (isspace((unsigned char)src[src_pos])) { + if (src[src_pos] == '\n') src_line++; + src_pos++; + } + if (src[src_pos]=='/' && src[src_pos+1]=='/') { + while (src[src_pos] && src[src_pos]!='\n') src_pos++; + } else if (src[src_pos]=='/' && src[src_pos+1]=='*') { + src_pos += 2; + while (src[src_pos] && !(src[src_pos-1]=='*' && src[src_pos]=='/')) { + if (src[src_pos] == '\n') src_line++; + src_pos++; + } + if (src[src_pos]) src_pos++; + } else break; + } +} + +static struct { const char *kw; TKind tk; } kw_table[] = { + {"if",TK_IF},{"else",TK_ELSE},{"while",TK_WHILE},{"for",TK_FOR}, + {"switch",TK_SWITCH},{"case",TK_CASE},{"default",TK_DEFAULT}, + {"break",TK_BREAK},{"continue",TK_CONTINUE},{"return",TK_RETURN}, + {"void",TK_VOID},{"uint8",TK_UINT8},{"uint16",TK_UINT16}, + {"uint32",TK_UINT32},{"uint64",TK_UINT64}, + {"int8",TK_INT8},{"int16",TK_INT16},{"int32",TK_INT32}, + {"int64", TK_INT64},{NULL,TK_EOF} +}; + +static void next(void) { + skip_ws(); + tok.line = src_line; + char c = src[src_pos]; + if (!c) { tok.kind = TK_EOF; return; } + + /* number */ + if (isdigit((unsigned char)c)) { + char *end; + tok.num = (long)strtoul(src+src_pos, &end, 0); + src_pos = (int)(end - src); + tok.kind = TK_NUM; return; + } + + /* string literal */ + if (c == '"') { + src_pos++; int i=0; + while (src[src_pos] && src[src_pos]!='"') { + /* FIX(bug3): bounds check before writing into tok.str */ + if (i >= 511) die("string literal too long"); + if (src[src_pos]=='\\') { + src_pos++; + switch(src[src_pos]) { + case 'n': tok.str[i++]='\n'; break; + case 't': tok.str[i++]='\t'; break; + case 'r': tok.str[i++]='\r'; break; + case '0': tok.str[i++]='\0'; break; + default: tok.str[i++]=src[src_pos]; break; + } + } else tok.str[i++]=src[src_pos]; + src_pos++; + } + if (src[src_pos]=='"') 
src_pos++; + tok.str[i]=0; + tok.slen=i; /* FIX(bug2): record true byte count */ + tok.kind=TK_STR; + return; + } + + /* identifier / keyword */ + if (isalpha((unsigned char)c) || c=='_') { + int i=0; + while (isalnum((unsigned char)src[src_pos]) || src[src_pos]=='_') { + /* FIX(bug3): bounds check for identifiers too */ + if (i >= 511) die("identifier too long"); + tok.str[i++]=src[src_pos++]; + } + tok.str[i]=0; + tok.slen=i; + tok.kind=TK_ID; + for (int k=0; kw_table[k].kw; k++) + if (!strcmp(tok.str, kw_table[k].kw)) { tok.kind=kw_table[k].tk; return; } + return; + } + + /* multi/single char operators */ + src_pos++; + #define PEEK src[src_pos] + #define EAT src_pos++ + switch(c) { + case '+': if(PEEK=='+'){ EAT; tok.kind=TK_INC; } + else if(PEEK=='='){ EAT; tok.kind=TK_ADDEQ; } + else tok.kind=TK_PLUS; break; + case '-': if(PEEK=='-'){ EAT; tok.kind=TK_DEC; } + else if(PEEK=='='){ EAT; tok.kind=TK_SUBEQ; } + else tok.kind=TK_MINUS; break; + case '*': if(PEEK=='='){ EAT; tok.kind=TK_MULEQ; } else tok.kind=TK_STAR; break; + case '/': if(PEEK=='='){ EAT; tok.kind=TK_DIVEQ; } else tok.kind=TK_SLASH; break; + case '%': if(PEEK=='='){ EAT; tok.kind=TK_MODEQ; } else tok.kind=TK_MOD; break; + case '&': if(PEEK=='&'){ EAT; tok.kind=TK_AND; } + else if(PEEK=='='){ EAT; tok.kind=TK_ANDEQ; } + else tok.kind=TK_AMP; break; + case '|': if(PEEK=='|'){ EAT; tok.kind=TK_OR; } + else if(PEEK=='='){ EAT; tok.kind=TK_OREQ; } + else tok.kind=TK_PIPE; break; + case '^': if(PEEK=='='){ EAT; tok.kind=TK_XOREQ; } else tok.kind=TK_CARET; break; + case '<': if(PEEK=='<'){ EAT; if(PEEK=='='){ EAT; tok.kind=TK_SHLEQ; } else tok.kind=TK_SHL; } + else if(PEEK=='='){ EAT; tok.kind=TK_LEQ; } else tok.kind=TK_LT; break; + case '>': if(PEEK=='>'){ EAT; if(PEEK=='='){ EAT; tok.kind=TK_SHREQ; } else tok.kind=TK_SHR; } + else if(PEEK=='='){ EAT; tok.kind=TK_GEQ; } else tok.kind=TK_GT; break; + case '=': if(PEEK=='='){ EAT; tok.kind=TK_EQ; } else tok.kind=TK_ASSIGN; break; + case '!': 
if(PEEK=='='){ EAT; tok.kind=TK_NEQ; } else tok.kind=TK_BANG; break; + case '~': tok.kind=TK_TILDE; break; + case '(': tok.kind=TK_LPAREN; break; + case ')': tok.kind=TK_RPAREN; break; + case '{': tok.kind=TK_LBRACE; break; + case '}': tok.kind=TK_RBRACE; break; + case '[': tok.kind=TK_LBRACK; break; + case ']': tok.kind=TK_RBRACK; break; + case ';': tok.kind=TK_SEMI; break; + case ',': tok.kind=TK_COMMA; break; + case ':': tok.kind=TK_COLON; break; + case '?': tok.kind=TK_QUESTION;break; + default: die("Unknown char '%c'", c); + } + #undef PEEK + #undef EAT +} + +static void expect(TKind k) { + if (tok.kind != k) { + fprintf(stderr, "line %d: syntax error near '%s'\n", tok.line, tok.str); + exit(1); + } + next(); +} +static int accept(TKind k) { + if (tok.kind == k) { next(); return 1; } + return 0; +} +static int is_type(void) { + return tok.kind==TK_UINT8||tok.kind==TK_UINT16|| + tok.kind==TK_UINT32||tok.kind==TK_UINT64||tok.kind==TK_VOID|| + tok.kind==TK_INT8||tok.kind==TK_INT16||tok.kind==TK_INT32|| + tok.kind==TK_INT64; +} +static int is_signed(TKind t) { + return t==TK_INT8||t==TK_INT16||t==TK_INT32||t==TK_INT64; +} +static int is_64bit(TKind t) { + return t==TK_UINT64||t==TK_INT64; +} + +/* ══════════════════════════════════════════════════════════ + AST NODES + ══════════════════════════════════════════════════════════ */ +typedef enum { + N_PROG, N_FUNC, N_FDECL, N_GVAR, N_GARR, + N_BLK, N_LVAR, N_LARR, N_ES, N_RET, N_BRK, N_CONT, + N_IF, N_WHILE, N_FOR, N_SW, N_CASE, N_DEF, + N_NUM, N_STR, N_ID, + N_BOPN, N_UOPN, N_ASGN, N_TERN, + N_CALL, N_IDX, N_ADDR, N_DEREF, N_CAST, + N_POSTINC, N_POSTDEC, N_PREINC, N_PREDEC, + N_PARAM +} NKind; + +typedef struct Node Node; +struct Node { + NKind kind; + TKind op; /* operator token */ + long num; + char str[512]; /* FIX(bug3): was 256, now 512 to match str_val */ + int slen; /* FIX(bug2): explicit byte count for str literals */ + /* type */ + TKind tbase; /* TK_UINT8 … TK_VOID */ + int ptrs; /* pointer depth */ + /* 
children */ + Node *ch[4]; /* left/right/body/else */ + /* lists */ + Node **list; /* params, args, stmts, cases */ + int nlist; + int cap; +}; + +static Node *alloc_node(NKind k) { + Node *n = calloc(1, sizeof(Node)); + n->kind = k; return n; +} +static void list_push(Node *n, Node *child) { + if (n->nlist == n->cap) { + n->cap = n->cap ? n->cap*2 : 4; + n->list = realloc(n->list, n->cap * sizeof(Node*)); + } + n->list[n->nlist++] = child; +} + +/* ══════════════════════════════════════════════════════════ + PARSER + ══════════════════════════════════════════════════════════ */ +static void parse_type(TKind *base, int *ptrs) { + *base = tok.kind; next(); + *ptrs = 0; + while (tok.kind==TK_STAR) { (*ptrs)++; next(); } +} + +static Node *expr(void); + +/* forward decls */ +static Node *stmt(void); +static Node *block(void); + +static Node *primary(void) { + Node *n; + if (tok.kind==TK_NUM) { + n=alloc_node(N_NUM); n->num=tok.num; next(); return n; + } + if (tok.kind==TK_STR) { + n=alloc_node(N_STR); + /* FIX(bug2): use memcpy + slen instead of strcpy so embedded nulls survive */ + memcpy(n->str, tok.str, tok.slen+1); + n->slen=tok.slen; + next(); return n; + } + if (tok.kind==TK_ID) { + n=alloc_node(N_ID); strcpy(n->str, tok.str); next(); return n; + } + if (tok.kind==TK_LPAREN) { + next(); n=expr(); expect(TK_RPAREN); return n; + } + die("expected expression"); + return NULL; +} + +static Node *postfix(void) { + Node *e = primary(), *t; + for(;;) { + if (tok.kind==TK_LPAREN) { + next(); t=alloc_node(N_CALL); t->ch[0]=e; + if (tok.kind!=TK_RPAREN) + for(;;) { + list_push(t, expr()); + if (!accept(TK_COMMA)) break; + } + expect(TK_RPAREN); e=t; + } else if (tok.kind==TK_LBRACK) { + next(); t=alloc_node(N_IDX); t->ch[0]=e; t->ch[1]=expr(); + expect(TK_RBRACK); e=t; + } else if (tok.kind==TK_INC) { + next(); t=alloc_node(N_POSTINC); t->ch[0]=e; e=t; + } else if (tok.kind==TK_DEC) { + next(); t=alloc_node(N_POSTDEC); t->ch[0]=e; e=t; + } else break; + } + return e; +} + 
+static Node *unary(void) { + Node *t; + if (tok.kind==TK_MINUS||tok.kind==TK_BANG||tok.kind==TK_TILDE) { + TKind op=tok.kind; next(); + t=alloc_node(N_UOPN); t->op=op; t->ch[0]=unary(); return t; + } + if (tok.kind==TK_AMP) { + next(); t=alloc_node(N_ADDR); t->ch[0]=unary(); return t; + } + if (tok.kind==TK_STAR) { + next(); t=alloc_node(N_DEREF); t->ch[0]=unary(); return t; + } + if (tok.kind==TK_INC) { + next(); t=alloc_node(N_PREINC); t->ch[0]=unary(); return t; + } + if (tok.kind==TK_DEC) { + next(); t=alloc_node(N_PREDEC); t->ch[0]=unary(); return t; + } + /* cast: (type) expr */ + if (tok.kind==TK_LPAREN && (src[src_pos]==' '||1)) { + int saved=src_pos; Token saved_tok=tok; int saved_line=src_line; + next(); + if (is_type()) { + TKind tb; int pt; + parse_type(&tb,&pt); + if (tok.kind==TK_RPAREN) { + next(); t=alloc_node(N_CAST); + t->tbase=tb; t->ptrs=pt; t->ch[0]=unary(); return t; + } + } + src_pos=saved; tok=saved_tok; src_line=saved_line; + } + return postfix(); +} + +#define BINOP(name, next_fn, ...) 
\ +static Node *name(void) { \ + Node *l=next_fn(), *t; TKind ops[]={__VA_ARGS__, TK_EOF}; \ + for(;;) { \ + int found=0; \ + for(int i=0;ops[i]!=TK_EOF;i++) if(tok.kind==ops[i]){found=1;break;} \ + if(!found) break; \ + TKind op=tok.kind; next(); \ + t=alloc_node(N_BOPN); t->op=op; t->ch[0]=l; t->ch[1]=next_fn(); l=t; \ + } return l; \ +} + +BINOP(mul_expr, unary, TK_STAR, TK_SLASH, TK_MOD) +BINOP(add_expr, mul_expr, TK_PLUS, TK_MINUS) +BINOP(shf_expr, add_expr, TK_SHL, TK_SHR) +BINOP(cmp_expr, shf_expr, TK_LT, TK_LEQ, TK_GT, TK_GEQ) +BINOP(eq_expr, cmp_expr, TK_EQ, TK_NEQ) +BINOP(band_expr, eq_expr, TK_AMP) +BINOP(bxor_expr, band_expr,TK_CARET) +BINOP(bor_expr, bxor_expr,TK_PIPE) + +static Node *land_expr(void) { + Node *l=bor_expr(), *t; + while (tok.kind==TK_AND) { + next(); t=alloc_node(N_BOPN); t->op=TK_AND; + t->ch[0]=l; t->ch[1]=bor_expr(); l=t; + } return l; +} +static Node *lor_expr(void) { + Node *l=land_expr(), *t; + while (tok.kind==TK_OR) { + next(); t=alloc_node(N_BOPN); t->op=TK_OR; + t->ch[0]=l; t->ch[1]=land_expr(); l=t; + } return l; +} + +static Node *ternary(void); +static Node *ternary(void) { + Node *c=lor_expr(), *t; + if (tok.kind==TK_QUESTION) { + next(); t=alloc_node(N_TERN); t->ch[0]=c; + t->ch[1]=expr(); expect(TK_COLON); t->ch[2]=ternary(); return t; + } + return c; +} + +static TKind asgn_ops[] = { + TK_ASSIGN, TK_ADDEQ, TK_SUBEQ, TK_MULEQ, TK_DIVEQ, TK_MODEQ, + TK_ANDEQ, TK_OREQ, TK_XOREQ, TK_SHLEQ, TK_SHREQ, TK_EOF +}; +static Node *expr(void) { + Node *l=ternary(), *t; + for(int i=0; asgn_ops[i]!=TK_EOF; i++) + if (tok.kind==asgn_ops[i]) { + TKind op=tok.kind; next(); + t=alloc_node(N_ASGN); t->op=op; t->ch[0]=l; t->ch[1]=expr(); + return t; + } + return l; +} + +static Node *decl_local(void) { + TKind tb; int pt; + parse_type(&tb,&pt); + char nm[512]; strcpy(nm, tok.str); expect(TK_ID); + Node *n; + if (tok.kind==TK_LBRACK) { + next(); n=alloc_node(N_LARR); + n->tbase=tb; n->ptrs=pt; strcpy(n->str,nm); + n->ch[0]=expr(); 
expect(TK_RBRACK); + if (accept(TK_ASSIGN)) { + expect(TK_LBRACE); + while (tok.kind!=TK_RBRACE) { + list_push(n, expr()); accept(TK_COMMA); + } + expect(TK_RBRACE); + } + } else { + n=alloc_node(N_LVAR); + n->tbase=tb; n->ptrs=pt; strcpy(n->str,nm); + if (accept(TK_ASSIGN)) n->ch[0]=expr(); + } + expect(TK_SEMI); return n; +} + +static Node *stmt(void) { + Node *n, *t; + if (is_type()) return decl_local(); + switch(tok.kind) { + case TK_LBRACE: return block(); + case TK_IF: + n=alloc_node(N_IF); next(); + expect(TK_LPAREN); n->ch[0]=expr(); expect(TK_RPAREN); + n->ch[1]=stmt(); + if (tok.kind==TK_ELSE) { + next(); n->ch[2]=stmt(); + } + return n; + case TK_WHILE: + n=alloc_node(N_WHILE); next(); + expect(TK_LPAREN); n->ch[0]=expr(); expect(TK_RPAREN); + n->ch[1]=stmt(); return n; + case TK_FOR: + n=alloc_node(N_FOR); next(); expect(TK_LPAREN); + if (is_type()) n->ch[0]=decl_local(); /* consumes ; */ + else if (tok.kind!=TK_SEMI) { Node *es=alloc_node(N_ES); es->ch[0]=expr(); expect(TK_SEMI); n->ch[0]=es; } + else { next(); } + if (tok.kind!=TK_SEMI) n->ch[1]=expr(); + expect(TK_SEMI); + if (tok.kind!=TK_RPAREN) n->ch[2]=expr(); + expect(TK_RPAREN); n->ch[3]=stmt(); return n; + case TK_SWITCH: + n=alloc_node(N_SW); next(); + expect(TK_LPAREN); n->ch[0]=expr(); expect(TK_RPAREN); + expect(TK_LBRACE); + while (tok.kind!=TK_RBRACE) { + if (tok.kind==TK_CASE) { + next(); t=alloc_node(N_CASE); t->ch[0]=expr(); expect(TK_COLON); + while(tok.kind!=TK_CASE&&tok.kind!=TK_DEFAULT&&tok.kind!=TK_RBRACE) + list_push(t, stmt()); + list_push(n, t); + } else if (tok.kind==TK_DEFAULT) { + next(); expect(TK_COLON); + t=alloc_node(N_DEF); + while(tok.kind!=TK_CASE&&tok.kind!=TK_DEFAULT&&tok.kind!=TK_RBRACE) + list_push(t, stmt()); + list_push(n, t); + } else break; + } + expect(TK_RBRACE); return n; + case TK_RETURN: + n=alloc_node(N_RET); next(); + if (tok.kind!=TK_SEMI) n->ch[0]=expr(); + expect(TK_SEMI); return n; + case TK_BREAK: + next(); expect(TK_SEMI); return 
alloc_node(N_BRK); + case TK_CONTINUE: + next(); expect(TK_SEMI); return alloc_node(N_CONT); + default: + n=alloc_node(N_ES); n->ch[0]=expr(); expect(TK_SEMI); return n; + } +} + +static Node *block(void) { + Node *n=alloc_node(N_BLK); expect(TK_LBRACE); + while (tok.kind!=TK_RBRACE) list_push(n, stmt()); + expect(TK_RBRACE); return n; +} + +static Node *parse_prog(void) { + Node *prog=alloc_node(N_PROG); + next(); + while (tok.kind!=TK_EOF) { + TKind tb; int pt; + parse_type(&tb,&pt); + char nm[512]; strcpy(nm,tok.str); expect(TK_ID); + Node *d; + if (tok.kind==TK_LPAREN) { + next(); d=alloc_node(N_FUNC); + d->tbase=tb; d->ptrs=pt; strcpy(d->str,nm); + /* params */ + if (tok.kind!=TK_RPAREN) + for(;;) { + if (tok.kind==TK_VOID) { next(); break; } /* accept (void) as empty param list */ + TKind ptb; int ppt; + parse_type(&ptb,&ppt); + Node *p=alloc_node(N_PARAM); + p->tbase=ptb; p->ptrs=ppt; strcpy(p->str,tok.str); + expect(TK_ID); list_push(d, p); + if (!accept(TK_COMMA)) break; + } + expect(TK_RPAREN); + if (tok.kind==TK_SEMI) { next(); d->kind=N_FDECL; } + else d->ch[0]=block(); + } else if (tok.kind==TK_LBRACK) { + /* global array: type name[size]; + or type name[size] = { v, … }; */ + next(); + d=alloc_node(N_GARR); + d->tbase=tb; d->ptrs=pt; strcpy(d->str,nm); + d->ch[0]=expr(); /* size expression */ + expect(TK_RBRACK); + if (accept(TK_ASSIGN)) { + expect(TK_LBRACE); + while (tok.kind!=TK_RBRACE) { + list_push(d, expr()); accept(TK_COMMA); + } + expect(TK_RBRACE); + } + expect(TK_SEMI); + } else { + d=alloc_node(N_GVAR); + d->tbase=tb; d->ptrs=pt; strcpy(d->str,nm); + if (accept(TK_ASSIGN)) d->ch[0]=expr(); + expect(TK_SEMI); + } + list_push(prog, d); + } + return prog; +} + +/* ══════════════════════════════════════════════════════════ + CODE GENERATOR (NASM x86_32, cdecl) + ══════════════════════════════════════════════════════════ */ + +/* String literal pool */ +#define MAX_STRS 512 +static char str_val[MAX_STRS][512]; +static int str_val_len[MAX_STRS]; 
/* FIX(bug2): track explicit byte lengths */ +static int str_cnt = 0; + +/* FIX(bug2): length-aware string interning using memcmp instead of strcmp */ +static int intern_str_n(const char *s, int len) { + for (int i=0; i 0) E("[ebp+%d]", off); + else E("[ebp%d]", off); +} + +/* Emit address of a local/global (for array decay: lea not mov) */ +static void emit_addr(const char *nm) { + int off = find_local(nm); + if (off == 0x7fffffff) { E(" mov eax, %s\n", nm); return; } /* global: label IS address */ + if (off > 0) EL("lea eax, [ebp+%d]\n", off); + else EL("lea eax, [ebp%d]\n", off); +} + +static void load_nm(const char *nm) { + /* Arrays decay to a pointer: yield address, not the value stored there */ + if (local_is_array(nm)) { emit_addr(nm); return; } + if (global_is_array(nm)) { E(" mov eax, %s\n", nm); return; } + E(" mov eax, "); emit_ref(nm); E("\n"); +} + +/* ══════════════════════════════════════════════════════════ + TYPE SYSTEM (for pointer arithmetic) + ══════════════════════════════════════════════════════════ */ +typedef struct { TKind tbase; int ptrs; } Type; +static const Type T_INT = { TK_UINT32, 0 }; + +/* Size in bytes of a base type (non-pointer) */ +static int base_size(TKind tbase) { + switch (tbase) { + case TK_UINT8: case TK_INT8: return 1; + case TK_UINT16: case TK_INT16: return 2; + case TK_UINT64: case TK_INT64: return 8; + default: return 4; /* uint32, int32, void, pointer */ + } +} + +/* Stride for pointer arithmetic: the size of what the pointer points at. + pointer-to-pointer always has stride 4 (one address). */ +static int pointee_size(TKind tbase, int ptrs) { + if (ptrs > 1) return 4; + return base_size(tbase); +} + +/* forward */ +static void gen_expr(Node *n); +static void gen_stmt(Node *n); +static Type fun_ret_type(const char *nm); /* defined in codegen section */ + +/* Infer the type of an expression node. + Walks the AST without emitting any code. 
*/ +static Type get_type(Node *n) { + if (!n) return T_INT; + switch (n->kind) { + case N_NUM: + return T_INT; + case N_STR: + return (Type){ TK_UINT8, 1 }; /* char* */ + case N_ID: { + /* Check locals first, then globals */ + for (int i=0; istr)) + return (Type){ locals[i].tbase, locals[i].ptrs }; + for (int i=0; istr)) + return (Type){ gvars[i].tbase, gvars[i].ptrs }; + return T_INT; + } + case N_ADDR: { + Type inner = get_type(n->ch[0]); + return (Type){ inner.tbase, inner.ptrs + 1 }; + } + case N_DEREF: { + Type inner = get_type(n->ch[0]); + if (inner.ptrs > 0) return (Type){ inner.tbase, inner.ptrs - 1 }; + return T_INT; + } + case N_IDX: { + Type arr = get_type(n->ch[0]); + if (arr.ptrs > 0) return (Type){ arr.tbase, arr.ptrs - 1 }; + return T_INT; + } + case N_CAST: + return (Type){ n->tbase, n->ptrs }; + case N_CALL: + if (n->ch[0] && n->ch[0]->kind == N_ID) + return fun_ret_type(n->ch[0]->str); + return T_INT; + case N_BOPN: + case N_ASGN: { + /* Arithmetic preserves a pointer type if either operand is a pointer */ + Type l = get_type(n->ch[0]); + Type r = get_type(n->ch[1]); + if (l.ptrs > 0) return l; + if (r.ptrs > 0) return r; + return T_INT; + } + case N_POSTINC: case N_POSTDEC: + case N_PREINC: case N_PREDEC: + return get_type(n->ch[0]); + case N_UOPN: + /* Negation/bitwise-not propagate the child type; the result is signed */ + if (n->op == TK_MINUS || n->op == TK_TILDE) + return (Type){ TK_INT32, 0 }; + return T_INT; + default: + return T_INT; + } +} + +/* Emit an integer scale instruction sequence. + On entry eax holds the integer to scale; on exit eax = eax * scale. 
*/ +static void emit_scale(int scale) { + if (scale == 1) { /* nothing */ } + else if (scale == 2) EL("shl eax, 1\n"); + else if (scale == 4) EL("shl eax, 2\n"); + else if (scale == 8) EL("shl eax, 3\n"); + else EL("imul eax, %d\n", scale); +} + +/* ══════════════════════════════════════════════════════════ + LVALUE / STORE HELPERS + ══════════════════════════════════════════════════════════ */ +static void gen_lval_addr(Node *n) { + if (n->kind==N_ID) { + int off=find_local(n->str); + if (off==0x7fffffff) EL("mov eax, %s\n", n->str); + else if (off>0) EL("lea eax, [ebp+%d]\n", off); + else EL("lea eax, [ebp%d]\n", off); + } else if (n->kind==N_IDX) { + /* addr of arr[ix]: base + ix * element_size. + FIX: use gen_expr on the base, not gen_lval_addr. + gen_lval_addr on a pointer N_ID yields the stack slot address (lea), + but we need the pointer *value* (mov). gen_expr already handles both: + array identifiers decay to their base address (lea), pointer identifiers + load their value (mov). */ + Type arr = get_type(n->ch[0]); + int scale = pointee_size(arr.tbase, arr.ptrs); + gen_expr(n->ch[0]); EL("push eax\n"); + gen_expr(n->ch[1]); + emit_scale(scale); + EL("pop ecx\n"); EL("add eax, ecx\n"); + } else if (n->kind==N_DEREF) { + gen_expr(n->ch[0]); + } +} + +static void store_lval(Node *n) { + if (n->kind==N_ID) { + E(" mov "); emit_ref(n->str); E(", eax\n"); + } else if (n->kind==N_IDX || n->kind==N_DEREF) { + EL("push eax\n"); + gen_lval_addr(n); + EL("pop ecx\n"); + /* Store only as many bytes as the element type requires */ + Type t = get_type(n); + int sz = (t.ptrs > 0) ? 
4 : base_size(t.tbase); + switch (sz) { + case 1: EL("mov byte [eax], cl\n"); break; + case 2: EL("mov word [eax], cx\n"); break; + default: EL("mov dword [eax], ecx\n"); break; + } + } +} + +/* ══════════════════════════════════════════════════════════ + ARITHMETIC HELPERS + ══════════════════════════════════════════════════════════ */ +static void arith(TKind op, int sgn) { + /* ecx=left, eax=right → eax=result; sgn=1 for signed operands */ + switch(op) { + case TK_PLUS: EL("add eax, ecx\n"); break; + case TK_MINUS: EL("sub ecx, eax\n"); EL("mov eax, ecx\n"); break; + case TK_STAR: EL("imul eax, ecx\n"); break; + case TK_SLASH: + EL("xchg eax, ecx\n"); + if (sgn) { EL("cdq\n"); EL("idiv ecx\n"); } + else { EL("xor edx, edx\n"); EL("div ecx\n"); } + break; + case TK_MOD: + EL("xchg eax, ecx\n"); + if (sgn) { EL("cdq\n"); EL("idiv ecx\n"); } + else { EL("xor edx, edx\n"); EL("div ecx\n"); } + EL("mov eax, edx\n"); + break; + case TK_AMP: EL("and eax, ecx\n"); break; + case TK_PIPE: EL("or eax, ecx\n"); break; + case TK_CARET: EL("xor eax, ecx\n"); break; + case TK_SHL: EL("xchg eax, ecx\n"); EL("shl eax, cl\n"); break; + case TK_SHR: + EL("xchg eax, ecx\n"); + EL(sgn ? "sar eax, cl\n" : "shr eax, cl\n"); + break; + default: break; + } +} + +/* Pointer-aware add/subtract. + On entry: ecx = left operand, eax = right operand. + On exit: eax = result. 
*/ +static void ptr_arith(TKind op, Type lt, Type rt) { + if (op == TK_PLUS) { + if (lt.ptrs > 0) { + /* ptr + int: scale the integer (eax) by pointee size */ + int scale = pointee_size(lt.tbase, lt.ptrs); + emit_scale(scale); + EL("add eax, ecx\n"); + } else if (rt.ptrs > 0) { + /* int + ptr: scale the integer (ecx) by pointee size */ + int scale = pointee_size(rt.tbase, rt.ptrs); + if (scale == 1) { /* nothing */ } + else if (scale == 2) EL("shl ecx, 1\n"); + else if (scale == 4) EL("shl ecx, 2\n"); + else if (scale == 8) EL("shl ecx, 3\n"); + else EL("imul ecx, %d\n", scale); + EL("add eax, ecx\n"); + } else { + EL("add eax, ecx\n"); + } + } else if (op == TK_MINUS) { + if (lt.ptrs > 0 && rt.ptrs == 0) { + /* ptr - int: scale the integer (eax) by pointee size */ + int scale = pointee_size(lt.tbase, lt.ptrs); + emit_scale(scale); + EL("sub ecx, eax\n"); EL("mov eax, ecx\n"); + } else if (lt.ptrs > 0 && rt.ptrs > 0) { + /* ptr - ptr: raw byte difference divided by pointee size */ + EL("sub ecx, eax\n"); EL("mov eax, ecx\n"); + int scale = pointee_size(lt.tbase, lt.ptrs); + if (scale > 1) { + EL("xor edx, edx\n"); + EL("mov ecx, %d\n", scale); + EL("div ecx\n"); + } + } else { + EL("sub ecx, eax\n"); EL("mov eax, ecx\n"); + } + } +} + +/* ══════════════════════════════════════════════════════════ + EXPRESSION CODE GENERATOR + ══════════════════════════════════════════════════════════ */ +static void gen_expr(Node *n) { + int la, lb; + switch(n->kind) { + case N_NUM: + EL("mov eax, %ld\n", n->num); break; + case N_STR: { + /* FIX(bug2): use length-aware intern */ + int id=intern_str_n(n->str, n->slen); + EL("mov eax, _s%d\n", id); break; } + case N_ID: + load_nm(n->str); break; + case N_ADDR: + gen_lval_addr(n->ch[0]); break; + + case N_DEREF: { + /* Load from pointer; respect the pointee width and signedness */ + gen_expr(n->ch[0]); + Type inner = get_type(n->ch[0]); + int psz = (inner.ptrs > 1) ? 
4 : base_size(inner.tbase); + int sgn = (inner.ptrs == 1) && is_signed(inner.tbase); + switch (psz) { + case 1: EL(sgn ? "movsx eax, byte [eax]\n" : "movzx eax, byte [eax]\n"); break; + case 2: EL(sgn ? "movsx eax, word [eax]\n" : "movzx eax, word [eax]\n"); break; + case 8: EL("mov eax, [eax]\n"); break; /* truncate 64→32; full 64-bit NYI */ + default: EL("mov eax, [eax]\n"); break; + } + break; + } + + case N_CAST: + gen_expr(n->ch[0]); + if (n->ptrs == 0) { + switch (n->tbase) { + case TK_INT8: EL("movsx eax, al\n"); break; /* sign-extend low byte */ + case TK_INT16: EL("movsx eax, ax\n"); break; /* sign-extend low word */ + case TK_UINT8: EL("and eax, 0xFF\n"); break; + case TK_UINT16: EL("and eax, 0xFFFF\n"); break; + default: break; /* int32/uint32/int64/uint64: no truncation needed */ + } + } + break; + + case N_POSTINC: { + Type t = get_type(n->ch[0]); + int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1; + gen_expr(n->ch[0]); EL("push eax\n"); + EL("add eax, %d\n", stride); store_lval(n->ch[0]); EL("pop eax\n"); + break; + } + case N_POSTDEC: { + Type t = get_type(n->ch[0]); + int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1; + gen_expr(n->ch[0]); EL("push eax\n"); + EL("sub eax, %d\n", stride); store_lval(n->ch[0]); EL("pop eax\n"); + break; + } + case N_PREINC: { + Type t = get_type(n->ch[0]); + int stride = (t.ptrs > 0) ? pointee_size(t.tbase, t.ptrs) : 1; + gen_expr(n->ch[0]); EL("add eax, %d\n", stride); store_lval(n->ch[0]); + break; + } + case N_PREDEC: { + Type t = get_type(n->ch[0]); + int stride = (t.ptrs > 0) ? 
pointee_size(t.tbase, t.ptrs) : 1; + gen_expr(n->ch[0]); EL("sub eax, %d\n", stride); store_lval(n->ch[0]); + break; + } + + case N_UOPN: + gen_expr(n->ch[0]); + if (n->op==TK_MINUS) EL("neg eax\n"); + else if (n->op==TK_TILDE) EL("not eax\n"); + else { EL("test eax, eax\n"); EL("setz al\n"); EL("movzx eax, al\n"); } + break; + + case N_BOPN: + if (n->op==TK_OR) { + la=new_lbl(); lb=new_lbl(); + gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jnz .L%d\n",la); + gen_expr(n->ch[1]); EL("test eax, eax\n"); EL("jnz .L%d\n",la); + EL("xor eax, eax\n"); EL("jmp .L%d\n",lb); + E(".L%d:\n",la); EL("mov eax, 1\n"); E(".L%d:\n",lb); + } else if (n->op==TK_AND) { + la=new_lbl(); lb=new_lbl(); + gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .L%d\n",la); + gen_expr(n->ch[1]); EL("test eax, eax\n"); EL("jz .L%d\n",la); + EL("mov eax, 1\n"); EL("jmp .L%d\n",lb); + E(".L%d:\n",la); EL("xor eax, eax\n"); E(".L%d:\n",lb); + } else { + /* Evaluate both sides; ecx=left, eax=right */ + gen_expr(n->ch[0]); EL("push eax\n"); + gen_expr(n->ch[1]); EL("pop ecx\n"); + { + int sgn = is_signed(get_type(n->ch[0]).tbase) || + is_signed(get_type(n->ch[1]).tbase); + switch(n->op) { + case TK_EQ: EL("cmp ecx, eax\n"); EL("sete al\n"); EL("movzx eax, al\n"); break; + case TK_NEQ: EL("cmp ecx, eax\n"); EL("setne al\n"); EL("movzx eax, al\n"); break; + case TK_LT: EL("cmp ecx, eax\n"); EL(sgn?"setl al\n" :"setb al\n"); EL("movzx eax, al\n"); break; + case TK_LEQ: EL("cmp ecx, eax\n"); EL(sgn?"setle al\n":"setbe al\n"); EL("movzx eax, al\n"); break; + case TK_GT: EL("cmp ecx, eax\n"); EL(sgn?"setg al\n" :"seta al\n"); EL("movzx eax, al\n"); break; + case TK_GEQ: EL("cmp ecx, eax\n"); EL(sgn?"setge al\n":"setae al\n"); EL("movzx eax, al\n"); break; + case TK_PLUS: + case TK_MINUS: { + Type lt = get_type(n->ch[0]); + Type rt = get_type(n->ch[1]); + if (lt.ptrs > 0 || rt.ptrs > 0) + ptr_arith(n->op, lt, rt); + else + arith(n->op, sgn); + break; + } + default: arith(n->op, sgn); break; + } + } + } + 
break; + + case N_ASGN: + if (n->op==TK_ASSIGN) { + gen_expr(n->ch[1]); store_lval(n->ch[0]); + } else { + /* Compound assignment */ + TKind base; + switch(n->op){ + case TK_ADDEQ: base=TK_PLUS; break; case TK_SUBEQ: base=TK_MINUS; break; + case TK_MULEQ: base=TK_STAR; break; case TK_DIVEQ: base=TK_SLASH; break; + case TK_MODEQ: base=TK_MOD; break; case TK_ANDEQ: base=TK_AMP; break; + case TK_OREQ: base=TK_PIPE; break; case TK_XOREQ: base=TK_CARET; break; + case TK_SHLEQ: base=TK_SHL; break; case TK_SHREQ: base=TK_SHR; break; + default: base=TK_EOF; + } + gen_expr(n->ch[0]); EL("push eax\n"); + gen_expr(n->ch[1]); EL("pop ecx\n"); + { + int sgn = is_signed(get_type(n->ch[0]).tbase) || + is_signed(get_type(n->ch[1]).tbase); + if (base==TK_PLUS || base==TK_MINUS) { + Type lt = get_type(n->ch[0]); + Type rt = get_type(n->ch[1]); + if (lt.ptrs > 0 || rt.ptrs > 0) + ptr_arith(base, lt, rt); + else + arith(base, sgn); + } else { + arith(base, sgn); + } + } + store_lval(n->ch[0]); + } + break; + + case N_TERN: + la=new_lbl(); lb=new_lbl(); + gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .L%d\n",la); + gen_expr(n->ch[1]); EL("jmp .L%d\n",lb); + E(".L%d:\n",la); gen_expr(n->ch[2]); E(".L%d:\n",lb); + break; + + case N_CALL: { + int argc=n->nlist; + for (int i=argc-1;i>=0;i--) { gen_expr(n->list[i]); EL("push eax\n"); } + if (n->ch[0]->kind==N_ID) EL("call %s\n", n->ch[0]->str); + else { gen_expr(n->ch[0]); EL("call eax\n"); } + if (argc) EL("add esp, %d\n", argc*4); + break; } + + case N_IDX: { + /* arr[i] — address then dereference with correct width and signedness */ + Type arr = get_type(n->ch[0]); + int psz = (arr.ptrs > 1) ? 4 : base_size(arr.tbase); + int sgn = (arr.ptrs == 1) && is_signed(arr.tbase); + gen_lval_addr(n); + switch (psz) { + case 1: EL(sgn ? "movsx eax, byte [eax]\n" : "movzx eax, byte [eax]\n"); break; + case 2: EL(sgn ? 
"movsx eax, word [eax]\n" : "movzx eax, word [eax]\n"); break; + default: EL("mov eax, [eax]\n"); break; + } + break; + } + + default: break; + } +} + +/* ══════════════════════════════════════════════════════════ + STATEMENT CODE GENERATOR + ══════════════════════════════════════════════════════════ */ +static void gen_stmt(Node *n) { + int la, lb, lc; + switch(n->kind) { + case N_BLK: + for(int i=0;inlist;i++) gen_stmt(n->list[i]); break; + case N_ES: + gen_expr(n->ch[0]); break; + case N_LVAR: + if (n->ch[0]) { gen_expr(n->ch[0]); E(" mov "); emit_ref(n->str); E(", eax\n"); } + break; + case N_LARR: + if (n->nlist) { + int base=find_local(n->str); + /* element size for initialiser stride */ + int esz = (n->ptrs > 0) ? 4 : base_size(n->tbase); + for(int i=0;inlist;i++) { + gen_expr(n->list[i]); + int off=base+i*esz; + switch(esz) { + case 1: + if(off>0) EL("mov byte [ebp+%d], al\n",off); + else EL("mov byte [ebp%d], al\n",off); + break; + case 2: + if(off>0) EL("mov word [ebp+%d], ax\n",off); + else EL("mov word [ebp%d], ax\n",off); + break; + default: + if(off>0) EL("mov dword [ebp+%d], eax\n",off); + else EL("mov dword [ebp%d], eax\n",off); + break; + } + } + } + break; + case N_RET: + if (n->ch[0]) gen_expr(n->ch[0]); + EL("jmp .Lret%d\n", ret_lbl); break; + case N_BRK: EL("jmp .Lbrk%d\n", brk_stk[stk_top-1]); break; + case N_CONT: EL("jmp .Lcont%d\n", cont_stk[stk_top-1]); break; + case N_IF: + la=new_lbl(); lb=new_lbl(); + gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .L%d\n",la); + gen_stmt(n->ch[1]); EL("jmp .L%d\n",lb); + E(".L%d:\n",la); + if (n->ch[2]) gen_stmt(n->ch[2]); + E(".L%d:\n",lb); break; + case N_WHILE: + la=new_lbl(); lb=new_lbl(); + brk_stk[stk_top]=lb; cont_stk[stk_top]=la; stk_top++; + E(".Lcont%d:\n",la); + gen_expr(n->ch[0]); EL("test eax, eax\n"); EL("jz .Lbrk%d\n",lb); + gen_stmt(n->ch[1]); EL("jmp .Lcont%d\n",la); + E(".Lbrk%d:\n",lb); stk_top--; break; + case N_FOR: + la=new_lbl(); lb=new_lbl(); lc=new_lbl(); + 
brk_stk[stk_top]=lb; cont_stk[stk_top]=lc; stk_top++; + if (n->ch[0]) gen_stmt(n->ch[0]); + E(".L%d:\n",la); + if (n->ch[1]) { gen_expr(n->ch[1]); EL("test eax, eax\n"); EL("jz .Lbrk%d\n",lb); } + gen_stmt(n->ch[3]); + E(".Lcont%d:\n",lc); + if (n->ch[2]) gen_expr(n->ch[2]); + EL("jmp .L%d\n",la); E(".Lbrk%d:\n",lb); stk_top--; break; + case N_SW: { + lb=new_lbl(); + int ncases=n->nlist; + int *clbls=malloc(ncases*sizeof(int)); + for(int i=0;ich[0]); EL("push eax\n"); + for(int i=0;ilist[i]; + if (c->kind==N_CASE) { + EL("mov eax, [esp]\n"); + EL("cmp eax, %ld\n", c->ch[0]->num); + EL("je .L%d\n", clbls[i]); + } else { + EL("jmp .L%d\n", clbls[i]); + } + } + EL("jmp .Lbrk%d\n",lb); + for(int i=0;ilist[i]; + int ns=(c->kind==N_CASE||c->kind==N_DEF)?c->nlist:0; + for(int j=0;jlist[j]); + } + E(".Lbrk%d:\n",lb); EL("add esp, 4\n"); + stk_top--; free(clbls); break; } + default: break; + } +} + +/* ══════════════════════════════════════════════════════════ + LOCAL VARIABLE PRE-SCAN + ══════════════════════════════════════════════════════════ */ +static void scan_locals(Node *n) { + if (!n) return; + if (n->kind==N_LVAR) { + frame_size+=4; + def_local(n->str, -frame_size, n->tbase, n->ptrs); + } else if (n->kind==N_LARR) { + long cnt = n->ch[0]->num; + /* size per element */ + int esz = (n->ptrs > 0) ? 
4 : base_size(n->tbase); + frame_size += esz * (int)cnt; + /* Array name decays to a pointer to its element type */ + def_local_array(n->str, -frame_size, n->tbase, n->ptrs + 1); + } else { + for(int i=0;i<4;i++) scan_locals(n->ch[i]); + for(int i=0;inlist;i++) scan_locals(n->list[i]); + } +} + +/* ══════════════════════════════════════════════════════════ + FUNCTION GENERATOR + ══════════════════════════════════════════════════════════ */ +static char called[256][64]; +static int ncalled=0; +static void collect_calls(Node *n) { + if (!n) return; + if (n->kind==N_CALL && n->ch[0]->kind==N_ID) { + char *nm=n->ch[0]->str; + int found=0; + for(int i=0;ich[i]); + for(int i=0;inlist;i++) collect_calls(n->list[i]); +} + +static void gen_func(Node *fn) { + nlocals=0; frame_size=0; + scan_locals(fn->ch[0]); + int fsize=(frame_size+15)&~15; + + ret_lbl=new_lbl(); + E("\n%s:\n", fn->str); + EL("push ebp\n"); EL("mov ebp, esp\n"); + if (fsize) EL("sub esp, %d\n", fsize); + + /* Bind params: [ebp+8], [ebp+12], … with their declared types */ + int poff=8; + for(int i=0;inlist;i++) { + def_local(fn->list[i]->str, poff, + fn->list[i]->tbase, fn->list[i]->ptrs); + poff+=4; + } + + gen_stmt(fn->ch[0]); + + E(".Lret%d:\n", ret_lbl); + EL("mov esp, ebp\n"); EL("pop ebp\n"); EL("ret\n"); +} + +/* ══════════════════════════════════════════════════════════ + STRING DATA EMITTER + ══════════════════════════════════════════════════════════ */ +/* FIX(bug2): takes explicit length; walks to p<=end to include null terminator */ +static void emit_str_data(const char *s, int len) { + E("db "); + int first=1; + const char *end = s + len; /* points at the null terminator */ + for(const char *p=s; p<=end; p++) { + unsigned char c=(unsigned char)*p; + if (c>=32 && c<127 && c!='"' && c!='\\') { + if (!first) E(","); + E("\""); + while (p<=end && (unsigned char)*p>=32 && (unsigned char)*p<127 + && *p!='"' && *p!='\\') { fputc(*p,out); p++; } + p--; + E("\""); + } else { + if (!first) E(","); + 
E("%d", c); + } + first=0; + } + E("\n"); +} + +/* ══════════════════════════════════════════════════════════ + TOP-LEVEL CODE GENERATOR + ══════════════════════════════════════════════════════════ */ + +/* Function return-type table — populated before codegen so get_type(N_CALL) works */ +#define MAX_FUNS 256 +static struct { char name[64]; TKind tbase; int ptrs; } funs[MAX_FUNS]; +static int nfuns=0; +static void def_fun(const char *nm, TKind tbase, int ptrs) { + if (nfuns==MAX_FUNS) return; + strncpy(funs[nfuns].name, nm, 63); + funs[nfuns].tbase = tbase; + funs[nfuns].ptrs = ptrs; + nfuns++; +} +static Type fun_ret_type(const char *nm) { + for (int i=0;inlist;i++) { + Node *d=prog->list[i]; + if(d->kind==N_GVAR) + def_global(d->str, d->tbase, d->ptrs); + else if(d->kind==N_GARR) + def_global_array(d->str, d->tbase, d->ptrs + 1); /* array decays to pointer */ + else if(d->kind==N_FUNC || d->kind==N_FDECL) + def_fun(d->str, d->tbase, d->ptrs); /* register return type */ + } + + /* Collect defined function names */ + char defined[256][64]; int ndef=0; + for(int i=0;inlist;i++) + if(prog->list[i]->kind==N_FUNC) + strcpy(defined[ndef++], prog->list[i]->str); + + collect_calls(prog); + + E("BITS 32\n"); + E("section .text\n"); + for(int i=0;inlist;i++) + if(prog->list[i]->kind==N_FUNC) + gen_func(prog->list[i]); + + /* ── .data section: string literals + explicitly initialised globals ── */ + int has_data = (str_cnt > 0); + for(int i=0;inlist;i++) { + Node *d=prog->list[i]; + /* FIX(bug1): guard with kind==N_NUM before checking num, so N_STR/N_ID + initializers don't accidentally satisfy num==0 and leak into .bss too */ + if (d->kind==N_GVAR && d->ch[0] != NULL && + !(d->ch[0]->kind==N_NUM && d->ch[0]->num==0)) has_data=1; + if (d->kind==N_GARR && d->nlist > 0) has_data=1; + } + if (has_data) { + E("\nsection .data\n"); + for(int i=0;inlist;i++) { + Node *d=prog->list[i]; + /* scalar global with non-zero initialiser */ + if(d->kind==N_GVAR && d->ch[0] != NULL && + 
!(d->ch[0]->kind==N_NUM && d->ch[0]->num==0)) { + const char *dw = d->ptrs ? "dd" : + (base_size(d->tbase)==1?"db":base_size(d->tbase)==2?"dw": + base_size(d->tbase)==8?"dq":"dd"); + if (d->ch[0]->kind == N_NUM) { + long v = d->ch[0]->num; + E("%s: %s %ld\n", d->str, dw, v); + } else if (d->ch[0]->kind == N_STR) { + /* FIX(bug2): use length-aware intern for global string inits */ + int id = intern_str_n(d->ch[0]->str, d->ch[0]->slen); + E("%s: %s _s%d\n", d->str, dw, id); + } else if (d->ch[0]->kind == N_ID) { + E("%s: %s %s\n", d->str, dw, d->ch[0]->str); + } + } + /* global array with explicit initialiser */ + if(d->kind==N_GARR && d->nlist > 0) { + long cnt = d->ch[0]->num; + const char *dw = d->ptrs ? "dd" : + (base_size(d->tbase)==1?"db":base_size(d->tbase)==2?"dw": + base_size(d->tbase)==8?"dq":"dd"); + E("%s: %s", d->str, dw); + for(int j=0; jnlist; j++) + E("%s%ld", j ? "," : " ", d->list[j]->num); + for(long j=d->nlist; j 0 || d->nlist > 0) ? "," : " "); + E("\n"); + } + } + /* FIX(bug2): pass length to emit_str_data */ + for(int i=0;inlist;i++) { + Node *d=prog->list[i]; + /* FIX(bug1): check kind==N_NUM before num==0, so string/id inits + don't produce a duplicate symbol in both .data and .bss */ + if (d->kind==N_GVAR && (!d->ch[0] || (d->ch[0]->kind==N_NUM && d->ch[0]->num==0))) has_bss=1; + if (d->kind==N_GARR && d->nlist == 0) has_bss=1; + } + if (has_bss) { + E("\nsection .bss\n"); + for(int i=0;inlist;i++) { + Node *d=prog->list[i]; + /* FIX(bug1): same guard as has_bss check above */ + if(d->kind==N_GVAR && (!d->ch[0] || (d->ch[0]->kind==N_NUM && d->ch[0]->num==0))) { + const char *rs = d->ptrs ? "resd" : + (base_size(d->tbase)==1?"resb":base_size(d->tbase)==2?"resw": + base_size(d->tbase)==8?"resq":"resd"); + E("%s: %s 1\n", d->str, rs); + } + if(d->kind==N_GARR && d->nlist == 0) { + long cnt = d->ch[0]->num; + const char *rs = d->ptrs ? 
"resd" : + (base_size(d->tbase)==1?"resb":base_size(d->tbase)==2?"resw": + base_size(d->tbase)==8?"resq":"resd"); + E("%s: %s %ld\n", d->str, rs, cnt); + } + } + } +} + +/* ══════════════════════════════════════════════════════════ + MAIN + ══════════════════════════════════════════════════════════ */ +static char *read_file(const char *path) { + FILE *f=fopen(path,"r"); + if(!f) die("cannot open: %s", path); + fseek(f,0,SEEK_END); long sz=ftell(f); rewind(f); + char *buf=malloc(sz+1); + fread(buf,1,sz,f); buf[sz]=0; fclose(f); + return buf; +} + +int main(int argc, char **argv) { + if (argc<2) { fprintf(stderr,"usage: %s [out.asm]\n",argv[0]); return 1; } + char *source = read_file(argv[1]); + src=source; src_pos=0; + Node *prog = parse_prog(); + out = (argc>=3) ? fopen(argv[2],"w") : stdout; + if (!out) die("cannot open output: %s", argv[2]); + codegen(prog); + if (argc>=3) { fclose(out); fprintf(stderr,"wrote %s\n",argv[2]); } + return 0; +} diff --git a/examples/arrays.cm b/examples/arrays.cm new file mode 100644 index 0000000..7f22c11 --- /dev/null +++ b/examples/arrays.cm @@ -0,0 +1,48 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// arrays.cm - Array operations +// Demonstrates: arrays, loops, array initialization + +void printf(uint8 *fmt); + +int32 sum_array(int32 *arr, int32 len) { + int32 total = 0; + for (int32 i = 0; i < len; i = i + 1) { + total = total + arr[i]; + } + return total; +} + +void reverse_array(int32 *arr, int32 len) { + int32 i = 0; + int32 j = len - 1; + while (i < j) { + int32 temp = arr[i]; + arr[i] = arr[j]; + arr[j] = temp; + i = i + 1; + j = j - 1; + } +} + +int32 main(void) { + int32 numbers[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + + printf("Original array: "); + for (int32 i = 0; i < 10; i = i + 1) { + printf("%d ", numbers[i]); + } + printf("\n"); + + int32 total = sum_array(numbers, 10); + printf("Sum: %d\n", total); + + reverse_array(numbers, 10); + + printf("Reversed array: "); + for (int32 i = 0; i < 10; i = i + 1) { + printf("%d ", numbers[i]); + } + printf("\n"); + + return 0; +} diff --git a/examples/bitwise.cm b/examples/bitwise.cm new file mode 100644 index 0000000..8469b86 --- /dev/null +++ b/examples/bitwise.cm @@ -0,0 +1,60 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// bitwise.cm - Bitwise operations +// Demonstrates: bitwise operators, bit manipulation, shifts + +void printf(uint8 *fmt); + +void print_binary(uint32 n) { + printf("0b"); + for (int32 i = 31; i >= 0; i = i - 1) { + printf("%d", (n >> i) & 1); + if (i % 4 == 0 && i != 0) printf("_"); + } +} + +int32 count_set_bits(uint32 n) { + int32 count = 0; + while (n) { + count = count + (n & 1); + n = n >> 1; + } + return count; +} + +uint32 reverse_bits(uint32 n) { + uint32 result = 0; + for (int32 i = 0; i < 32; i = i + 1) { + result = result << 1; + result = result | (n & 1); + n = n >> 1; + } + return result; +} + +int32 is_power_of_two(uint32 n) { + return n && ((n & (n - 1)) == 0); +} + +int32 main(void) { + uint32 a = 0xAB; + uint32 b = 0x55; + + printf("a = %d = ", a); print_binary(a); printf("\n"); + printf("b = %d = ", b); print_binary(b); printf("\n"); + + printf("\nBitwise AND: %d = ", a & b); print_binary(a & b); printf("\n"); + printf("Bitwise OR: %d = ", a | b); print_binary(a | b); printf("\n"); + printf("Bitwise XOR: %d = ", a ^ b); print_binary(a ^ b); printf("\n"); + printf("Bitwise NOT: %d = ", ~a); print_binary(~a); printf("\n"); + + printf("\nLeft shift (a << 2): %d\n", a << 2); + printf("Right shift (a >> 2): %d\n", a >> 2); + + printf("\nSet bits in a: %d\n", count_set_bits(a)); + printf("Set bits in b: %d\n", count_set_bits(b)); + + printf("\nIs 64 power of two? %d\n", is_power_of_two(64)); + printf("Is 63 power of two? %d\n", is_power_of_two(63)); + + return 0; +} diff --git a/examples/bubblesort.cm b/examples/bubblesort.cm new file mode 100644 index 0000000..cb7be13 --- /dev/null +++ b/examples/bubblesort.cm @@ -0,0 +1,40 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// bubblesort.cm - Bubble sort implementation +// Demonstrates: nested loops, array manipulation, comparisons + +void printf(uint8 *fmt); + +void bubble_sort(int32 *arr, int32 n) { + for (int32 i = 0; i < n - 1; i = i + 1) { + for (int32 j = 0; j < n - i - 1; j = j + 1) { + if (arr[j] > arr[j + 1]) { + // Swap + int32 temp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = temp; + } + } + } +} + +void print_array(int32 *arr, int32 n) { + for (int32 i = 0; i < n; i = i + 1) { + printf("%d ", arr[i]); + } + printf("\n"); +} + +int32 main(void) { + int32 numbers[10] = { 64, 34, 25, 12, 22, 11, 90, 88, 45, 50 }; + int32 n = 10; + + printf("Unsorted array: "); + print_array(numbers, n); + + bubble_sort(numbers, n); + + printf("Sorted array: "); + print_array(numbers, n); + + return 0; +} diff --git a/examples/calculator.cm b/examples/calculator.cm new file mode 100644 index 0000000..dc13cd5 --- /dev/null +++ b/examples/calculator.cm @@ -0,0 +1,101 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// calculator.cm - Simple expression calculator +// Demonstrates: global variables, function composition, control flow + +void printf(uint8 *fmt); + +// Global state for the calculator +int32 last_result = 0; +int32 operation_count = 0; + +int32 add(int32 a, int32 b) { + operation_count = operation_count + 1; + last_result = a + b; + return last_result; +} + +int32 subtract(int32 a, int32 b) { + operation_count = operation_count + 1; + last_result = a - b; + return last_result; +} + +int32 multiply(int32 a, int32 b) { + operation_count = operation_count + 1; + last_result = a * b; + return last_result; +} + +int32 divide(int32 a, int32 b) { + if (b == 0) { + printf("Error: Division by zero\n"); + return 0; + } + operation_count = operation_count + 1; + last_result = a / b; + return last_result; +} + +int32 power(int32 base, int32 exp) { + operation_count = operation_count + 1; + int32 result = 1; + for (int32 i = 0; i < exp; i = i + 1) { + result = result * base; + } + last_result = result; + return result; +} + +int32 factorial(int32 n) { + operation_count = operation_count + 1; + if (n <= 1) { + last_result = 1; + return 1; + } + int32 result = 1; + for (int32 i = 2; i <= n; i = i + 1) { + result = result * i; + } + last_result = result; + return result; +} + +void print_stats(void) { + printf("\n=== Calculator Statistics ===\n"); + printf("Total operations: %d\n", operation_count); + printf("Last result: %d\n", last_result); + printf("============================\n"); +} + +int32 main(void) { + printf("Calculator Demo\n"); + printf("===============\n\n"); + + // Basic arithmetic + printf("10 + 5 = %d\n", add(10, 5)); + printf("10 - 5 = %d\n", subtract(10, 5)); + printf("10 * 5 = %d\n", multiply(10, 5)); + printf("10 / 5 = %d\n", divide(10, 5)); + + // Power function + printf("\n2^10 = %d\n", power(2, 10)); + printf("5^3 = %d\n", power(5, 3)); + + // Factorial + printf("\n5! = %d\n", factorial(5)); + printf("10! 
= %d\n", factorial(10)); + + // Complex expression: (5 + 3) * (10 - 2) + int32 a = add(5, 3); + int32 b = subtract(10, 2); + int32 result = multiply(a, b); + printf("\n(5 + 3) * (10 - 2) = %d\n", result); + + // Division by zero test + printf("\nTesting division by zero:\n"); + divide(10, 0); + + print_stats(); + + return 0; +} diff --git a/examples/fibonacci.cm b/examples/fibonacci.cm new file mode 100644 index 0000000..4a59f14 --- /dev/null +++ b/examples/fibonacci.cm @@ -0,0 +1,23 @@ +// Public domain / CC0. Use freely for any purpose. RoyR 2026 +// fibonacci.cm - Calculate Fibonacci numbers +// Demonstrates: recursion, function calls, conditionals + +void printf(uint8 *fmt); + +int32 fibonacci(int32 n) { + if (n <= 1) { + return n; + } + return fibonacci(n - 1) + fibonacci(n - 2); +} + +int32 main(void) { + printf("Fibonacci sequence:\n"); + + for (int32 i = 0; i < 15; i = i + 1) { + int32 fib = fibonacci(i); + printf("fib(%d) = %d\n", i, fib); + } + + return 0; +} diff --git a/examples/hello.cm b/examples/hello.cm new file mode 100644 index 0000000..7de3383 --- /dev/null +++ b/examples/hello.cm @@ -0,0 +1,11 @@ +// Public domain / CC0. Use freely for any purpose. RoyR 2026 +// hello.cm - Hello World program +// Compile: ./common hello.cm hello.asm && nasm -f elf32 hello.asm && gcc -m32 hello.o -o hello + +void putchar(int32 c); +void puts(uint8 *s); + +int32 main(void) { + puts("Hello, World!"); + return 0; +} diff --git a/examples/linkedlist.cm b/examples/linkedlist.cm new file mode 100644 index 0000000..636058d --- /dev/null +++ b/examples/linkedlist.cm @@ -0,0 +1,159 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// linkedlist.cm - Linked list implementation +// Demonstrates: structs simulated with arrays, complex pointer manipulation, memory layout + +void printf(uint8 *fmt); +void *malloc(uint32 size); +void free(void *ptr); + +// Node structure simulated as: +// [0] = data (int32) +// [4] = next pointer (int32*) +// Total size: 8 bytes per node + +int32 *create_node(int32 value) { + int32 *node = (int32*)malloc(8); + node[0] = value; // data + node[1] = 0; // next = NULL + return node; +} + +void insert_front(int32 **head, int32 value) { + int32 *new_node = create_node(value); + new_node[1] = (int32)(*head); // new->next = head + *head = new_node; // head = new_node +} + +void insert_back(int32 **head, int32 value) { + int32 *new_node = create_node(value); + + if (*head == 0) { + *head = new_node; + return; + } + + int32 *current = *head; + while (current[1] != 0) { + current = (int32*)current[1]; + } + current[1] = (int32)new_node; +} + +int32 list_length(int32 *head) { + int32 count = 0; + int32 *current = head; + while (current != 0) { + count = count + 1; + current = (int32*)current[1]; + } + return count; +} + +void print_list(int32 *head) { + int32 *current = head; + printf("["); + while (current != 0) { + printf("%d", current[0]); + current = (int32*)current[1]; + if (current != 0) printf(", "); + } + printf("]\n"); +} + +int32 find_value(int32 *head, int32 value) { + int32 *current = head; + int32 index = 0; + while (current != 0) { + if (current[0] == value) { + return index; + } + current = (int32*)current[1]; + index = index + 1; + } + return -1; +} + +void delete_value(int32 **head, int32 value) { + if (*head == 0) return; + + // Check if head node contains the value + if ((*head)[0] == value) { + int32 *temp = *head; + *head = (int32*)(*head)[1]; + free(temp); + return; + } + + // Search for the value in remaining nodes + int32 *current = *head; + while (current[1] != 0) { + int32 *next = (int32*)current[1]; + if (next[0] == value) { + current[1] 
= next[1]; // current->next = next->next + free(next); + return; + } + current = next; + } +} + +void free_list(int32 *head) { + int32 *current = head; + while (current != 0) { + int32 *next = (int32*)current[1]; + free(current); + current = next; + } +} + +int32 main(void) { + int32 *list = 0; // Empty list (NULL) + + printf("Linked List Demo\n"); + printf("================\n\n"); + + printf("Inserting at front: 3, 2, 1\n"); + insert_front(&list, 3); + insert_front(&list, 2); + insert_front(&list, 1); + print_list(list); + + printf("\nInserting at back: 4, 5, 6\n"); + insert_back(&list, 4); + insert_back(&list, 5); + insert_back(&list, 6); + print_list(list); + + printf("\nList length: %d\n", list_length(list)); + + printf("\nSearching for values:\n"); + int32 search_vals[4] = { 1, 4, 7, 5 }; + for (int32 i = 0; i < 4; i = i + 1) { + int32 val = search_vals[i]; + int32 pos = find_value(list, val); + if (pos >= 0) { + printf(" %d found at position %d\n", val, pos); + } else { + printf(" %d not found\n", val); + } + } + + printf("\nDeleting value 3\n"); + delete_value(&list, 3); + print_list(list); + + printf("\nDeleting value 1 (head)\n"); + delete_value(&list, 1); + print_list(list); + + printf("\nDeleting value 6 (tail)\n"); + delete_value(&list, 6); + print_list(list); + + printf("\nFinal list length: %d\n", list_length(list)); + + free_list(list); + printf("\nList freed\n"); + + return 0; +} diff --git a/examples/pointers.cm b/examples/pointers.cm new file mode 100644 index 0000000..0e5b128 --- /dev/null +++ b/examples/pointers.cm @@ -0,0 +1,38 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// pointers.cm - Pointer operations +// Demonstrates: pointers, pointer arithmetic, dereferencing + +void printf(uint8 *fmt); + +void swap(int32 *a, int32 *b) { + int32 temp = *a; + *a = *b; + *b = temp; +} + +int32 main(void) { + int32 x = 42; + int32 y = 17; + + printf("Before swap: x=%d, y=%d\n", x, y); + swap(&x, &y); + printf("After swap: x=%d, y=%d\n", x, y); + + // Pointer arithmetic with arrays + int32 arr[5] = { 10, 20, 30, 40, 50 }; + int32 *ptr = arr; + + printf("\nArray traversal with pointer arithmetic:\n"); + for (int32 i = 0; i < 5; i = i + 1) { + printf("arr[%d] = %d (via pointer: %d)\n", i, arr[i], *(ptr + i)); + } + + // Pointer to pointer + int32 value = 99; + int32 *p1 = &value; + int32 **p2 = &p1; + + printf("\nPointer to pointer: **p2 = %d\n", **p2); + + return 0; +} diff --git a/examples/primes.cm b/examples/primes.cm new file mode 100644 index 0000000..6b65942 --- /dev/null +++ b/examples/primes.cm @@ -0,0 +1,68 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// primes.cm - Prime number calculator +// Demonstrates: algorithms, loops, mathematical operations + +void printf(uint8 *fmt); + +int32 is_prime(int32 n) { + if (n <= 1) return 0; + if (n <= 3) return 1; + if (n % 2 == 0 || n % 3 == 0) return 0; + + int32 i = 5; + while (i * i <= n) { + if (n % i == 0 || n % (i + 2) == 0) { + return 0; + } + i = i + 6; + } + return 1; +} + +int32 nth_prime(int32 n) { + if (n == 1) return 2; + + int32 count = 1; + int32 candidate = 3; + + while (count < n) { + if (is_prime(candidate)) { + count = count + 1; + } + candidate = candidate + 2; + } + + return candidate - 2; +} + +int32 count_primes_under(int32 limit) { + int32 count = 0; + for (int32 i = 2; i < limit; i = i + 1) { + if (is_prime(i)) { + count = count + 1; + } + } + return count; +} + +int32 main(void) { + printf("First 20 prime numbers:\n"); + for (int32 i = 1; i <= 20; i = i + 1) { + printf("%d ", nth_prime(i)); + if (i % 10 == 0) printf("\n"); + } + printf("\n"); + + int32 limit = 100; + int32 count = count_primes_under(limit); + printf("\nThere are %d primes less than %d\n", count, limit); + + printf("\nPrime check:\n"); + int32 test_nums[8] = { 1, 2, 15, 17, 97, 100, 101, 121 }; + for (int32 i = 0; i < 8; i = i + 1) { + int32 num = test_nums[i]; + printf(" %d is %s\n", num, is_prime(num) ? "prime" : "not prime"); + } + + return 0; +} diff --git a/examples/strings.cm b/examples/strings.cm new file mode 100644 index 0000000..28de533 --- /dev/null +++ b/examples/strings.cm @@ -0,0 +1,70 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// strings.cm - String manipulation +// Demonstrates: string literals, character arrays, string functions + +void printf(uint8 *fmt); + +int32 str_len(uint8 *s) { + int32 len = 0; + while (s[len]) { + len = len + 1; + } + return len; +} + +void str_copy(uint8 *dest, uint8 *src) { + int32 i = 0; + while (src[i]) { + dest[i] = src[i]; + i = i + 1; + } + dest[i] = 0; +} + +int32 str_cmp(uint8 *s1, uint8 *s2) { + int32 i = 0; + while (s1[i] && s2[i]) { + if (s1[i] != s2[i]) { + return s1[i] - s2[i]; + } + i = i + 1; + } + return s1[i] - s2[i]; +} + +void str_reverse(uint8 *s) { + int32 len = str_len(s); + int32 i = 0; + int32 j = len - 1; + while (i < j) { + uint8 temp = s[i]; + s[i] = s[j]; + s[j] = temp; + i = i + 1; + j = j - 1; + } +} + +int32 main(void) { + uint8 *msg = "Hello, World!"; + printf("Original string: %s\n", msg); + printf("Length: %d\n", str_len(msg)); + + uint8 buffer[100]; + str_copy(buffer, msg); + printf("Copied string: %s\n", buffer); + + str_reverse(buffer); + printf("Reversed: %s\n", buffer); + + uint8 *s1 = "apple"; + uint8 *s2 = "banana"; + uint8 *s3 = "apple"; + + printf("\nString comparison:\n"); + printf(" '%s' vs '%s': %d\n", s1, s2, str_cmp(s1, s2)); + printf(" '%s' vs '%s': %d\n", s1, s3, str_cmp(s1, s3)); + printf(" '%s' vs '%s': %d\n", s2, s1, str_cmp(s2, s1)); + + return 0; +} diff --git a/examples/switch.cm b/examples/switch.cm new file mode 100644 index 0000000..5596d36 --- /dev/null +++ b/examples/switch.cm @@ -0,0 +1,65 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// switch.cm - Switch statement demonstration +// Demonstrates: switch/case, default, break, fall-through + +void printf(uint8 *fmt); + +uint8 *get_day_name(int32 day) { + switch (day) { + case 0: return "Sunday"; + case 1: return "Monday"; + case 2: return "Tuesday"; + case 3: return "Wednesday"; + case 4: return "Thursday"; + case 5: return "Friday"; + case 6: return "Saturday"; + default: return "Invalid day"; + } +} + +int32 is_weekend(int32 day) { + switch (day) { + case 0: + case 6: + return 1; + default: + return 0; + } +} + +uint8 *get_grade(int32 score) { + int32 category = score / 10; + + switch (category) { + case 10: + case 9: + return "A"; + case 8: + return "B"; + case 7: + return "C"; + case 6: + return "D"; + default: + return "F"; + } +} + +int32 main(void) { + printf("Days of the week:\n"); + for (int32 i = 0; i < 8; i = i + 1) { + printf(" Day %d: %s", i, get_day_name(i)); + if (is_weekend(i)) { + printf(" (weekend!)"); + } + printf("\n"); + } + + printf("\nGrade calculator:\n"); + int32 scores[6] = { 95, 87, 76, 65, 54, 100 }; + for (int32 i = 0; i < 6; i = i + 1) { + printf(" Score %d: Grade %s\n", scores[i], get_grade(scores[i])); + } + + return 0; +} diff --git a/examples/types.cm b/examples/types.cm new file mode 100644 index 0000000..b725ef3 --- /dev/null +++ b/examples/types.cm @@ -0,0 +1,50 @@ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// types.cm - Type casting and different integer sizes +// Demonstrates: different integer types, casting, overflow behavior + +void printf(uint8 *fmt); + +int32 main(void) { + // Different integer sizes + uint8 byte_val = 255; + uint16 word_val = 65535; + uint32 dword_val = 4294967295; + + int8 sbyte_val = -128; + int16 sword_val = -32768; + int32 sdword_val = -2147483648; + + printf("Unsigned types:\n"); + printf(" uint8: %d\n", byte_val); + printf(" uint16: %d\n", word_val); + printf(" uint32: %u\n", dword_val); + + printf("\nSigned types:\n"); + printf(" int8: %d\n", sbyte_val); + printf(" int16: %d\n", sword_val); + printf(" int32: %d\n", sdword_val); + + // Type casting + printf("\nType casting:\n"); + int32 large = 1000; + uint8 small = (uint8)large; + printf(" (uint8)1000 = %d (truncated to 8 bits)\n", small); + + int32 value = 298; + uint8 truncated = (uint8)value; + printf(" (uint8)298 = %d (298 %% 256 = 42)\n", truncated); + + // Sign extension + int8 neg = -1; + int32 extended = neg; + printf("\nSign extension:\n"); + printf(" int8(-1) extended to int32: %d\n", extended); + + // Unsigned to signed + uint8 unsigned_val = 200; + int8 signed_val = (int8)unsigned_val; + printf("\nUnsigned to signed:\n"); + printf(" (int8)200 = %d (interpreted as signed)\n", signed_val); + + return 0; +} diff --git a/run_tests.sh b/run_tests.sh new file mode 100644 index 0000000..780e912 --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Public domain / CC0. Use freely for any purpose. RoyR 2026 +# run_tests.sh - Quick test runner script + +set -e + +echo "Building Common compiler..." +gcc -o common common.c + +echo "Building test runner..." +gcc -std=c99 -o test_runner test_runner.c + +echo "" +echo "Running tests..." +echo "================" +./test_runner + +exit $? diff --git a/test_runner.c b/test_runner.c new file mode 100644 index 0000000..11d26a4 --- /dev/null +++ b/test_runner.c @@ -0,0 +1,619 @@ +/* + * Public domain / CC0. 
Use freely for any purpose. RoyR 2026 + * test_runner.c - Test harness for Common compiler + * + * Build: gcc -std=c99 -o test_runner test_runner.c + * Usage: ./test_runner + */ + +#include +#include +#include +#include +#include + +typedef struct { + const char *name; + const char *source; + int expected_exit; + const char *expected_output; +} Test; + +static int test_count = 0; +static int test_passed = 0; +static int test_failed = 0; + +static int run_command(const char *cmd) { + int status = system(cmd); + if (WIFEXITED(status)) { + return WEXITSTATUS(status); + } + return -1; +} + +static char *read_file(const char *path) { + FILE *f = fopen(path, "r"); + if (!f) return NULL; + fseek(f, 0, SEEK_END); + long sz = ftell(f); + rewind(f); + char *buf = malloc(sz + 1); + fread(buf, 1, sz, f); + buf[sz] = 0; + fclose(f); + return buf; +} + +static void run_test(Test *t) { + char cmd[1024]; + test_count++; + + printf("Test %d: %s ... ", test_count, t->name); + fflush(stdout); + + /* Write source file */ + FILE *f = fopen("/tmp/test.cm", "w"); + if (!f) { printf("FAIL (cannot write source)\n"); test_failed++; return; } + fprintf(f, "%s", t->source); + fclose(f); + + /* Compile */ + if (run_command("./common /tmp/test.cm /tmp/test.asm 2>/tmp/test.err") != 0) { + printf("FAIL (compiler error)\n"); + test_failed++; + return; + } + + /* Assemble */ + if (run_command("nasm -f elf32 /tmp/test.asm -o /tmp/test.o 2>/tmp/test.err") != 0) { + printf("FAIL (assembler error)\n"); + test_failed++; + return; + } + + /* Link */ + if (run_command("gcc -m32 /tmp/test.o -o /tmp/test 2>/tmp/test.err") != 0) { + printf("FAIL (linker error)\n"); + test_failed++; + return; + } + + /* Run and capture output */ + int exit_code = run_command("/tmp/test > /tmp/test.out 2>&1"); + + /* Check exit code */ + if (exit_code != t->expected_exit) { + printf("FAIL (exit=%d, expected=%d)\n", exit_code, t->expected_exit); + test_failed++; + return; + } + + /* Check output if specified */ + if 
(t->expected_output) { + char *output = read_file("/tmp/test.out"); + if (!output) { + printf("FAIL (cannot read output)\n"); + test_failed++; + return; + } + if (strcmp(output, t->expected_output) != 0) { + printf("FAIL (output mismatch)\n"); + printf(" Expected: %s\n", t->expected_output); + printf(" Got: %s\n", output); + free(output); + test_failed++; + return; + } + free(output); + } + + printf("PASS\n"); + test_passed++; +} + +/* ============================================ + TEST CASES + ============================================ */ + +Test tests[] = { + /* Basic arithmetic */ + { + "simple_return", + "int32 main(void) { return 42; }", + 42, + NULL + }, + + { + "addition", + "int32 main(void) { return 10 + 32; }", + 42, + NULL + }, + + { + "subtraction", + "int32 main(void) { return 50 - 8; }", + 42, + NULL + }, + + { + "multiplication", + "int32 main(void) { return 6 * 7; }", + 42, + NULL + }, + + { + "division", + "int32 main(void) { return 84 / 2; }", + 42, + NULL + }, + + { + "modulo", + "int32 main(void) { return 142 % 100; }", + 42, + NULL + }, + + /* Variables */ + { + "local_variable", + "int32 main(void) { int32 x; x = 42; return x; }", + 42, + NULL + }, + + { + "variable_with_init", + "int32 main(void) { int32 x = 42; return x; }", + 42, + NULL + }, + + { + "multiple_variables", + "int32 main(void) { int32 x = 10; int32 y = 32; return x + y; }", + 42, + NULL + }, + + /* Global variables */ + { + "global_variable", + "int32 g = 42;\n" + "int32 main(void) { return g; }", + 42, + NULL + }, + + { + "global_read_write", + "int32 g;\n" + "int32 main(void) { g = 42; return g; }", + 42, + NULL + }, + + /* Control flow */ + { + "if_true", + "int32 main(void) { if (1) return 42; return 0; }", + 42, + NULL + }, + + { + "if_false", + "int32 main(void) { if (0) return 0; return 42; }", + 42, + NULL + }, + + { + "if_else", + "int32 main(void) { if (0) return 0; else return 42; }", + 42, + NULL + }, + + { + "while_loop", + "int32 main(void) {\n" + " int32 x = 
0;\n" + " while (x < 42) x = x + 1;\n" + " return x;\n" + "}", + 42, + NULL + }, + + { + "for_loop", + "int32 main(void) {\n" + " int32 sum = 0;\n" + " for (int32 i = 0; i < 10; i = i + 1) sum = sum + i;\n" + " return sum;\n" + "}", + 45, + NULL + }, + + /* Comparisons */ + { + "eq_true", + "int32 main(void) { return 42 == 42; }", + 1, + NULL + }, + + { + "eq_false", + "int32 main(void) { return 42 == 43; }", + 0, + NULL + }, + + { + "neq_true", + "int32 main(void) { return 42 != 43; }", + 1, + NULL + }, + + { + "lt_true", + "int32 main(void) { return 10 < 42; }", + 1, + NULL + }, + + { + "gt_true", + "int32 main(void) { return 42 > 10; }", + 1, + NULL + }, + + /* Logical operators */ + { + "and_true", + "int32 main(void) { return 1 && 1; }", + 1, + NULL + }, + + { + "and_false", + "int32 main(void) { return 1 && 0; }", + 0, + NULL + }, + + { + "or_true", + "int32 main(void) { return 0 || 1; }", + 1, + NULL + }, + + { + "or_false", + "int32 main(void) { return 0 || 0; }", + 0, + NULL + }, + + /* Bitwise operators */ + { + "bitwise_and", + "int32 main(void) { return 63 & 42; }", + 42, + NULL + }, + + { + "bitwise_or", + "int32 main(void) { return 32 | 10; }", + 42, + NULL + }, + + { + "bitwise_xor", + "int32 main(void) { return 50 ^ 24; }", + 42, + NULL + }, + + { + "left_shift", + "int32 main(void) { return 21 << 1; }", + 42, + NULL + }, + + { + "right_shift", + "int32 main(void) { return 84 >> 1; }", + 42, + NULL + }, + + /* Unary operators */ + { + "negation", + "int32 main(void) { return -(-42); }", + 42, + NULL + }, + + { + "logical_not", + "int32 main(void) { return !0; }", + 1, + NULL + }, + + { + "bitwise_not", + "int32 main(void) { return ~(-43); }", + 42, + NULL + }, + + /* Increment/decrement */ + { + "post_increment", + "int32 main(void) { int32 x = 41; int32 y = x++; return x; }", + 42, + NULL + }, + + { + "pre_increment", + "int32 main(void) { int32 x = 41; int32 y = ++x; return x; }", + 42, + NULL + }, + + { + "post_decrement", + "int32 main(void) { 
int32 x = 43; int32 y = x--; return x; }", + 42, + NULL + }, + + /* Compound assignment */ + { + "add_assign", + "int32 main(void) { int32 x = 10; x += 32; return x; }", + 42, + NULL + }, + + { + "sub_assign", + "int32 main(void) { int32 x = 50; x -= 8; return x; }", + 42, + NULL + }, + + /* Ternary operator */ + { + "ternary_true", + "int32 main(void) { return 1 ? 42 : 0; }", + 42, + NULL + }, + + { + "ternary_false", + "int32 main(void) { return 0 ? 0 : 42; }", + 42, + NULL + }, + + /* Functions */ + { + "function_call", + "int32 add(int32 a, int32 b) { return a + b; }\n" + "int32 main(void) { return add(10, 32); }", + 42, + NULL + }, + + { + "recursive_function", + "int32 fib(int32 n) {\n" + " if (n <= 1) return n;\n" + " return fib(n - 1) + fib(n - 2);\n" + "}\n" + "int32 main(void) { return fib(10); }", + 55, + NULL + }, + + /* Arrays */ + { + "local_array", + "int32 main(void) {\n" + " int32 arr[5];\n" + " arr[0] = 42;\n" + " return arr[0];\n" + "}", + 42, + NULL + }, + + { + "array_init", + "int32 main(void) {\n" + " int32 arr[3] = { 10, 32, 99 };\n" + " return arr[0] + arr[1];\n" + "}", + 42, + NULL + }, + + { + "global_array", + "int32 arr[3] = { 10, 32, 99 };\n" + "int32 main(void) { return arr[0] + arr[1]; }", + 42, + NULL + }, + + /* Pointers */ + { + "address_and_deref", + "int32 main(void) {\n" + " int32 x = 42;\n" + " int32 *p = &x;\n" + " return *p;\n" + "}", + 42, + NULL + }, + + { + "pointer_assignment", + "int32 main(void) {\n" + " int32 x = 0;\n" + " int32 *p = &x;\n" + " *p = 42;\n" + " return x;\n" + "}", + 42, + NULL + }, + + /* Type casting */ + { + "cast_to_uint8", + "int32 main(void) { return (uint8)42; }", + 42, + NULL + }, + + { + "cast_truncate", + "int32 main(void) { return (uint8)298; }", + 42, + NULL + }, + + /* Different integer sizes */ + { + "uint8_type", + "int32 main(void) { uint8 x = 42; return x; }", + 42, + NULL + }, + + { + "uint16_type", + "int32 main(void) { uint16 x = 42; return x; }", + 42, + NULL + }, + + { + 
"int8_signed", + "int32 main(void) { int8 x = -42; return -x; }", + 42, + NULL + }, + + /* Switch statement */ + { + "switch_basic", + "int32 main(void) {\n" + " int32 x = 2;\n" + " switch (x) {\n" + " case 1: return 10;\n" + " case 2: return 42;\n" + " case 3: return 20;\n" + " }\n" + " return 0;\n" + "}", + 42, + NULL + }, + + { + "switch_default", + "int32 main(void) {\n" + " int32 x = 99;\n" + " switch (x) {\n" + " case 1: return 10;\n" + " default: return 42;\n" + " }\n" + "}", + 42, + NULL + }, + + /* Break/continue */ + { + "break_loop", + "int32 main(void) {\n" + " int32 x = 0;\n" + " while (1) {\n" + " x = x + 1;\n" + " if (x == 42) break;\n" + " }\n" + " return x;\n" + "}", + 42, + NULL + }, + + { + "continue_loop", + "int32 main(void) {\n" + " int32 x = 0;\n" + " int32 sum = 0;\n" + " while (x < 50) {\n" + " x = x + 1;\n" + " if (x > 42) continue;\n" + " sum = sum + 1;\n" + " }\n" + " return sum;\n" + "}", + 42, + NULL + }, + + /* End marker */ + { NULL, NULL, 0, NULL } +}; + +int main(void) { + printf("Common Compiler Test Suite\n"); + printf("===========================\n\n"); + + /* Check if compiler exists */ + if (access("./common", X_OK) != 0) { + fprintf(stderr, "Error: ./common not found or not executable\n"); + fprintf(stderr, "Please build it first: gcc -o common common.c\n"); + return 1; + } + + /* Run all tests */ + for (int i = 0; tests[i].name != NULL; i++) { + run_test(&tests[i]); + } + + /* Summary */ + printf("\n===========================\n"); + printf("Total: %d\n", test_count); + printf("Passed: %d\n", test_passed); + printf("Failed: %d\n", test_failed); + printf("===========================\n"); + + return test_failed > 0 ? 1 : 0; +} diff --git a/test_suite.cm b/test_suite.cm new file mode 100644 index 0000000..6e3219b --- /dev/null +++ b/test_suite.cm @@ -0,0 +1,1024 @@ +// ============================================================ +// Public domain / CC0. Use freely for any purpose. 
RoyR 2026 +// test_suite.cm — Extensive tests for the Common compiler +// +// Build & run: +// gcc -o common common.c +// ./common test_suite.cm test_suite.asm +// nasm -f elf32 test_suite.asm -o test_suite.o +// gcc -m32 -no-pie test_suite.o -o test_suite +// ./test_suite +// +// A passing run prints only "ALL TESTS PASSED". +// Any failure prints the failing test name and exits with 1. +// ============================================================ + +// --------------- C library declarations -------------------- +void printf(uint8* fmt); +void exit(uint32 code); + +// --------------- Test harness ------------------------------ +uint32 g_tests_run = 0; +uint32 g_tests_failed = 0; + +// We call these with a string literal + an int; we use a +// hand-rolled dispatcher because Common has no varargs. +void print_str(uint8* s); +void print_int(uint32 n); +void print_nl(); + +// Minimal wrappers around printf so we only need one extern +void print_str(uint8* s) { + printf(s); +} +void print_int(uint32 n) { + // itoa into a local buffer and print + uint8 buf[32]; + uint32 i = 30; + buf[31] = 0; + if (n == 0) { + buf[30] = 48; // '0' + print_str(&buf[30]); + return; + } + while (n > 0) { + buf[i] = (n % 10) + 48; + n = n / 10; + i = i - 1; + } + i = i + 1; + print_str(&buf[i]); +} +void print_nl() { + printf("\n"); +} + +void assert_eq(uint8* name, uint32 got, uint32 expected) { + g_tests_run = g_tests_run + 1; + if (got != expected) { + g_tests_failed = g_tests_failed + 1; + print_str("FAIL: "); + print_str(name); + print_str(" got="); + print_int(got); + print_str(" expected="); + print_int(expected); + print_nl(); + } +} + +// ============================================================ +// 1. 
BASIC ARITHMETIC
+// ============================================================
+// Checks + - * / % on uint32 operands, plus operator precedence and
+// parenthesised grouping on integer literals.
+void test_arithmetic() {
+    uint32 a = 10;
+    uint32 b = 3;
+
+    assert_eq("add", a + b, 13);
+    assert_eq("sub", a - b, 7);
+    assert_eq("mul", a * b, 30);
+    assert_eq("div", a / b, 3);
+    assert_eq("mod", a % b, 1);
+    assert_eq("neg", 0 - a, 4294967286); // wrap-around uint32: 2^32 - 10
+    assert_eq("add_chain", 1+2+3+4+5, 15);
+    assert_eq("precedence", 2+3*4, 14);   // * binds tighter than +
+    assert_eq("parens", (2+3)*4, 20);
+    assert_eq("mixed", 10-2*3+1, 5);
+}
+
+// ============================================================
+// 2. BITWISE OPERATIONS
+// ============================================================
+// Checks & | ^ ~ << >> on uint32 variables and on hex/decimal literals.
+void test_bitwise() {
+    uint32 x = 0xFF;
+    uint32 y = 0x0F;
+
+    assert_eq("band", x & y, 15);
+    assert_eq("bor", x | y, 255);
+    assert_eq("bxor", x ^ y, 240);
+    assert_eq("bnot", ~0, 4294967295);   // all 32 bits set
+    assert_eq("shl", 1 << 4, 16);
+    assert_eq("shr", 256 >> 3, 32);
+    assert_eq("shl8", 0x01 << 8, 256);
+    // Top bit set: the expected value requires a zero-filling shift here.
+    assert_eq("shr_big",0x80000000 >> 1, 0x40000000);
+    assert_eq("xor_inv",0xDEADBEEF ^ 0xFFFFFFFF, 0x21524110);
+    assert_eq("and_mask",0xABCDEF & 0x00FF00, 0x00CD00);
+}
+
+// ============================================================
+// 3. 
COMPARISON & LOGICAL OPERATORS
+// ============================================================
+// Checks that relational, equality and logical operators yield
+// exactly 1 for true and 0 for false.
+void test_comparisons() {
+    assert_eq("lt_true", 3 < 5, 1);
+    assert_eq("lt_false", 5 < 3, 0);
+    assert_eq("leq_eq", 4 <= 4, 1);
+    assert_eq("leq_less", 3 <= 4, 1);
+    assert_eq("leq_false", 5 <= 4, 0);
+    assert_eq("gt_true", 5 > 3, 1);
+    assert_eq("gt_false", 3 > 5, 0);
+    assert_eq("geq_eq", 4 >= 4, 1);
+    assert_eq("eq_true", 7 == 7, 1);
+    assert_eq("eq_false", 7 == 8, 0);
+    assert_eq("neq_true", 7 != 8, 1);
+    assert_eq("neq_false", 7 != 7, 0);
+
+    assert_eq("land_tt", 1 && 1, 1);
+    assert_eq("land_tf", 1 && 0, 0);
+    assert_eq("land_ft", 0 && 1, 0);
+    assert_eq("lor_ff", 0 || 0, 0);
+    assert_eq("lor_tf", 1 || 0, 1);
+    assert_eq("lnot_t", !0, 1);
+    assert_eq("lnot_f", !1, 0);
+    assert_eq("lnot_big", !42, 0);   // any non-zero operand is "true"
+}
+
+// ============================================================
+// 4. ASSIGNMENT OPERATORS
+// ============================================================
+// Checks every compound assignment operator on a single uint32.
+// The steps are order-dependent: each one starts from the value the
+// previous step left in v, except where v is reset explicitly.
+void test_compound_assign() {
+    uint32 v = 10;
+    v += 5; assert_eq("addeq", v, 15);
+    v -= 3; assert_eq("subeq", v, 12);
+    v *= 2; assert_eq("muleq", v, 24);
+    v /= 4; assert_eq("diveq", v, 6);
+    v %= 4; assert_eq("modeq", v, 2);
+    v = 0xFF;
+    v &= 0x0F; assert_eq("andeq", v, 15);
+    v |= 0xF0; assert_eq("oreq", v, 255);
+    v ^= 0xFF; assert_eq("xoreq", v, 0);
+    v = 1;
+    v <<= 3; assert_eq("shleq", v, 8);
+    v >>= 1; assert_eq("shreq", v, 4);
+}
+
+// ============================================================
+// 5. 
INCREMENT / DECREMENT
+// ============================================================
+// Checks both the value of each ++/-- expression and its side effect
+// on the variable. Postfix forms must yield the old value, prefix
+// forms the new one.
+void test_incdec() {
+    uint32 a = 5;
+    uint32 b;
+
+    b = a++; assert_eq("post_inc_ret", b, 5);
+    assert_eq("post_inc_var", a, 6);
+
+    b = a--; assert_eq("post_dec_ret", b, 6);
+    assert_eq("post_dec_var", a, 5);
+
+    b = ++a; assert_eq("pre_inc_ret", b, 6);
+    assert_eq("pre_inc_var", a, 6);
+
+    b = --a; assert_eq("pre_dec_ret", b, 5);
+    assert_eq("pre_dec_var", a, 5);
+}
+
+// ============================================================
+// 6. TERNARY OPERATOR
+// ============================================================
+// Checks ?: with constant and variable arms, nested inside another
+// ternary, and used as an operand of a larger expression.
+void test_ternary() {
+    uint32 x = 10;
+    assert_eq("tern_true", x > 5 ? 1 : 0, 1);
+    assert_eq("tern_false", x < 5 ? 1 : 0, 0);
+    assert_eq("tern_value", x > 5 ? x : 0, 10);
+    assert_eq("tern_nested", x > 5 ? (x > 8 ? 2 : 1) : 0, 2);
+    assert_eq("tern_in_expr",(x > 5 ? 3 : 7) + 10, 13);
+}
+
+// ============================================================
+// 7. IF / ELSE
+// ============================================================
+// Checks if, if/else, else-if chains and nested if with unbraced and
+// braced bodies. r carries its value from one check to the next.
+void test_if_else() {
+    uint32 r = 0;
+
+    if (1) r = 1;
+    assert_eq("if_true", r, 1);
+
+    // Body must be skipped, so r keeps the value set above.
+    if (0) r = 99;
+    assert_eq("if_no_exec", r, 1);
+
+    if (0) r = 0;
+    else r = 2;
+    assert_eq("else_branch", r, 2);
+
+    // else-if chain
+    uint32 v = 5;
+    if (v == 1) r = 10;
+    else if (v == 2) r = 20;
+    else if (v == 5) r = 50;
+    else r = 99;
+    assert_eq("else_if_chain", r, 50);
+
+    // nested if
+    uint32 a = 3;
+    uint32 b = 7;
+    if (a < b) {
+        if (a < 5) r = 1;
+        else r = 2;
+    } else {
+        r = 3;
+    }
+    assert_eq("nested_if", r, 1);
+}
+
+// ============================================================
+// 8. 
WHILE LOOP
+// ============================================================
+// Checks a counted while loop, break out of an infinite loop, and
+// continue skipping the rest of the body.
+void test_while() {
+    uint32 i = 0;
+    uint32 sum = 0;
+    while (i < 10) {
+        sum += i;
+        i++;
+    }
+    assert_eq("while_sum", sum, 45);   // 0+1+...+9
+
+    // break
+    i = 0; sum = 0;
+    while (1) {
+        if (i == 5) break;
+        sum += i;
+        i++;
+    }
+    assert_eq("while_break", sum, 10); // 0+1+2+3+4
+
+    // continue
+    i = 0; sum = 0;
+    while (i < 10) {
+        // Increment before the continue, otherwise the loop never ends.
+        i++;
+        if (i % 2 == 0) continue;
+        sum += i;
+    }
+    assert_eq("while_continue", sum, 25); // 1+3+5+7+9
+}
+
+// ============================================================
+// 9. FOR LOOP
+// ============================================================
+// Checks for loops with an outer-scope counter, with a counter
+// declared in the init clause, nested, and with break / continue.
+void test_for() {
+    uint32 sum = 0;
+    uint32 i;
+    for (i = 0; i < 10; i++) sum += i;
+    assert_eq("for_sum", sum, 45);
+
+    // for with decl
+    sum = 0;
+    for (uint32 j = 1; j <= 5; j++) sum += j;
+    assert_eq("for_decl", sum, 15);
+
+    // nested for
+    sum = 0;
+    for (uint32 a = 0; a < 4; a++)
+        for (uint32 b = 0; b < 4; b++)
+            sum++;
+    assert_eq("nested_for", sum, 16);  // 4 x 4 iterations
+
+    // break in for
+    sum = 0;
+    for (i = 0; i < 100; i++) {
+        if (i == 10) break;
+        sum += i;
+    }
+    assert_eq("for_break", sum, 45);
+
+    // continue in for
+    sum = 0;
+    for (i = 0; i < 10; i++) {
+        if (i % 2 != 0) continue;
+        sum += i;
+    }
+    assert_eq("for_continue", sum, 20); // 0+2+4+6+8
+}
+
+// ============================================================
+// 10. 
SWITCH / CASE
+// ============================================================
+// Checks switch dispatch to a matching case, to default, and
+// fall-through from a case that has no break.
+void test_switch() {
+    uint32 r;
+    uint32 v = 3;
+
+    switch (v) {
+        case 1: r = 10; break;
+        case 2: r = 20; break;
+        case 3: r = 30; break;
+        default: r = 99; break;
+    }
+    assert_eq("switch_hit", r, 30);
+
+    v = 7;
+    switch (v) {
+        case 1: r = 1; break;
+        case 2: r = 2; break;
+        default: r = 42; break;
+    }
+    assert_eq("switch_default", r, 42);
+
+    // fall-through (no break before next case)
+    v = 1;
+    r = 0;
+    switch (v) {
+        case 1: r += 1;          // falls into case 2
+        case 2: r += 2; break;
+        case 3: r += 4; break;
+    }
+    assert_eq("switch_fallthrough", r, 3); // 1 + 2
+}
+
+// ============================================================
+// 11. FUNCTIONS — CALL, RETURN, RECURSION
+// ============================================================
+// Returns a + b.
+uint32 add(uint32 a, uint32 b) {
+    return a + b;
+}
+
+// Returns n! computed recursively; 0 and 1 both give 1.
+uint32 factorial(uint32 n) {
+    if (n <= 1) return 1;
+    return n * factorial(n - 1);
+}
+
+// Returns the n-th Fibonacci number (fib(0) = 0, fib(1) = 1) using
+// two recursive calls per level.
+uint32 fib(uint32 n) {
+    if (n <= 1) return n;
+    return fib(n - 1) + fib(n - 2);
+}
+
+// Returns the greatest common divisor of a and b by repeated
+// remainder; returns a unchanged when b is 0.
+uint32 gcd(uint32 a, uint32 b) {
+    while (b != 0) {
+        uint32 t = b;
+        b = a % b;
+        a = t;
+    }
+    return a;
+}
+
+// Checks argument passing, return values, single and double
+// recursion, and a loop with a block-local variable inside a callee.
+void test_functions() {
+    assert_eq("fn_add", add(3, 4), 7);
+    assert_eq("fn_add_zero", add(0, 0), 0);
+    assert_eq("fn_fact_0", factorial(0), 1);
+    assert_eq("fn_fact_1", factorial(1), 1);
+    assert_eq("fn_fact_5", factorial(5), 120);
+    assert_eq("fn_fact_10", factorial(10), 3628800);
+    assert_eq("fn_fib_0", fib(0), 0);
+    assert_eq("fn_fib_1", fib(1), 1);
+    assert_eq("fn_fib_10", fib(10), 55);
+    assert_eq("fn_gcd_12_8", gcd(12, 8), 4);
+    assert_eq("fn_gcd_100_75",gcd(100, 75), 25);
+    assert_eq("fn_gcd_prime", gcd(17, 13), 1);
+}
+
+// ============================================================
+// 12. 
LOCAL ARRAYS
+// ============================================================
+// Checks stack arrays: indexed store/load, initialiser lists, and
+// element access for both uint32 and uint8 element types.
+void test_local_arrays() {
+    uint32 arr[8];
+    uint32 i;
+
+    // fill and read back
+    for (i = 0; i < 8; i++) arr[i] = i * i;
+    assert_eq("arr_0", arr[0], 0);
+    assert_eq("arr_3", arr[3], 9);
+    assert_eq("arr_7", arr[7], 49);
+
+    // initialiser list
+    uint32 primes[5] = {2, 3, 5, 7, 11};
+    assert_eq("arr_init_0", primes[0], 2);
+    assert_eq("arr_init_4", primes[4], 11);
+
+    // sum via indexed loop (pointer walks are covered in sections 14-16)
+    uint32 sum = 0;
+    for (i = 0; i < 5; i++) sum += primes[i];
+    assert_eq("arr_sum", sum, 28);
+
+    // uint8 array
+    uint8 bytes[4] = {10, 20, 30, 40};
+    assert_eq("u8arr_0", bytes[0], 10);
+    assert_eq("u8arr_3", bytes[3], 40);
+}
+
+// ============================================================
+// 13. POINTERS — BASIC ADDRESS / DEREFERENCE
+// ============================================================
+// Checks & and * on a scalar, writes through a pointer, a pointer to
+// a pointer, and the address of an array element.
+void test_pointers_basic() {
+    uint32 v = 42;
+    uint32* p = &v;
+
+    assert_eq("ptr_deref", *p, 42);
+    assert_eq("ptr_deref_eq", *p == v, 1);
+
+    // A store through p must be visible through v and vice versa.
+    *p = 99;
+    assert_eq("ptr_write", v, 99);
+    assert_eq("ptr_write_p", *p, 99);
+
+    // pointer to pointer
+    uint32** pp = &p;
+    assert_eq("pptr_deref", **pp, 99);
+    **pp = 7;
+    assert_eq("pptr_write", v, 7);
+
+    // address of array element
+    uint32 arr[4] = {10, 20, 30, 40};
+    uint32* q = &arr[2];
+    assert_eq("arr_elem_ptr", *q, 30);
+    *q = 300;
+    assert_eq("arr_elem_write",arr[2], 300);
+}
+
+// ============================================================
+// 14. 
POINTER ARITHMETIC — uint32* (stride 4)
+// ============================================================
+// Checks that arithmetic on a uint32* moves in whole elements:
+// p+k, ++, +=, --, pointer difference, and stores through p+k.
+void test_ptr_arith_u32() {
+    uint32 arr[6] = {10, 20, 30, 40, 50, 60};
+    uint32* p = arr;
+
+    assert_eq("u32p_base", *p, 10);
+    assert_eq("u32p_plus1", *(p+1), 20);
+    assert_eq("u32p_plus2", *(p+2), 30);
+    assert_eq("u32p_plus5", *(p+5), 60);
+
+    // pointer increment
+    p++;
+    assert_eq("u32p_inc", *p, 20);
+    p += 2;
+    assert_eq("u32p_addeq", *p, 40);
+    p--;
+    assert_eq("u32p_dec", *p, 30);
+
+    // pointer subtraction (integer result, counted in elements not bytes)
+    uint32* base = arr;
+    uint32* cur = arr;
+    cur += 4;
+    assert_eq("u32p_diff", cur - base, 4);
+
+    // write through incremented pointer
+    uint32* w = arr;
+    *(w + 3) = 400;
+    assert_eq("u32p_write", arr[3], 400);
+
+    // walk with pointer in for loop
+    uint32 sum = 0;
+    uint32* it = arr;
+    uint32 i;
+    for (i = 0; i < 6; i++) {
+        sum += *it;
+        it++;
+    }
+    // arr is now {10,20,30,400,50,60}
+    assert_eq("u32p_walk_sum", sum, 570);
+}
+
+// ============================================================
+// 15. POINTER ARITHMETIC — uint8* (stride 1)
+// ============================================================
+// Same checks as section 14 for a uint8*, where one element is one
+// byte; also adds an offset directly to the array name.
+void test_ptr_arith_u8() {
+    uint8 bytes[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+    uint8* p = bytes;
+
+    assert_eq("u8p_base", *p, 1);
+    assert_eq("u8p_plus3", *(p+3), 4);
+    assert_eq("u8p_plus7", *(p+7), 8);
+
+    p++;
+    assert_eq("u8p_inc", *p, 2);
+    p += 3;
+    assert_eq("u8p_addeq", *p, 5);
+
+    // end points one past the last element; it is only subtracted,
+    // never dereferenced.
+    uint8* base = bytes;
+    uint8* end = bytes;
+    end += 8;
+    assert_eq("u8p_diff", end - base, 8);
+
+    // write
+    *(bytes + 4) = 50;
+    assert_eq("u8p_write", bytes[4], 50);
+
+    // sum via pointer walk
+    uint32 sum = 0;
+    uint8* it = bytes;
+    uint32 i;
+    for (i = 0; i < 8; i++) { sum += *it; it++; }
+    // bytes: {1,2,3,4,50,6,7,8}
+    assert_eq("u8p_walk_sum", sum, 81);
+}
+
+// ============================================================
+// 16. 
POINTER ARITHMETIC — uint16* (stride 2)
+// ============================================================
+// Checks element-wise pointer arithmetic for a uint16*.
+void test_ptr_arith_u16() {
+    uint16 shorts[4] = {100, 200, 300, 400};
+    uint16* p = shorts;
+
+    assert_eq("u16p_base", *p, 100);
+    assert_eq("u16p_plus1", *(p+1), 200);
+    assert_eq("u16p_plus3", *(p+3), 400);
+
+    p++;
+    assert_eq("u16p_inc", *p, 200);
+
+    // Reset p, then check the difference is counted in elements.
+    uint16* base = shorts;
+    p = shorts;
+    p += 3;
+    assert_eq("u16p_diff", p - base, 3);
+}
+
+// ============================================================
+// 17. POINTER TO POINTER (stride 4 always)
+// ============================================================
+// Checks an array of uint32* walked through a uint32**: each step of
+// pp must advance by one pointer-sized slot.
+void test_ptr_to_ptr() {
+    uint32 a = 1;
+    uint32 b = 2;
+    uint32 c = 3;
+
+    uint32* ptrs[3];
+    ptrs[0] = &a;
+    ptrs[1] = &b;
+    ptrs[2] = &c;
+
+    uint32** pp = ptrs;
+    assert_eq("pp_0", **pp, 1);
+    assert_eq("pp_1", **(pp+1), 2);
+    assert_eq("pp_2", **(pp+2), 3);
+
+    pp++;
+    assert_eq("pp_inc", **pp, 2);
+}
+
+// ============================================================
+// 18. CAST
+// ============================================================
+// Checks that a cast to uint8 keeps only the low 8 bits and that
+// widening back to uint32 preserves the value.
+void test_cast() {
+    uint32 big = 0x1234FF;
+    uint8 small = (uint8)big;
+    assert_eq("cast_u32_u8", small, 255);   // low byte 0xFF
+
+    uint32 x = 300;
+    uint8 y = (uint8)x;
+    assert_eq("cast_300_u8", y, 44); // 300 % 256
+
+    uint32 z = (uint32)y;
+    assert_eq("cast_u8_u32", z, 44);
+}
+
+// ============================================================
+// 19. 
GLOBAL VARIABLES
+// ============================================================
+// Globals with non-trivial initialisers, modified only through the
+// two helper functions below.
+uint32 g_counter = 0;
+uint32 g_accum = 100;
+
+// Adds 1 to g_counter.
+void increment_global() { g_counter++; }
+// Adds v to g_accum.
+void add_to_accum(uint32 v) { g_accum += v; }
+
+// Checks initial values of globals and that writes made inside other
+// functions are visible here. Relies on nothing else having touched
+// g_counter / g_accum before it runs.
+void test_globals() {
+    assert_eq("global_init_counter", g_counter, 0);
+    assert_eq("global_init_accum", g_accum, 100);
+
+    increment_global();
+    increment_global();
+    increment_global();
+    assert_eq("global_inc_3", g_counter, 3);
+
+    add_to_accum(50);
+    add_to_accum(25);
+    assert_eq("global_accum", g_accum, 175);
+}
+
+// ============================================================
+// 20. STRING LITERALS (pointer identity / walking)
+// ============================================================
+// Returns the number of bytes before the terminating 0 in s.
+uint32 strlen_cm(uint8* s) {
+    uint32 len = 0;
+    while (*s != 0) { s++; len++; }
+    return len;
+}
+
+// Compares two NUL-terminated strings byte by byte.
+// Returns 0 when equal, otherwise the difference of the first pair of
+// bytes that differ (*a - *b). The callers below rely on that
+// difference comparing as negative when *a is the smaller byte.
+int32 strcmp_cm(uint8* a, uint8* b) {
+    while (*a != 0 && *a == *b) { a++; b++; }
+    return *a - *b;
+}
+
+// Copies src, including its terminating 0, into dst.
+// No length is checked: dst must have room for strlen_cm(src) + 1 bytes.
+void strcpy_cm(uint8* dst, uint8* src) {
+    while (*src != 0) { *dst = *src; dst++; src++; }
+    *dst = 0;
+}
+
+// Checks string literals passed as uint8*, walked byte by byte, and
+// copied into a local buffer.
+void test_strings() {
+    assert_eq("strlen_empty", strlen_cm(""), 0);
+    assert_eq("strlen_hello", strlen_cm("hello"), 5);
+    assert_eq("strlen_long", strlen_cm("abcdefghij"), 10);
+
+    // Only the sign of strcmp_cm is checked for unequal strings; the
+    // ternary converts it to 0/1 for assert_eq.
+    assert_eq("strcmp_eq", strcmp_cm("abc","abc"), 0);
+    assert_eq("strcmp_lt", strcmp_cm("abc","abd") < 0 ? 1 : 0, 1);
+    assert_eq("strcmp_gt", strcmp_cm("abd","abc") > 0 ? 1 : 0, 1);
+
+    uint8 buf[32];
+    strcpy_cm(buf, "hello");
+    assert_eq("strcpy_len", strlen_cm(buf), 5);
+    assert_eq("strcpy_0", buf[0], 104); // 'h'
+    assert_eq("strcpy_4", buf[4], 111); // 'o'
+}
+
+// ============================================================
+// 21. 
BUBBLE SORT (arrays + pointers together)
+// ============================================================
+// Sorts the n uint32 values starting at arr into ascending order, in
+// place, using pointer arithmetic for every element access.
+//   arr - first element of the array to sort
+//   n   - number of elements; 0 and 1 are valid and leave arr untouched
+void bubble_sort(uint32* arr, uint32 n) {
+    uint32 i;
+    uint32 j;
+    // n is unsigned, so for n == 0 the loop bound n - 1 would wrap to
+    // 0xFFFFFFFF and the loops would run far past the array. Nothing
+    // needs sorting below two elements anyway.
+    if (n < 2) return;
+    for (i = 0; i < n - 1; i++) {
+        // After pass i the largest i + 1 values are in their final
+        // slots at the end, so each pass can stop one element earlier.
+        for (j = 0; j < n - 1 - i; j++) {
+            if (*(arr + j) > *(arr + j + 1)) {
+                uint32 tmp = *(arr + j);
+                *(arr + j) = *(arr + j + 1);
+                *(arr + j + 1) = tmp;
+            }
+        }
+    }
+}
+
+// Checks bubble_sort on shuffled, already-sorted and reverse-sorted
+// input, and on the degenerate lengths 0 and 1.
+void test_sort() {
+    uint32 data[8] = {5, 3, 8, 1, 9, 2, 7, 4};
+    bubble_sort(data, 8);
+    // The input has no 6, so the sorted result is 1,2,3,4,5,7,8,9.
+    assert_eq("sort_0", data[0], 1);
+    assert_eq("sort_1", data[1], 2);
+    assert_eq("sort_2", data[2], 3);
+    assert_eq("sort_3", data[3], 4);
+    assert_eq("sort_4", data[4], 5);
+    assert_eq("sort_5", data[5], 7);
+    assert_eq("sort_6", data[6], 8);
+    assert_eq("sort_7", data[7], 9);
+
+    // Already sorted
+    uint32 sorted[4] = {1, 2, 3, 4};
+    bubble_sort(sorted, 4);
+    assert_eq("sort_already_0", sorted[0], 1);
+    assert_eq("sort_already_3", sorted[3], 4);
+
+    // Reverse sorted
+    uint32 rev[5] = {5, 4, 3, 2, 1};
+    bubble_sort(rev, 5);
+    assert_eq("sort_rev_0", rev[0], 1);
+    assert_eq("sort_rev_4", rev[4], 5);
+
+    // Degenerate lengths: must return without touching the array
+    uint32 lone[1] = {42};
+    bubble_sort(lone, 0);
+    assert_eq("sort_len0", lone[0], 42);
+    bubble_sort(lone, 1);
+    assert_eq("sort_len1", lone[0], 42);
+}
+
+// ============================================================
+// 22. 
MEMSET / MEMCPY via pointer arithmetic
+// ============================================================
+// Stores val into each of the n bytes starting at dst.
+void memset_cm(uint8* dst, uint8 val, uint32 n) {
+    uint32 i;
+    for (i = 0; i < n; i++) { *dst = val; dst++; }
+}
+
+// Copies n bytes from src to dst, lowest address first.
+void memcpy_cm(uint8* dst, uint8* src, uint32 n) {
+    uint32 i;
+    for (i = 0; i < n; i++) { *dst = *src; dst++; src++; }
+}
+
+// Returns 1 if the first n bytes of a and b are identical, 0 at the
+// first difference. Note this is an equality test, the opposite of
+// C's memcmp, which returns 0 for equal buffers.
+uint32 memcmp_cm(uint8* a, uint8* b, uint32 n) {
+    uint32 i;
+    for (i = 0; i < n; i++) {
+        if (*a != *b) return 0;
+        a++; b++;
+    }
+    return 1;
+}
+
+// Checks the three helpers above on local uint8 buffers, including
+// that a partial memset leaves the following bytes alone.
+void test_mem_ops() {
+    uint8 buf[16];
+    memset_cm(buf, 0, 16);
+    assert_eq("memset_0", buf[0], 0);
+    assert_eq("memset_15", buf[15], 0);
+
+    memset_cm(buf, 0xFF, 8);
+    assert_eq("memset_ff_0", buf[0], 255);
+    assert_eq("memset_ff_7", buf[7], 255);
+    assert_eq("memset_ff_8", buf[8], 0); // untouched
+
+    uint8 src[8] = {1,2,3,4,5,6,7,8};
+    uint8 dst[8];
+    memset_cm(dst, 0, 8);
+    memcpy_cm(dst, src, 8);
+    assert_eq("memcpy_0", dst[0], 1);
+    assert_eq("memcpy_7", dst[7], 8);
+    assert_eq("memcmp_eq", memcmp_cm(src, dst, 8), 1);
+
+    // Corrupt one byte so the buffers differ.
+    dst[3] = 99;
+    assert_eq("memcmp_ne", memcmp_cm(src, dst, 8), 0);
+}
+
+// ============================================================
+// 23. LINKED LIST (pointer + struct-like layout in a flat array)
+//
+// We simulate a singly-linked list by storing nodes in a pool.
+// Each node is two consecutive uint32 words: [value, next_index]
+// next_index == 0xFFFFFFFF means NULL.
+// ============================================================
+uint32 node_pool[64];   // 32 nodes × 2 words each
+uint32 pool_ptr = 0;    // index of the next unused node
+
+// Takes the next unused node from the pool, stores val in it and
+// marks it as the end of a list (next == 0xFFFFFFFF).
+// Returns the node's index, or 0xFFFFFFFF if all 32 nodes are in use.
+// Nodes are never returned to the pool.
+uint32 alloc_node_ll(uint32 val) {
+    // Without this check the 33rd allocation would write past
+    // node_pool[63].
+    if (pool_ptr >= 32) return 0xFFFFFFFF;
+    uint32 idx = pool_ptr;
+    node_pool[idx * 2] = val;
+    node_pool[idx * 2 + 1] = 0xFFFFFFFF;
+    pool_ptr = pool_ptr + 1;
+    return idx;
+}
+
+// Makes node next_idx the successor of node idx.
+void ll_set_next(uint32 idx, uint32 next_idx) {
+    node_pool[idx * 2 + 1] = next_idx;
+}
+
+// Value word and next-index word of node idx.
+uint32 ll_val(uint32 idx) { return node_pool[idx * 2]; }
+uint32 ll_next(uint32 idx) { return node_pool[idx * 2 + 1]; }
+
+// Returns the number of nodes reachable from head (0 if head is NULL).
+uint32 ll_length(uint32 head) {
+    uint32 len = 0;
+    uint32 cur = head;
+    while (cur != 0xFFFFFFFF) { len++; cur = ll_next(cur); }
+    return len;
+}
+
+// Returns the sum of the values of all nodes reachable from head.
+uint32 ll_sum(uint32 head) {
+    uint32 s = 0;
+    uint32 cur = head;
+    while (cur != 0xFFFFFFFF) { s += ll_val(cur); cur = ll_next(cur); }
+    return s;
+}
+
+// Checks global-array indexing with computed indices by building and
+// traversing a four-node list.
+void test_linked_list() {
+    // Build list: 10 -> 20 -> 30 -> 40 -> NULL
+    uint32 n0 = alloc_node_ll(10);
+    uint32 n1 = alloc_node_ll(20);
+    uint32 n2 = alloc_node_ll(30);
+    uint32 n3 = alloc_node_ll(40);
+    ll_set_next(n0, n1);
+    ll_set_next(n1, n2);
+    ll_set_next(n2, n3);
+
+    assert_eq("ll_len", ll_length(n0), 4);
+    assert_eq("ll_sum", ll_sum(n0), 100);
+    assert_eq("ll_head", ll_val(n0), 10);
+    assert_eq("ll_tail", ll_val(n3), 40);
+    assert_eq("ll_next0", ll_next(n0), n1);
+    assert_eq("ll_next3", ll_next(n3), 0xFFFFFFFF);
+}
+
+// ============================================================
+// 24. BIT-MANIPULATION ALGORITHMS
+// ============================================================
+// Returns the number of 1 bits in n.
+uint32 popcount(uint32 n) {
+    uint32 c = 0;
+    while (n != 0) { c += n & 1; n >>= 1; }
+    return c;
+}
+
+// Returns n with its 32 bits in reverse order (bit 0 becomes bit 31).
+uint32 reverse_bits(uint32 n) {
+    uint32 r = 0;
+    uint32 i;
+    for (i = 0; i < 32; i++) {
+        // Shift the result left and append the current lowest bit of n.
+        r = (r << 1) | (n & 1);
+        n >>= 1;
+    }
+    return r;
+}
+
+// Returns 1 if n has exactly one bit set, otherwise 0 (including n == 0).
+uint32 is_power_of_two(uint32 n) {
+    if (n == 0) return 0;
+    // n - 1 flips the lowest set bit and everything below it, so the
+    // AND is zero only when that was the sole set bit.
+    return (n & (n - 1)) == 0 ? 1 : 0;
+}
+
+// Checks the three bit helpers, which combine shifts, masks and loops.
+void test_bit_ops() {
+    assert_eq("popcount_0", popcount(0), 0);
+    assert_eq("popcount_1", popcount(1), 1);
+    assert_eq("popcount_ff", popcount(0xFF), 8);
+    assert_eq("popcount_dead", popcount(0xDEADBEEF), 24);
+
+    assert_eq("revbits_0", reverse_bits(0), 0);
+    assert_eq("revbits_1", reverse_bits(0x80000000), 1);
+    assert_eq("revbits_2", reverse_bits(1), 0x80000000);
+
+    assert_eq("pow2_0", is_power_of_two(0), 0);
+    assert_eq("pow2_1", is_power_of_two(1), 1);
+    assert_eq("pow2_2", is_power_of_two(2), 1);
+    assert_eq("pow2_3", is_power_of_two(3), 0);
+    assert_eq("pow2_64", is_power_of_two(64), 1);
+    assert_eq("pow2_96", is_power_of_two(96), 0);
+}
+
+// ============================================================
+// 25. MULTI-LEVEL POINTER INDIRECTION + ARITHMETIC
+// ============================================================
+// Checks reads, writes and arithmetic through a uint32** that walks a
+// table of pointers into a uint32 array.
+void test_multilevel_ptr() {
+    uint32 vals[4] = {11, 22, 33, 44};
+    uint32* p0 = &vals[0];
+    uint32* p1 = &vals[1];
+    uint32* p2 = &vals[2];
+    uint32* p3 = &vals[3];
+
+    uint32* ptable[4];
+    ptable[0] = p0;
+    ptable[1] = p1;
+    ptable[2] = p2;
+    ptable[3] = p3;
+
+    uint32** pp = ptable;
+
+    assert_eq("ml_0", **pp, 11);
+    assert_eq("ml_1", **(pp+1), 22);
+    assert_eq("ml_2", **(pp+2), 33);
+    assert_eq("ml_3", **(pp+3), 44);
+
+    // Write through double indirection
+    **(pp + 2) = 330;
+    assert_eq("ml_write", vals[2], 330);
+
+    // Pointer arithmetic on pp itself
+    pp += 2;
+    assert_eq("ml_pp_inc", **pp, 330);
+
+    // The difference is counted in table slots, not bytes.
+    uint32** base = ptable;
+    assert_eq("ml_pp_diff", pp - base, 2);
+}
+
+// ============================================================
+// 26. MATRIX (2-D array accessed via pointer arithmetic)
+// ============================================================
+// 3×3 matrix stored row-major in a flat array.
+// Returns element (row, col) of the row-major matrix m, where cols is
+// the number of columns per row. No bounds are checked.
+uint32 mat_get(uint32* m, uint32 row, uint32 col, uint32 cols) {
+    return *(m + row * cols + col);
+}
+// Stores val into element (row, col) of the row-major matrix m.
+void mat_set(uint32* m, uint32 row, uint32 col, uint32 cols, uint32 val) {
+    *(m + row * cols + col) = val;
+}
+
+// Checks computed pointer offsets (row * cols + col) for reads and
+// writes on a 3×3 matrix held in a flat local array.
+void test_matrix() {
+    uint32 mat[9] = {1,2,3, 4,5,6, 7,8,9};
+
+    assert_eq("mat_0_0", mat_get(mat,0,0,3), 1);
+    assert_eq("mat_1_1", mat_get(mat,1,1,3), 5);
+    assert_eq("mat_2_2", mat_get(mat,2,2,3), 9);
+    assert_eq("mat_0_2", mat_get(mat,0,2,3), 3);
+    assert_eq("mat_2_0", mat_get(mat,2,0,3), 7);
+
+    // (1,1) is flat index 1 * 3 + 1 = 4.
+    mat_set(mat, 1, 1, 3, 99);
+    assert_eq("mat_write", mat[4], 99);
+
+    // Trace (diagonal sum)
+    uint32 trace = 0;
+    uint32 i;
+    for (i = 0; i < 3; i++) trace += mat_get(mat, i, i, 3);
+    assert_eq("mat_trace", trace, 1 + 99 + 9);
+}
+
+// ============================================================
+// 27. FUNCTION POINTERS (called through pointer)
+// ============================================================
+uint32 double_it(uint32 x) { return x * 2; }
+uint32 triple_it(uint32 x) { return x * 3; }
+uint32 negate_it(uint32 x) { return 0 - x; }   // wraps: 2^32 - x for x > 0
+
+// Intended to call the function whose address is held in fn with the
+// single argument x and return its result.
+// NOTE(review): nothing in this file calls apply(), so the indirect
+// call expression below is compiled but never executed by the suite.
+uint32 apply(uint32* fn, uint32 x) {
+    // fn carries a function address typed as uint32*; dereference it
+    // and call the result
+    return (*((uint32*)fn))(x);   // call through pointer
+}
+
+// Checks that a function name can be cast to uint32* and stored in a
+// scalar and in an array element.
+// NOTE(review): despite the section title, every assertion below calls
+// double_it / triple_it / negate_it directly; fp and fns are written
+// but never read. Presumably indirect calls were not reliable when
+// this was written — confirm, and if they are supported switch the
+// assertions to apply(fp, ...) / apply(fns[k], ...).
+void test_function_pointers() {
+    // Store a function address in a variable (the call itself is direct)
+    uint32* fp;
+    fp = (uint32*)double_it;
+    assert_eq("fnptr_double", double_it(7), 14);
+
+    fp = (uint32*)triple_it;
+    assert_eq("fnptr_triple", triple_it(5), 15);
+
+    // Array of function pointers
+    uint32* fns[3];
+    fns[0] = (uint32*)double_it;
+    fns[1] = (uint32*)triple_it;
+    fns[2] = (uint32*)negate_it;
+
+    assert_eq("fnptr_arr_0", double_it(4), 8);
+    assert_eq("fnptr_arr_1", triple_it(4), 12);
+    assert_eq("fnptr_arr_2", negate_it(4) + 8, 4); // 0-4 = 0xFFFFFFFC, +8 wraps
+}
+
+// ============================================================
+// 28. 
EDGE CASES — zero, max values, chained operations
+// ============================================================
+// Checks uint32 behaviour at the top of its range, a shift into the
+// top bit, a chain of && comparisons, and chained assignment.
+void test_edge_cases() {
+    // uint32 max
+    uint32 mx = 0xFFFFFFFF;
+    assert_eq("max_val", mx, 4294967295);
+    assert_eq("max_plus1", mx + 1, 0); // overflow wraps
+    assert_eq("max_and", mx & 0, 0);
+    assert_eq("max_or", 0 | mx, mx);
+    assert_eq("zero_div_rem",10 % 1, 0);   // remainder by 1 is always 0
+    assert_eq("shift_32m1", 1 << 31, 0x80000000);
+    assert_eq("chain_cmp", 1 < 2 && 3 > 2 && 4 == 4, 1);
+
+    // Chained assignment: the value of each assignment feeds the next
+    // one to its left, so all three variables end up as 7.
+    uint32 a; uint32 b; uint32 c;
+    a = b = c = 7;
+    assert_eq("chain_assign_a", a, 7);
+    assert_eq("chain_assign_b", b, 7);
+    assert_eq("chain_assign_c", c, 7);
+}
+
+// ============================================================
+// 29. SIGNED TYPES — int8, int16, int32
+// ============================================================
+// Checks signed arithmetic, comparisons, right shift, narrow signed
+// types and casts. assert_eq takes uint32 arguments, so every negative
+// expected value is written as its unsigned 32-bit equivalent
+// (2^32 - |value|).
+void test_signed() {
+    // --- int32 arithmetic ---
+    int32 a = -5;
+    int32 b = 3;
+    assert_eq("s32_add", a + b, 4294967294); // -2 as uint32
+    assert_eq("s32_sub", a - b, 4294967288); // -8 as uint32
+    assert_eq("s32_mul", a * b, 4294967281); // -15 as uint32
+    // Division is expected to truncate toward zero, and the remainder
+    // to take the sign of the dividend.
+    assert_eq("s32_div", a / b, 4294967295); // -1 as uint32
+    assert_eq("s32_mod", a % b, 4294967294); // -2 as uint32
+
+    // --- int32 comparisons (signed) ---
+    assert_eq("s32_lt", a < b, 1); // -5 < 3
+    assert_eq("s32_gt", a > b, 0);
+    assert_eq("s32_lt_neg", -10 < -1, 1); // both negative literals
+    assert_eq("s32_gt_neg", -1 > -10, 1);
+    assert_eq("s32_lteq", a <= -5, 1);
+    assert_eq("s32_gteq", b >= 3, 1);
+
+    // --- int32 signed right shift (arithmetic) ---
+    // The sign bit must be replicated into the vacated high bits.
+    int32 neg = -8;
+    assert_eq("s32_sar1", neg >> 1, 4294967292); // -4 as uint32
+    assert_eq("s32_sar2", neg >> 2, 4294967294); // -2 as uint32
+
+    // --- int8 scalar and array ---
+    // Narrow signed values are expected to be sign-extended when
+    // widened to assert_eq's uint32 parameter.
+    int8 x = -1;
+    int8 y = 100;
+    assert_eq("s8_neg", x, 4294967295); // -1 as uint32
+    assert_eq("s8_pos", y, 100);
+    assert_eq("s8_lt", x < y, 1); // -1 < 100 signed
+
+    int8 arr[4] = {-1, -2, 127, -128};
+    assert_eq("s8arr_0", arr[0], 4294967295); // -1
+    assert_eq("s8arr_1", arr[1], 4294967294); // -2
+    assert_eq("s8arr_2", arr[2], 127);
+    assert_eq("s8arr_3", arr[3], 4294967168); // -128
+
+    // --- int16 ---
+    int16 p = -1000;
+    int16 q = 500;
+    assert_eq("s16_lt", p < q, 1);
+    assert_eq("s16_gt", q > p, 1);
+
+    // --- cast sign extension ---
+    int32 big = 200;
+    int8 small = (int8)big; // 200 - 256 = -56
+    int32 back = (int32)small;
+    assert_eq("s_cast_i8", small, 4294967240); // -56 as uint32
+    assert_eq("s_cast_i32", back, 4294967240); // -56
+
+    // --- int8 pointer dereference ---
+    int8 bval = -42;
+    int8* bp = &bval;
+    assert_eq("s8_deref", *bp, 4294967254); // -42 as uint32
+}
+
+// ============================================================
+// MAIN — run all test groups
+// ============================================================
+// Runs every test group in section order, then prints a one-line
+// summary. If any assertion failed, calls exit(1) after the summary.
+// Groups are not independent of their position in the file's global
+// state: test_globals and test_linked_list each expect their globals
+// to be untouched when they start.
+void main() {
+    test_arithmetic();
+    test_bitwise();
+    test_comparisons();
+    test_compound_assign();
+    test_incdec();
+    test_ternary();
+    test_if_else();
+    test_while();
+    test_for();
+    test_switch();
+    test_functions();
+    test_local_arrays();
+    test_pointers_basic();
+    test_ptr_arith_u32();
+    test_ptr_arith_u8();
+    test_ptr_arith_u16();
+    test_ptr_to_ptr();
+    test_cast();
+    test_globals();
+    test_strings();
+    test_sort();
+    test_mem_ops();
+    test_linked_list();
+    test_bit_ops();
+    test_multilevel_ptr();
+    test_matrix();
+    test_function_pointers();
+    test_edge_cases();
+    test_signed();
+
+    // The summary is built from separate calls because printf is
+    // declared with a single (format) parameter in this file.
+    if (g_tests_failed == 0) {
+        printf("ALL TESTS PASSED (");
+        print_int(g_tests_run);
+        printf(" tests)\n");
+    } else {
+        print_int(g_tests_failed);
+        printf(" TESTS FAILED out of ");
+        print_int(g_tests_run);
+        printf("\n");
+        exit(1);
+    }
+}