Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 21a15a24 authored by Jack Palevich's avatar Jack Palevich
Browse files

Various C++ improvements

+ Changed indentation, causes the stats to say there's a lot of lines changed.
+ Should be able to compile multiple times with the same compiler object.
+ Create a CodeBuffer class to hold the code.
+ Create a CodeGenerator class to encapsulate knowledge of the CPU instruction set.
+ Started filling in the code generator.
parent a9693057
Loading
Loading
Loading
Loading
+794 −542
Original line number Diff line number Diff line
@@ -30,6 +30,240 @@
namespace acc {

class compiler {

    /*
     * Fixed-capacity byte buffer that receives the generated machine code.
     *
     * init() allocates a zero-filled block and the emit helpers append at the
     * write cursor. No bounds checking is performed: the caller must size the
     * buffer generously enough for the program being compiled.
     *
     * Addresses are exchanged as plain ints, so the code as written assumes
     * that a pointer fits in an int.
     */
    class CodeBuf {
        char* ind;          /* write cursor: next byte to be emitted */
        char* pProgramBase; /* start of the allocation, 0 when there is none */

        /* The buffer is owned through a raw pointer, so a copy would make two
         * objects free() the same block. Copying is therefore declared private
         * and deliberately left undefined. */
        CodeBuf(const CodeBuf&);
        CodeBuf& operator=(const CodeBuf&);

        /* Free the current allocation (if any) and forget both pointers. */
        void release() {
            if (pProgramBase != 0) {
                free(pProgramBase);
                pProgramBase = 0;
            }
            /* do not leave the cursor pointing into freed memory */
            ind = 0;
        }

    public:
        CodeBuf() {
            pProgramBase = 0;
            ind = 0;
        }

        ~CodeBuf() {
            release();
        }

        /*
         * Discard any previous contents and allocate 'size' zeroed bytes.
         * If calloc fails both pointers stay 0 (getBase() returns 0).
         */
        void init(int size) {
            release();
            pProgramBase = (char*) calloc(1, size);
            ind = pProgramBase;
        }

        /*
         * Emit the bytes of n, least significant byte first, stopping as
         * soon as the remaining value is 0 or -1. Consequently o(0) emits
         * nothing and a zero byte can never be the last byte written; use
         * ob() for those.
         */
        void o(int n) {
            /* cannot use unsigned, so we must do a hack */
            while (n && n != -1) {
                *ind++ = n;
                n = n >> 8;
            }
        }

        /*
         * Output a byte. Handles all values, 0..ff.
         */
        void ob(int n) {
            *ind++ = n;
        }

        /*
         * Output a symbol and patch all calls to it.
         *
         * t is the address of a 4-byte field that holds the address of the
         * next field in the chain (0 terminates the chain). Every field is
         * overwritten with the displacement from the end of that field to
         * the current write cursor.
         */
        void gsym(int t) {
            int n;
            while (t) {
                n = *(int *) t; /* next value */
                *(int *) t = ((int) ind) - t - 4;
                t = n;
            }
        }

        /* psym is used to put an instruction with a data field which is a
         reference to a symbol. It is in fact the same as oad ! */
        int psym(int n, int t) {
            return oad(n, t);
        }

        /*
         * Instruction + address: emit n with o(), then store t as a 4-byte
         * int. Returns the address of that 4-byte field so that it can be
         * patched later.
         */
        int oad(int n, int t) {
            o(n);
            *(int *) ind = t;
            t = (int) ind;
            ind = ind + 4;
            return t;
        }

        /* Start of the buffer, or 0 if nothing is allocated. */
        inline void* getBase() {
            return (void*) pProgramBase;
        }

        /* Number of bytes emitted since init(). */
        int getSize() {
            return ind - pProgramBase;
        }

        /* Address at which the next byte will be emitted. */
        int getPC() {
            return (int) ind;
        }
    };

    class CodeGenerator {
    public:
        CodeGenerator() {}
        virtual ~CodeGenerator() {}

        void init(CodeBuf* pCodeBuf) {
            this->pCodeBuf = pCodeBuf;
        }

        /* output a symbol and patch all calls to it */
        void gsym(int t) {
            pCodeBuf->gsym(t);
        }

    protected:
        void o(int n) {
            pCodeBuf->o(n);
        }

        /*
         * Output a byte. Handles all values, 0..ff.
         */
        void ob(int n) {
            pCodeBuf->ob(n);
        }

        /* psym is used to put an instruction with a data field which is a
         reference to a symbol. It is in fact the same as oad ! */
        int psym(int n, int t) {
            return oad(n, t);
        }

        /* instruction + address */
        int oad(int n, int t) {
            return pCodeBuf->oad(n,t);
        }

        int getPC() {
            return pCodeBuf->getPC();
        }

    private:
        CodeBuf* pCodeBuf;
    };

    /*
     * Code generator for 32-bit x86. Each member emits one fixed
     * instruction sequence into the attached CodeBuf. Functions that
     * return int hand back the address of the instruction's 4-byte
     * operand field so that the caller can patch it later.
     */
    class X86CodeGenerator : public CodeGenerator {
    public:
        X86CodeGenerator() {}
        virtual ~X86CodeGenerator() {}

        /* load immediate value; returns the address of the immediate */
        int li(int t) {
            return oad(0xb8, t); /* mov $xx, %eax */
        }

        /* Emit an unconditional jump; returns the address of its operand. */
        int gjmp(int t) {
            return psym(0xe9, t);
        }

        /* l = 0: je, l == 1: jne */
        int gtst(int l, int t) {
            o(0x0fc085); /* test %eax, %eax, je/jne xxx */
            return psym(0x84 + l, t);
        }

        /*
         * Compare %ecx with %eax and leave the boolean result in %eax;
         * t selects the setxx condition. There is no operand to patch, so
         * the return value is always 0.
         */
        int gcmp(int t) {
            o(0xc139); /* cmp %eax,%ecx */
            li(0);
            o(0x0f); /* setxx %al */
            o(t + 0x90);
            o(0xc0);
            return 0;
        }

        void clearECX() {
            oad(0xb9, 0); /* movl $0, %ecx */
        }

        void pushEAX() {
            o(0x50); /* push %eax */
        }

        /* Pop the destination address into %ecx and store through it. */
        void storeEAXIntoPoppedLVal(bool isInt) {
            o(0x59); /* pop %ecx */
            o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
        }

        /* Replace %eax with the int (or sign-extended char) it points at. */
        void loadEAXIndirect(bool isInt) {
            if (isInt)
                o(0x8b); /* mov (%eax), %eax */
            else
                o(0xbe0f); /* movsbl (%eax), %eax */
            /* o() cannot emit a trailing zero byte, hence ob() */
            ob(0); /* add zero in code */
        }

        void leaEAX(int ea) {
            gmov(10, ea); /* leal EA, %eax */
        }

        void storeEAX(int ea) {
            gmov(6, ea); /* mov %eax, EA */
        }

        void loadEAX(int ea) {
            gmov(8, ea); /* mov EA, %eax */
        }

        void puzzleAdd(int n, int tokc) {
            /* Not sure what this does, related to variable loading with an
             * operator at level 11.
             * NOTE(review): opcode 0x83 followed by the bytes of tokc looks
             * like an add-immediate applied to the variable - confirm.
             */
            gmov(0, n); /* 83 ADD */
            o(tokc);
        }

        /* Returns the address of the size operand, to be filled in later. */
        int allocStackSpaceForArgs() {
            return oad(0xec81, 0); /* sub $xxx, %esp */
        }

        void storeEAToArg(int l) {
            oad(0x248489, l); /* movl %eax, xxx(%esp) */
        }

        /* Returns the address of the call operand for later patching. */
        int callForward(int symbol) {
            return psym(0xe8, symbol); /* call xxx */
        }

        void callRelative(int t) {
            psym(0xe8, t); /* call xxx */
        }

        void callIndirect(int l) {
            oad(0x2494ff, l); /* call *xxx(%esp) */
        }

        void adjustStackAfterCall(int l) {
            oad(0xc481, l); /* add $xxx, %esp */
        }

        /* Temporary escape hatch: emit raw bytes on behalf of the parser. */
        void oHack(int n) {
            o(n);
        }

        /* Temporary escape hatch: emit raw instruction + address. */
        void oadHack(int n, int t) {
            oad(n, t);
        }
    private:

        /*
         * Emit opcode (l + 0x83) followed by an addressing byte and the
         * 4-byte value t. The addressing byte is 0x85 when t < LOCAL and
         * 0x05 otherwise. Returns the address of the 4-byte field.
         */
        int gmov(int l, int t) {
            o(l + 0x83);
            return oad((t < LOCAL) << 7 | 5, t);
        }
    };

    /* vars: value of variables
     loc : local variable index
     glo : global variable index
@@ -39,43 +273,48 @@ class compiler {
     dstk: define stack
     dptr, dch: macro state
     */
int tok, tokc, tokl, ch, vars, rsym, prog, ind, loc, glo, sym_stk, dstk, dptr, dch, last_id;
    int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
            dptr, dch, last_id;
    void* pSymbolBase;
    void* pGlobalBase;
    void* pVarsBase;
    FILE* file;

#define ALLOC_SIZE 99999
    CodeBuf codeBuf;
    X86CodeGenerator* pGen;

    static const int ALLOC_SIZE = 99999;

    /* depends on the init string */
#define TOK_STR_SIZE 48
#define TOK_IDENT    0x100
#define TOK_INT      0x100
#define TOK_IF       0x120
#define TOK_ELSE     0x138
#define TOK_WHILE    0x160
#define TOK_BREAK    0x190
#define TOK_RETURN   0x1c0
#define TOK_FOR      0x1f8
#define TOK_DEFINE   0x218
#define TOK_MAIN     0x250

#define TOK_DUMMY   1
#define TOK_NUM     2

#define LOCAL   0x200

#define SYM_FORWARD 0
#define SYM_DEFINE  1
    static const int TOK_STR_SIZE = 48;
    static const int TOK_IDENT = 0x100;
    static const int TOK_INT = 0x100;
    static const int TOK_IF = 0x120;
    static const int TOK_ELSE = 0x138;
    static const int TOK_WHILE = 0x160;
    static const int TOK_BREAK = 0x190;
    static const int TOK_RETURN = 0x1c0;
    static const int TOK_FOR = 0x1f8;
    static const int TOK_DEFINE = 0x218;
    static const int TOK_MAIN = 0x250;

    static const int TOK_DUMMY = 1;
    static const int TOK_NUM = 2;

    static const int LOCAL = 0x200;

    static const int SYM_FORWARD = 0;
    static const int SYM_DEFINE = 1;

    /* tokens in string heap */
#define TAG_TOK    ' '
#define TAG_MACRO  2
    static const int TAG_TOK = ' ';
    static const int TAG_MACRO = 2;

void pdef(int t)
{
    /* Store t as one byte at the define-stack cursor, then advance it. */
    void pdef(int t) {
        char* const slot = (char *) dstk;
        dstk = dstk + 1;
        *slot = t;
    }

void inp()
{
    void inp() {
        if (dptr) {
            ch = *(char *) dptr++;
            if (ch == TAG_MACRO) {
@@ -87,14 +326,12 @@ void inp()
        /*    printf("ch=%c 0x%x\n", ch, ch); */
    }

int isid()
{
    /* Non-zero when the current character is alphanumeric or '_'. */
    int isid() {
        const int alnum = isalnum(ch);
        const int underscore = (ch == '_');
        return alnum | underscore;
    }

    /* read a character constant */
void getq()
{
    void getq() {
        if (ch == '\\') {
            inp();
            if (ch == 'n')
@@ -102,8 +339,7 @@ void getq()
        }
    }

void next()
{
    void next() {
        int l, a;

        while (isspace(ch) | ch == '#') {
@@ -141,8 +377,9 @@ void next()
                tok = TOK_NUM;
            } else {
                *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
                                        suppose data is initied to zero */
            tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1)) - sym_stk);
                 suppose data is initialized to zero by calloc) */
                tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
                        - sym_stk);
                *(char *) dstk = 0; /* mark real end of ident for dlsym() */
                tok = tok * 8 + TOK_IDENT;
                if (tok > TOK_DEFINE) {
@@ -176,9 +413,10 @@ void next()
                }
                inp();
                next();
        } else
        {
            const char* t = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
            } else {
                const char
                        * t =
                                "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
                while (l = *t++) {
                    a = *t++;
                    tokc = 0;
@@ -221,8 +459,7 @@ void next()
#endif
    }

void error(const char *fmt,...)
{
    void error(const char *fmt, ...) {
        va_list ap;

        va_start(ap, fmt);
@@ -233,84 +470,68 @@ void error(const char *fmt,...)
        exit(1);
    }

void skip(int c)
{
    /* Require the current token to be c (reporting an error otherwise),
     * then move on to the next token. */
    void skip(int c) {
        const bool matches = (tok == c);
        if (!matches) {
            error("'%c' expected", c);
        }
        next();
    }

void o(int n)
{
    /* cannot use unsigned, so we must do a hack */
    while (n && n != -1) {
        *(char *)ind++ = n;
        n = n >> 8;
    /* load immediate value */
    int li(int t) {
        return pGen->li(t);
    }

    int gjmp(int t) {
        return pGen->gjmp(t);
    }

/* output a symbol and patch all calls to it */
void gsym(int t)
{
    int n;
    while (t) {
        n = *(int *)t; /* next value */
        *(int *)t = ind - t - 4;
        t = n;
    /* l = 0: je, l == 1: jne */
    int gtst(int l, int t) {
        return pGen->gtst(l, t);
    }

    int gcmp(int t) {
        return pGen->gcmp(t);
    }

/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
    void clearEXC() {
        pGen->clearECX();
    }

/* instruction + address */
int oad(int n, int t)
{
    o(n);
    *(int *)ind = t;
    t = ind;
    ind = ind + 4;
    return t;
    void storeEAXIntoPoppedLVal(bool isInt) {
        pGen->storeEAXIntoPoppedLVal(isInt);
    }

/* load immediate value */
int li(int t)
{
    oad(0xb8, t); /* mov $xx, %eax */
    void loadEAXIndirect(bool isInt) {
        pGen->loadEAXIndirect(isInt);
    }

int gjmp(int t)
{
    return psym(0xe9, t);
    void leaEAX(int ea) {
        pGen->leaEAX(ea);
    }

/* l = 0: je, l == 1: jne */
int gtst(int l, int t)
{
    o(0x0fc085); /* test %eax, %eax, je/jne xxx */
    return psym(0x84 + l, t);
    /* Temporary hack for emitting x86 code directly. */
    void o(int n) {
        pGen->oHack(n);
    }

int gcmp(int t)
{
    o(0xc139); /* cmp %eax,%ecx */
    li(0);
    o(0x0f); /* setxx %al */
    o(t + 0x90);
    o(0xc0);
    /* instruction + address */
    int oad(int n, int t) {
        pGen->oadHack(n,t);
    }

int gmov(int l, int t)
{
    o(l + 0x83);
    oad((t < LOCAL) << 7 | 5, t);
    /* instruction + address */
    int psym(int n, int t) {
        pGen->oadHack(n,t);
    }

    void gsym(int n) {
        pGen->gsym(n);
    }

    /* l is one if '=' parsing wanted (quick hack) */
void unary(int l)
{
    void unary(int l) {
        int n, t, a, c;

        n = 1; /* type of expression 0 = forward, 1 = value, other =
@@ -336,7 +557,7 @@ void unary(int l)
            } else if (c == 2) {
                /* -, +, !, ~ */
                unary(0);
            oad(0xb9, 0); /* movl $0, %ecx */
                clearEXC();
                if (t == '!')
                    gcmp(a);
                else
@@ -362,19 +583,14 @@ void unary(int l)
                unary(0);
                if (tok == '=') {
                    next();
                o(0x50); /* push %eax */
                    pGen->pushEAX();
                    expr();
                o(0x59); /* pop %ecx */
                o(0x0188 + (t == TOK_INT)); /* movl %eax/%al, (%ecx) */
                    storeEAXIntoPoppedLVal(t == TOK_INT);
                } else if (t) {
                if (t == TOK_INT)
                    o(0x8b); /* mov (%eax), %eax */
                else
                    o(0xbe0f); /* movsbl (%eax), %eax */
                ind++; /* add zero in code */
                    loadEAXIndirect(t == TOK_INT);
                }
            } else if (t == '&') {
            gmov(10, *(int *)tok); /* leal EA, %eax */
                leaEAX(*(int *) tok);
                next();
            } else {
                n = *(int *) t;
@@ -385,13 +601,12 @@ void unary(int l)
                    /* assignment */
                    next();
                    expr();
                gmov(6, n); /* mov %eax, EA */
                    pGen->storeEAX(n);
                } else if (tok != '(') {
                    /* variable */
                gmov(8, n); /* mov EA, %eax */
                    pGen->loadEAX(n);
                    if (tokl == 11) {
                    gmov(0, n);
                    o(tokc);
                        pGen->puzzleAdd(n, tokc);
                        next();
                    }
                }
@@ -401,15 +616,15 @@ void unary(int l)
        /* function call */
        if (tok == '(') {
            if (n == 1)
            o(0x50); /* push %eax */
                pGen->pushEAX();

            /* push args and invert order */
        a = oad(0xec81, 0); /* sub $xxx, %esp */
            a = pGen->allocStackSpaceForArgs();
            next();
            l = 0;
            while (tok != ')') {
                expr();
            oad(0x248489, l); /* movl %eax, xxx(%esp) */
                pGen->storeEAToArg(l);
                if (tok == ',')
                    next();
                l = l + 4;
@@ -419,20 +634,19 @@ void unary(int l)
            if (!n) {
                /* forward reference */
                t = t + 4;
            *(int *)t = psym(0xe8, *(int *)t);
                *(int *) t = pGen->callForward(*(int *) t);
            } else if (n == 1) {
            oad(0x2494ff, l); /* call *xxx(%esp) */
                pGen->callIndirect(l);
                l = l + 4;
            } else {
            oad(0xe8, n - ind - 5); /* call xxx */
                pGen->callRelative(n - codeBuf.getPC() - 5); /* call xxx */
            }
            if (l)
            oad(0xc481, l); /* add $xxx, %esp */
                pGen->adjustStackAfterCall(l);
        }
    }

void sum(int l)
{
    void sum(int l) {
        int t, n, a;

        if (l-- == 1)
@@ -473,21 +687,16 @@ void sum(int l)
        }
    }

void expr()
{
    /* Parse an expression by entering sum() at level 11. */
    void expr() {
        const int startLevel = 11;
        sum(startLevel);
    }


int test_expr()
{
    /* Parse an expression, then emit gtst(0, 0) and return its result. */
    int test_expr() {
        expr();
        const int branch = gtst(0, 0);
        return branch;
    }


void block(int l)
{
    void block(int l) {
        int a, n, t;

        if (tok == TOK_IF) {
@@ -510,13 +719,13 @@ void block(int l)
            next();
            skip('(');
            if (t == TOK_WHILE) {
            n = ind;
                n = codeBuf.getPC();
                a = test_expr();
            } else {
                if (tok != ';')
                    expr();
                skip(';');
            n = ind;
                n = codeBuf.getPC();
                a = 0;
                if (tok != ';')
                    a = test_expr();
@@ -524,14 +733,14 @@ void block(int l)
                if (tok != ')') {
                    t = gjmp(0);
                    expr();
                gjmp(n - ind - 5);
                    gjmp(n - codeBuf.getPC() - 5);
                    gsym(t);
                    n = t + 4;
                }
            }
            skip(')');
            block((int) &a);
        gjmp(n - ind - 5); /* jmp */
            gjmp(n - codeBuf.getPC() - 5); /* jmp */
            gsym(a);
        } else if (tok == '{') {
            next();
@@ -556,8 +765,7 @@ void block(int l)
    }

    /* 'l' is true if local declarations */
void decl(int l)
{
    void decl(int l) {
        int a;

        while (tok == TOK_INT | tok != -1 & !l) {
@@ -581,7 +789,7 @@ void decl(int l)
                 pointers) */
                gsym(*(int *) (tok + 4));
                /* put function address */
            *(int *)tok = ind;
                *(int *) tok = codeBuf.getPC();
                next();
                skip('(');
                a = 8;
@@ -605,48 +813,92 @@ void decl(int l)
        }
    }

    /* Release the symbol, global and variable buffers and the code
     * generator, resetting each owner to 0 afterwards. */
    void cleanup() {
        if (sym_stk != 0) {
            free((void*) sym_stk);
        }
        sym_stk = 0;

        /* free() and delete both accept a null pointer and do nothing */
        free((void*) pGlobalBase);
        pGlobalBase = 0;

        free(pVarsBase);
        pVarsBase = 0;

        delete pGen;
        pGen = 0;
    }

    void clear() {
        tok = 0;
        tokc = 0;
        tokl = 0;
        ch = 0;
        vars = 0;
        rsym = 0;
        loc = 0;
        glo = 0;
        sym_stk = 0;
        dstk = 0;
        dptr = 0;
        dch = 0;
        last_id = 0;
        file = 0;
        pGlobalBase = 0;
        pVarsBase = 0;
        pGen = 0;
    }

public:
compiler() :
    tok(0), tokc(0), tokl(0), ch(0),
    vars(0), rsym(0), prog(0), ind(0), loc(0), glo(0), sym_stk(0),
    dstk(0), dptr(0), dch(0), last_id(0), file(0)
{
    compiler() {
        clear();
    }

int compile(FILE* in) {
    /* Release whatever the last compile() left allocated. */
    ~compiler() {
        this->cleanup();
    }

    /*
     * Compile the program read from 'in'. Any state left over from an
     * earlier compile() is released first, so the same object can be used
     * repeatedly.
     *
     * Returns 0 on success and -1 if one of the working buffers could not
     * be allocated.
     */
    int compile(FILE* in) {
        cleanup();
        clear();
        codeBuf.init(ALLOC_SIZE);
        pGen = new X86CodeGenerator();
        pGen->init(&codeBuf);
        file = in;

        sym_stk = (int) calloc(1, ALLOC_SIZE);
        pGlobalBase = calloc(1, ALLOC_SIZE);
        pVarsBase = calloc(1, ALLOC_SIZE);
        if (codeBuf.getBase() == 0 || sym_stk == 0 || pGlobalBase == 0
                || pVarsBase == 0) {
            /* whatever was obtained is released by the next cleanup() */
            return -1;
        }

        /* seed the symbol stack with the keywords; TOK_STR_SIZE depends on
         * this string */
        dstk = (int) strcpy((char*) sym_stk,
                " int if else while break return for define main ")
                + TOK_STR_SIZE;
        glo = (int) pGlobalBase;
        vars = (int) pVarsBase;
        inp();
        next();
        decl(0);
        return 0;
    }

int run(int argc, char** argv)
{
    /*
     * Call the compiled program's main(argc, argv) and return its result.
     * Returns -1 (after printing a message to stderr) when no "main" entry
     * is available, including when nothing has been compiled yet.
     */
    int run(int argc, char** argv) {
        typedef int (*mainPtr)(int argc, char** argv);
        mainPtr aMain = 0;
        /* vars is 0 until compile() has allocated the variable table */
        if (vars != 0) {
            aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
        }
        if (!aMain) {
            fprintf(stderr, "Could not find function \"main\".\n");
            return -1;
        }
        return aMain(argc, argv);
    }

    /*
     * Write the generated code bytes to 'out'.
     * Returns 0 on success, -1 if fewer bytes than expected were written.
     */
    int dump(FILE* out) {
        const size_t size = (size_t) codeBuf.getSize();
        if (fwrite(codeBuf.getBase(), 1, size, out) != size) {
            return -1;
        }
        return 0;
    }

};


} // namespace acc

int main(int argc, char** argv) {
+126 −0
Original line number Diff line number Diff line
/* #!/usr/local/bin/otcc */
/*
 * Sample OTCC C example. You can uncomment the first line and install
 * otcc in /usr/local/bin to make otcc scripts !  
 */

/* All preprocessor directives except #define are ignored. We put this
   include here so that a standard C compiler can compile this code too. */
#include <stdio.h>

/* defines are handled, but macro arguments cannot be given. No
   recursive defines are tolerated */
#define DEFAULT_BASE 10

/*
 * Only old style K&R prototypes are parsed. Only int arguments are
 * allowed (implicit types).
 * 
 * By benchmarking the execution time of this function (for example
 * for fib(35)), you'll notice that OTCC is quite fast because it
 * generates native i386 machine code.  
 */
fib(n)
{
    /* Base case: every n <= 2 (zero and negatives included) yields 1. */
    if (n <= 2)
        return 1;
    else
        /* Plain double recursion with no memoization. */
        return fib(n-1) + fib(n-2);
}

/* Identifiers are parsed the same way as C: begins with letter or
   '_', and then letters, '_' or digits */
fact(n)
{
    /* Returns the product 2 * 3 * ... * n; the result is 1 when n < 2
       because the loop body never runs. */
    /* local variables can be declared. Only 'int' type is supported */
    int i, r;
    r = 1;
    /* 'while' and 'for' loops are supported */
    for(i=2;i<=n;i++)
        r = r * i;
    return r;
}

/* Well, we could use printf, but it would be too easy */
print_num(n, b)
{
    /* Prints n in base b with printf, without a trailing newline. Digits
       are produced least significant first into a scratch buffer and then
       written out in reverse order.
       NOTE(review): presumably expects n >= 0 and 2 <= b <= 36 - confirm
       against callers. */
    int tab, p, c;
    /* Numbers can be entered in decimal, hexadecimal ('0x' prefix) and
       octal ('0' prefix) */
    /* more complex programs use malloc */
    /* tab holds the buffer address in an int; the result of malloc is not
       checked */
    tab = malloc(0x100); 
    p = tab;
    while (1) {
        c = n % b;
        /* Character constants can be used */
        /* remainders 10 and above map to 'a', 'b', ...; others to '0'..'9' */
        if (c >= 10)
            c = c + 'a' - 10;
        else
            c = c + '0';
        *(char *)p = c;
        p++;
        n = n / b;
        /* 'break' is supported */
        if (n == 0)
            break;
    }
    /* walk back from the last digit stored to the start of the buffer */
    while (p != tab) {
        p--;
        printf("%c", *(char *)p);
    }
    free(tab);
}

/* 'main' takes standard 'argc' and 'argv' parameters */
main(argc, argv)
{
    /* Usage: prog n [base]. Prints fib(n) and fact(n) in the chosen base.
       Returns 0 on success and 1 on a usage error or an invalid base. */
    /* No local name space is supported, but local variables ARE
       supported. As long as you do not use a globally defined
       variable name as a local variable (which is a bad habit), you
       won't have any problem. */
    int s, n, f, base;
    
    /* The && and || operators have the same semantics as in C (left to
       right evaluation and early exit) */
    if (argc != 2 && argc != 3) {
        /* The '*' operator is supported with explicit casting to 'int *',
           'char *' or 'int (*)()' (function pointer). Of course, 'int'
           values are supposed to be used as pointers too. */
        /* s receives the first argv entry, which is passed to help() */
        s = *(int *)argv;
        help(s);
        return 1;
    }
    /* Any libc function can be used because OTCC uses dynamic linking */
    /* argv is an int here, so argv + 4 is a byte offset: with 4-byte
       pointers it addresses the second argv entry */
    n = atoi(*(int *)(argv + 4));
    base = DEFAULT_BASE;
    if (argc >= 3) {
        /* argv + 8: third argv entry, same byte arithmetic as above */
        base = atoi(*(int *)(argv + 8));
        if (base < 2 || base > 36) {
            /* external variables can be used too (here: 'stderr') */
            fprintf(stderr, "Invalid base\n");
            return 1;
        }
    }
    printf("fib(%d) = ", n);
    print_num(fib(n), base);
    printf("\n");

    printf("fact(%d) = ", n);
    /* fact() is not called for n > 12; "Overflow" is printed instead
       (13! does not fit in a 32-bit int) */
    if (n > 12) {
        printf("Overflow");
    } else {
        /* why not use a function pointer? */
        f = &fact;
        print_num((*(int (*)())f)(n), base);
    }
    printf("\n");
    return 0;
}

/* functions can be used before being defined */
help(name)
{
    /* Prints the usage text with printf; 'name' is substituted for %s, so
       it is used as a pointer to the program name string. */
    printf("usage: %s n [base]\n", name);
    printf("Compute fib(n) and fact(n) and output the result in base 'base'\n");
}
+4 −0
Original line number Diff line number Diff line
/* No main. */

a() {
    /* Empty body: the only function in this file, which has no main(). */
}
 No newline at end of file
(8.39 KiB)

File changed.

No diff preview for this file type.