Skip to content

Commit

Permalink
Emscripten RTS (#67)
Browse files Browse the repository at this point in the history
* Begin working on a minimal Emscripten RTS

Immediate goal with this is to get PolyBenchC compiling, so we can get some numbers.

* Flesh out remaining symbols needed for atax

(untested)

* Fix mistake with compiling globals

The whole point of global initialisers is so that we can mutate the global in the *importing* module -- we were mutating the original global!

* Ensure init() method is exported in header

* A lot of work on the Emscripten RTS.

It's getting there, it really is. But it's still trapping on the 5th call to malloc in the atax benchmark -- and I'm not sure why. Same behaviour with dlmalloc and emmalloc.

More debugging to do, but I've been in this office for 9 hours straight now on a Saturday, and I really need to eat...

* Fix memory corruption in Emscripten RTS

PolyBenchC's 2mm benchmark runs now! And I'm hoping most of the others do, too. Hurrah!
  • Loading branch information
SimonJF authored Jul 24, 2018
1 parent e7dc81f commit 6359e6c
Show file tree
Hide file tree
Showing 4 changed files with 343 additions and 4 deletions.
3 changes: 3 additions & 0 deletions src/bin/c_stubs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ let header ~prefix ~exports =
Printf.sprintf "%s* %s;" (string_of_ctype type_) name
) exports
|> String.concat "\n" in
let init_stub_header = Printf.sprintf "void %s_init();\n" prefix in
let header_exports = init_stub_header ^ header_exports in


let header_name =
Printf.sprintf "__CMMOFWASM_%s_H" (String.uppercase_ascii prefix) in
Expand Down
7 changes: 3 additions & 4 deletions src/lib/cmmcompile/gencmm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -761,10 +761,9 @@ let compile_expression env =
(* If it's immutable, we may simply return its (compiled) initial
* value. *)
compile_value v
| DefinedGlobal { initial_value = AnotherGlobal g ; _ } ->
(* If we're referencing another global, then we know by
* the specification that the other global *must* be an
* imported global. *)
| DefinedGlobal _ ->
(* If it's been initialised from another global, we need
* to do a load. *)
load_global g
| ImportedGlobal _ ->
(* If we're referencing an imported global, we load that. *)
Expand Down
269 changes: 269 additions & 0 deletions src/rts/emscripten_rts.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
#include "emscripten_rts.h"
// Syscall stuff

// Variadic arguments scaffolding
// Byte offset into the wasm heap of the next variadic argument to read.
// Each syscall entry point below sets it to its `varargs` parameter, and the
// varargs_get* helpers read from it and (mostly) advance it.
uint64_t varargs_position = 0;

// Reads a 32-bit unsigned integer from the wasm linear memory.
//
// ptr: byte offset from the start of env_memory_memory.data.
// Returns the four bytes stored at that offset, in host byte order.
//
// NOTE: The generated JS shifts addresses right by two before indexing;
// presumably that is because it indexes an array of 32-bit words rather
// than bytes. The heap here is byte-addressed, so no shift is applied.
//
// The value is copied out with memcpy rather than read through a cast
// pointer: `ptr` is an arbitrary byte offset, and dereferencing a
// misaligned uint32_t* is undefined behaviour in C.
//
// NOTE(review): no bounds check is made against the size of the memory.
uint32_t heap_read_int32(uint32_t ptr) {
  const uint8_t* heap_base = env_memory_memory.data;
  uint32_t ret;
  memcpy(&ret, heap_base + ptr, sizeof(ret));
  return ret;
}

// Reads a 64-bit unsigned integer from the wasm linear memory.
//
// ptr: byte offset from the start of env_memory_memory.data.
// Returns the eight bytes stored at that offset, in host byte order.
//
// The value is copied out with memcpy rather than read through a cast
// pointer: `ptr` is an arbitrary byte offset, and dereferencing a
// misaligned uint64_t* is undefined behaviour in C.
//
// NOTE(review): no bounds check is made against the size of the memory.
uint64_t heap_read_int64(uint32_t ptr) {
  const uint8_t* heap_base = env_memory_memory.data;
  uint64_t ret;
  memcpy(&ret, heap_base + ptr, sizeof(ret));
  return ret;
}

// Fetches the next 32-bit variadic argument from the heap and moves
// varargs_position past it.
uint32_t varargs_get() {
  const uint32_t value = heap_read_int32(varargs_position);
  varargs_position += sizeof(uint32_t);
  return value;
}

// Fetches the next variadic argument, interprets it as a heap offset, and
// returns a host pointer to the bytes at that offset.
//
// The argument slot is consumed through varargs_get(), so varargs_position
// advances by four bytes just as it does for the other varargs_get*
// helpers. Previously the slot was read without advancing, so the next
// varargs read would have returned the same string pointer again.
//
// The returned pointer aliases the wasm heap; nothing is copied.
// TODO: This doesn't properly handle UTF8.
char* varargs_get_str() {
  uint32_t ptr = varargs_get();
  uint8_t* heap_base = env_memory_memory.data;
  return (char*) (heap_base + ptr);
}

// Fetches the next 64-bit variadic argument from the heap and moves
// varargs_position past it.
uint64_t varargs_get64() {
  const uint64_t value = heap_read_int64(varargs_position);
  varargs_position += sizeof(uint64_t);
  return value;
}

// Consumes the next 32-bit variadic argument, which is expected to be zero.
// The expectation is only enforced by assert(), so it is not checked when
// assertions are compiled out; the value read is returned either way.
uint32_t varargs_get_zero() {
  const uint32_t slot = varargs_get();
  assert(slot == 0);
  return slot;
}

// Syscall implementations
// ioctl: implemented as a no-op.
// Records where the variadic arguments start, reads none of them, and
// reports success (0). `which` is not used.
uint32_t env_cfunc____syscall54(uint32_t which, uint32_t varargs) {
  (void) which;
  varargs_position = varargs;
  return 0;
}

// close: implemented as a no-op that always reports success (0).
// We only ever work on stdin, stdout or stderr, and closing those makes
// little sense. The single argument is still consumed so that the varargs
// cursor moves as it would for a real implementation. `which` is not used.
uint32_t env_cfunc____syscall6(uint32_t which, uint32_t varargs) {
  (void) which;
  varargs_position = varargs;
  (void) varargs_get(); // the descriptor that would have been closed
  return 0;
}

// llseek: implemented as a no-op that always reports success (0).
// The JS version relies on SYSCALLS.getStreamFromFD() and FS.llseek, both
// of which appear to be undefined, so no seek is attempted here either.
// The five arguments (stream, offset_high, offset_low, result, whence) are
// still consumed so that the varargs side effects are preserved.
uint32_t env_cfunc____syscall140(uint32_t which, uint32_t varargs) {
  (void) which;
  varargs_position = varargs;
  for (int remaining = 5; remaining > 0; remaining--) {
    (void) varargs_get();
  }
  return 0;
}

// writev: writes a sequence of heap buffers to a file descriptor.
//
// Variadic arguments, in order:
//   stream - descriptor handed to write()
//   iov    - heap offset of the iovec array
//   iovcnt - number of entries in that array
// Each entry is 8 bytes: a 32-bit heap offset of the buffer followed by a
// 32-bit length.
//
// Returns the number of bytes actually written. If write() fails before
// anything has been written, returns (uint32_t) -1; if it fails or comes up
// short part-way through, the bytes written so far are returned so the
// caller can retry the remainder. Previously the result of write() was
// ignored and the requested length was always reported as written.
//
// `which` is not used. Again, not doing UTF8 properly here.
uint32_t env_cfunc____syscall146(uint32_t which, uint32_t varargs) {
  (void) which;
  varargs_position = varargs;
  uint32_t stream = varargs_get();
  uint32_t iov = varargs_get(); // Base address of iovec array
  uint32_t iovcnt = varargs_get();
  uint32_t total = 0;

  // Unsigned index: iovcnt is unsigned, so avoid a signed/unsigned compare.
  for (uint32_t i = 0; i < iovcnt; i++) {
    uint32_t entry = iov + (i * 8);
    uint32_t ptr = heap_read_int32(entry);     // buffer start
    uint32_t len = heap_read_int32(entry + 4); // buffer length
    ssize_t written =
      write((int) stream, (void*) (env_memory_memory.data + ptr), len);
    if (written < 0) {
      return total > 0 ? total : (uint32_t) -1;
    }
    total += (uint32_t) written;
    if ((uint32_t) written < len) {
      // Short write: stop here and report what went out.
      break;
    }
  }
  return total;
}

// Copies `len` bytes within the wasm heap from offset `src` to offset
// `dest` using memcpy, and returns `dest`.
uint32_t env_cfunc__emscripten_memcpy_big(uint32_t dest,
    uint32_t src, uint32_t len) {
  uint8_t* target = env_memory_memory.data + dest;
  const uint8_t* source = env_memory_memory.data + src;
  memcpy(target, source, len);
  return dest;
}

// Aborting and errors
// Prints `str` followed by a newline on standard output.
void err(char* str) {
  printf("%s\n", str);
}

// Aborts the program: sets the ABORT and EXITSTATUS globals to 1, prints
// "abort: <str>" on standard output, and exits the process with status -1.
// Does not return.
void rts_abort(char* str) {
  env_global_EXITSTATUS = 1;
  env_global_ABORT = 1;
  printf("abort: %s\n", str);
  exit(-1);
}

// Prints "abort" on standard output and exits the process with status -1.
// The `str` argument is ignored. Does not return.
void env_cfunc_abort(char* str) {
  (void) str;
  fputs("abort\n", stdout);
  exit(-1);
}

// Reports a null function pointer call (signature "ii") through err() and
// then aborts through rts_abort(), passing `x` along as the abort message.
void env_cfunc_nullFunc_ii(char* x) {
  char message[] = "Null pointer exception (ii)\n";
  err(message);
  rts_abort(x);
}

// Reports a null function pointer call (signature "iiii") through err() and
// then aborts through rts_abort(), passing `x` along as the abort message.
void env_cfunc_nullFunc_iiii(char* x) {
  char message[] = "Null pointer exception (iiii)\n";
  err(message);
  rts_abort(x);
}

// Formats an "Assertion failed" message and aborts through rts_abort().
//
// condition: text of the failed assertion
// filename:  source file in which it failed
// line:      line number of the assertion
// func:      printed with %p
//
// The message is built with snprintf so that long condition or file name
// strings are truncated rather than written past the end of the buffer;
// the previous unbounded sprintf into 100 bytes could overflow the stack.
// NULL strings are printed as "(null)" instead of being passed to %s.
// Does not return.
void env_cfunc____assert_fail(char* condition, char* filename,
    int line, void* func) {
  char buf[512];
  snprintf(buf, sizeof(buf), "Assertion failed: %s, at %s:%d (%p)\n",
      condition ? condition : "(null)",
      filename ? filename : "(null)",
      line, func);
  rts_abort(buf);
}

// Terminates the process via exit() with the given status.
void env_cfunc___exit(uint32_t status) {
  exit((int) status);
}

// Terminates the process via exit() with the given status.
void env_cfunc__exit(uint32_t status) {
  exit((int) status);
}

// Placeholder for setting errno: currently stores nothing and simply hands
// `value` back to the caller.
// FIXME: This should call XXX_cfunc____errno_location() to find where the
// error number lives, and then write `value` to that location in the heap
// (the JS version indexes the heap with the result >>2).
uint32_t env_cfunc____setErrNo(uint32_t value) {
  const uint32_t unchanged = value;
  return unchanged;
}


// Wrappers
// Wrapper around the host gettimeofday().
// Writes two consecutive 32-bit values into the wasm heap at offset `ptr`:
// the seconds, then the microseconds, each truncated to uint32_t.
// Always returns 0; the host call's own result is not inspected.
uint32_t env_cfunc__gettimeofday(uint32_t ptr) {
  struct timeval now;
  gettimeofday(&now, NULL);
  uint32_t* out = (uint32_t*) (env_memory_memory.data + ptr);
  out[0] = (uint32_t) now.tv_sec;
  out[1] = (uint32_t) now.tv_usec;
  return 0;
}

// I guess these were needed for pthreads support?
// Nothing in our output anyway.
// Lock hook: deliberately does nothing.
void env_cfunc____lock() {
  return;
}

// Unlock hook: deliberately does nothing.
void env_cfunc____unlock() {
  return;
}

// Aborts through rts_abort() with a message saying that memory cannot grow.
void env_cfunc_abortOnCannotGrowMemory() {
  char reason[] = "Memory is static for now.";
  rts_abort(reason);
}

// Request to grow the heap. Growing is not supported, so this simply
// forwards to the cannot-grow abort handler.
void env_cfunc_enlargeMemory() {
  env_cfunc_abortOnCannotGrowMemory();
  return;
}

// Returns env_global_TOTAL_MEMORY truncated to 32 bits, and logs the value
// on standard error.
//
// The value is unsigned, so it is printed with %u; the previous %d would
// have shown sizes of 2 GiB and above as negative numbers.
uint32_t env_cfunc_getTotalMemory() {
  uint32_t ret = (uint32_t) (env_global_TOTAL_MEMORY);
  fprintf(stderr, "total memory: %u\n", (unsigned int) ret);
  return ret;
}

// Aborts through rts_abort() with a message reporting the size of the
// stack allocation that overflowed.
//
// alloc_size: number of bytes the program tried to allocate on the stack.
//
// The message is built with snprintf, bounded by the buffer size, and the
// unsigned size is printed with %u rather than %d so that large values are
// not shown as negative. Does not return.
//
// TODO: It would be nice to print out the amount the stack overflowed
// by, but for this, we need "stackSave" which we would have to link...
void env_cfunc_abortStackOverflow(uint32_t alloc_size) {
  char buffer[100];
  snprintf(buffer, sizeof(buffer),
      "Stack overflow! Attempted to allocate %u bytes on the stack.",
      (unsigned int) alloc_size);
  rts_abort(buffer);
}

// Bump-allocates `size` bytes from the static region.
// Returns the value of env_global_STATICTOP before the bump (narrowed to
// int). The new top is the old top plus `size`, rounded up to a multiple of
// 16: adding 15 and then masking with -16 clears the lowest four bits.
// Asserts that the new top is still below env_global_TOTAL_MEMORY.
// Implementation taken from the Emscripten JS RTS.
int static_alloc(int size) {
  const int previous_top = env_global_STATICTOP;
  const uint64_t bumped = env_global_STATICTOP + size + 15;
  env_global_STATICTOP = bumped & -16;
  assert(env_global_STATICTOP < env_global_TOTAL_MEMORY);
  return previous_top;
}

int align_memory(int size) {
int factor = 16; // 16-bit alignment by default
return ((int) (ceil(((double) size) / ((double) factor)))) * factor;
}


// Initialises the Emscripten runtime environment: allocates the linear
// memory and the table, resets the runtime globals, and lays out the
// static area, stack and dynamic base one after another. The order of the
// statements matters, because later values are derived from earlier ones.
void env_init() {
// FIXME: HACK -- this is taken from the PolyBenchC output code at the
// moment.
// 5242880 bytes = 5 MiB of stack; 134217728 bytes = 128 MiB of memory.
env_global_TOTAL_STACK = 5242880;
env_global_TOTAL_MEMORY = 134217728;


// Allocate table and memory
// The same page count is passed twice -- presumably the initial and the
// maximum size, which would make the memory fixed-size (TODO confirm
// against wasm-rt.h).
wasm_rt_allocate_memory(&env_memory_memory,
env_global_TOTAL_MEMORY / WASM_PAGE_SIZE,
env_global_TOTAL_MEMORY / WASM_PAGE_SIZE);
// NOTE(review): -1 is presumably "no maximum" for the table -- confirm.
wasm_rt_allocate_table(&env_table_table, 1024, -1);


// Global initialisation
// Several of the zeroed globals below are overwritten with their real
// values further down.
env_global_ABORT = 0;
env_global_EXITSTATUS = 0;
env_global_GLOBAL_BASE = 1024;
env_global_STATIC_BUMP = 5840;
env_global_STATIC_BASE = 0;
env_global_STACK_BASE = 0;
env_global_DYNAMIC_BASE = 0;
env_global_DYNAMICTOP_PTR = 0;
env_global_tableBase = 0;
global_global_NaN = NAN;
global_global_Infinity = INFINITY;


// The static area starts at GLOBAL_BASE, and memoryBase mirrors it.
env_global_STATIC_BASE = env_global_GLOBAL_BASE;
env_global_memoryBase = env_global_STATIC_BASE;

// The static area initially spans STATIC_BUMP bytes.
env_global_STATICTOP = env_global_STATIC_BASE + env_global_STATIC_BUMP;

// Initialise tempDoublePtr
// It takes the 16 bytes directly above the static data.
env_global_tempDoublePtr = env_global_STATICTOP;
env_global_STATICTOP += 16;


// Initialise stack base / top / max
// First reserve a 4-byte static slot; the dynamic base is stored in it
// at the end of this function.
env_global_DYNAMICTOP_PTR = static_alloc(4);
// The stack begins at the aligned end of the static area and spans
// TOTAL_STACK bytes.
uint64_t base_ptr = align_memory(env_global_STATICTOP);
env_global_STACK_BASE = base_ptr;
env_global_STACKTOP = base_ptr;
env_global_STACK_MAX = env_global_STACK_BASE + env_global_TOTAL_STACK;

// Initialise dynamic base, and save to memory
// The dynamic region starts at the aligned end of the stack; its address
// is written as a 32-bit value into the slot at DYNAMICTOP_PTR.
env_global_DYNAMIC_BASE = align_memory(env_global_STACK_MAX);
uint8_t* heap_base = env_memory_memory.data;
*((uint32_t*) (heap_base + env_global_DYNAMICTOP_PTR)) =
(uint32_t) env_global_DYNAMIC_BASE;
assert(env_global_DYNAMIC_BASE < env_global_TOTAL_MEMORY);
}
68 changes: 68 additions & 0 deletions src/rts/emscripten_rts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#ifndef CMM_OF_WASM_EMSCRIPTEN_RTS
#define CMM_OF_WASM_EMSCRIPTEN_RTS

#include <sys/time.h>
#include <sys/uio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include "wasm-rt.h"

#define WASM_PAGE_SIZE 65536

// Runtime globals. All of them are assigned in env_init().
// NOTE(review): these are declared without `extern`, so every translation
// unit that includes this header gets its own tentative definition; that
// can fail to link when common symbols are disabled -- confirm the build
// flags, or move the definitions into the .c file.

// Abort flag and exit status; rts_abort() sets both to 1.
uint64_t env_global_ABORT;
uint64_t env_global_EXITSTATUS;
// Floating-point constants, set to NAN and INFINITY.
double global_global_NaN;
double global_global_Infinity;

// Hack: just taking this from PolyBenchC output code at the moment
// Total stack and total memory sizes, in bytes.
uint64_t env_global_TOTAL_STACK;
uint64_t env_global_TOTAL_MEMORY;

// Start of the static data area.
uint64_t env_global_GLOBAL_BASE;

// Layout of the static area, the stack and the dynamic region.
uint64_t env_global_STATIC_BASE;
uint64_t env_global_STATIC_BUMP;
uint64_t env_global_STATICTOP;
uint64_t env_global_STACK_BASE;
uint64_t env_global_STACKTOP;
uint64_t env_global_STACK_MAX;
uint64_t env_global_DYNAMIC_BASE;
// Heap offset of the slot in which env_init() stores the dynamic base.
uint64_t env_global_DYNAMICTOP_PTR;
// Heap offset of a 16-byte scratch area reserved by env_init().
uint64_t env_global_tempDoublePtr;

uint64_t env_global_tableBase;
uint64_t env_global_memoryBase;

// The table and linear memory, allocated in env_init().
wasm_rt_table_t env_table_table;
wasm_rt_memory_t env_memory_memory;

// Prototypes for the runtime functions implemented in emscripten_rts.c.
// Empty parameter lists are kept as `()` to match the definitions.

// Aborting and process exit.
void env_cfunc_abort(char* str);

// Syscalls: `which` is the syscall number, `varargs` is the heap offset of
// the packed argument list.
uint32_t env_cfunc____syscall54(uint32_t which, uint32_t varargs);  // ioctl
uint32_t env_cfunc____syscall6(uint32_t which, uint32_t varargs);   // close
uint32_t env_cfunc____syscall140(uint32_t which, uint32_t varargs); // llseek
uint32_t env_cfunc____syscall146(uint32_t which, uint32_t varargs); // writev
uint32_t env_cfunc__emscripten_memcpy_big(uint32_t dest,
    uint32_t src, uint32_t num);

// Null function pointer handlers. The "iiii" variant is defined in the .c
// file alongside "ii" but previously had no prototype here.
void env_cfunc_nullFunc_ii(char* x);
void env_cfunc_nullFunc_iiii(char* x);
uint32_t env_cfunc____setErrNo(uint32_t value);
void env_cfunc____assert_fail(char* condition, char* filename,
    int line, void* func);
uint32_t env_cfunc__gettimeofday(uint32_t ptr);
void env_cfunc____lock();
void env_cfunc____unlock();
void env_cfunc_abortOnCannotGrowMemory();
void env_cfunc_enlargeMemory();
uint32_t env_cfunc_getTotalMemory();
void env_cfunc_abortStackOverflow(uint32_t alloc_size);
void env_cfunc___exit(uint32_t status);
void env_cfunc__exit(uint32_t status);

// Sets up the memory, the table and all runtime globals.
void env_init();

#endif

0 comments on commit 6359e6c

Please sign in to comment.