From 1dab8ba334a1b53f3c8780bc8a4f8045a6ea0375 Mon Sep 17 00:00:00 2001 From: ringabout <43030857+ringabout@users.noreply.github.com> Date: Sun, 30 Oct 2022 00:11:40 +0800 Subject: [PATCH] move threads out of system (#20674) * move syslocks first * progress * clean up * go on * clean up * clean up * add imports syslocks * remove documentation * public deallocOsPages * fixes genode * fixes more * fixes boehmGC * cover more cases * fixes cyclic deps * fixes genode * cleanup * unpublic fields * cleanup * clean up --- lib/core/locks.nim | 3 +- lib/core/rlocks.nim | 3 +- lib/genode/alloc.nim | 2 +- lib/genode/env.nim | 8 +- lib/pure/concurrency/cpuinfo.nim | 2 +- lib/{system => std/private}/syslocks.nim | 170 +++++++++++----------- lib/std/private/threadtypes.nim | 176 +++++++++++++++++++++++ lib/{system => std}/threads.nim | 154 +++----------------- lib/system.nim | 12 +- lib/system/alloc.nim | 1 + lib/system/channels_builtin.nim | 2 + lib/system/gc_regions.nim | 1 + lib/system/threadimpl.nim | 107 ++++++++++++++ lib/system/threadlocalstorage.nim | 174 +++------------------- tools/kochdocs.nim | 1 - 15 files changed, 425 insertions(+), 391 deletions(-) rename lib/{system => std/private}/syslocks.nim (51%) create mode 100644 lib/std/private/threadtypes.nim rename lib/{system => std}/threads.nim (66%) create mode 100644 lib/system/threadimpl.nim diff --git a/lib/core/locks.nim b/lib/core/locks.nim index 92967b9db6580..ad0bff44d093d 100644 --- a/lib/core/locks.nim +++ b/lib/core/locks.nim @@ -18,8 +18,7 @@ when not compileOption("threads") and not defined(nimdoc): when false: # fix #12330 {.error: "Locks requires --threads:on option.".} -const insideRLocksModule = false -include "system/syslocks" +import std/private/syslocks type Lock* = SysLock ## Nim lock; whether this is re-entrant diff --git a/lib/core/rlocks.nim b/lib/core/rlocks.nim index 0444b9a832931..bee5c16556301 100644 --- a/lib/core/rlocks.nim +++ b/lib/core/rlocks.nim @@ -16,8 +16,7 @@ when not compileOption("threads") and not defined(nimdoc): # so they can replace each other seamlessly. {.error: "Rlocks requires --threads:on option.".} -const insideRLocksModule = true -include "system/syslocks" +import std/private/syslocks type RLock* = SysLock ## Nim lock, re-entrant diff --git a/lib/genode/alloc.nim b/lib/genode/alloc.nim index 3ddd3074be8e5..efc25ac57da4b 100644 --- a/lib/genode/alloc.nim +++ b/lib/genode/alloc.nim @@ -15,7 +15,7 @@ when not defined(genode): {.error: "Genode only module".} when not declared(GenodeEnv): - include genode/env + import genode/env type RamDataspaceCapability {. importcpp: "Genode::Ram_dataspace_capability", pure.} = object diff --git a/lib/genode/env.nim b/lib/genode/env.nim index ef4a25883045f..34abf576fc3c8 100644 --- a/lib/genode/env.nim +++ b/lib/genode/env.nim @@ -20,10 +20,10 @@ when not defined(genode): {.error: "Genode only include".} type - GenodeEnvObj {.importcpp: "Genode::Env", header: "", pure.} = object - GenodeEnvPtr = ptr GenodeEnvObj + GenodeEnvObj* {.importcpp: "Genode::Env", header: "", pure.} = object + GenodeEnvPtr* = ptr GenodeEnvObj -const runtimeEnvSym = "nim_runtime_env" +const runtimeEnvSym* = "nim_runtime_env" when not defined(nimscript): - var runtimeEnv {.importcpp: runtimeEnvSym.}: GenodeEnvPtr + var runtimeEnv* {.importcpp: runtimeEnvSym.}: GenodeEnvPtr diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim index 16d32002da1bd..1d2ff63e16df9 100644 --- a/lib/pure/concurrency/cpuinfo.nim +++ b/lib/pure/concurrency/cpuinfo.nim @@ -40,7 +40,7 @@ when defined(macosx) or defined(bsd): importc: "sysctl", nodecl.} when defined(genode): - include genode/env + import genode/env proc affinitySpaceTotal(env: GenodeEnvPtr): cuint {. importcpp: "@->cpu().affinity_space().total()".} diff --git a/lib/system/syslocks.nim b/lib/std/private/syslocks.nim similarity index 51% rename from lib/system/syslocks.nim rename to lib/std/private/syslocks.nim index 2f0c8b0ba0b9d..dff339e9eaf48 100644 --- a/lib/system/syslocks.nim +++ b/lib/std/private/syslocks.nim @@ -15,7 +15,7 @@ when defined(windows): type Handle = int - SysLock {.importc: "CRITICAL_SECTION", + SysLock* {.importc: "CRITICAL_SECTION", header: "", final, pure.} = object # CRITICAL_SECTION in WinApi DebugInfo: pointer LockCount: int32 @@ -24,10 +24,10 @@ when defined(windows): LockSemaphore: int SpinCount: int - SysCond {.importc: "RTL_CONDITION_VARIABLE", header: "".} = object + SysCond* {.importc: "RTL_CONDITION_VARIABLE", header: "".} = object thePtr {.importc: "ptr".} : Handle - proc initSysLock(L: var SysLock) {.importc: "InitializeCriticalSection", + proc initSysLock*(L: var SysLock) {.importc: "InitializeCriticalSection", header: "".} ## Initializes the lock `L`. @@ -35,18 +35,18 @@ when defined(windows): header: "".} ## Tries to acquire the lock `L`. - proc tryAcquireSys(L: var SysLock): bool {.inline.} = + proc tryAcquireSys*(L: var SysLock): bool {.inline.} = result = tryAcquireSysAux(L) != 0'i32 - proc acquireSys(L: var SysLock) {.importc: "EnterCriticalSection", + proc acquireSys*(L: var SysLock) {.importc: "EnterCriticalSection", header: "".} ## Acquires the lock `L`. - proc releaseSys(L: var SysLock) {.importc: "LeaveCriticalSection", + proc releaseSys*(L: var SysLock) {.importc: "LeaveCriticalSection", header: "".} ## Releases the lock `L`. - proc deinitSys(L: var SysLock) {.importc: "DeleteCriticalSection", + proc deinitSys*(L: var SysLock) {.importc: "DeleteCriticalSection", header: "".} proc initializeConditionVariable( @@ -60,41 +60,41 @@ when defined(windows): ): int32 {.stdcall, noSideEffect, dynlib: "kernel32", importc: "SleepConditionVariableCS".} - proc signalSysCond(hEvent: var SysCond) {.stdcall, noSideEffect, + proc signalSysCond*(hEvent: var SysCond) {.stdcall, noSideEffect, dynlib: "kernel32", importc: "WakeConditionVariable".} - proc broadcastSysCond(hEvent: var SysCond) {.stdcall, noSideEffect, + proc broadcastSysCond*(hEvent: var SysCond) {.stdcall, noSideEffect, dynlib: "kernel32", importc: "WakeAllConditionVariable".} - proc initSysCond(cond: var SysCond) {.inline.} = + proc initSysCond*(cond: var SysCond) {.inline.} = initializeConditionVariable(cond) - proc deinitSysCond(cond: var SysCond) {.inline.} = + proc deinitSysCond*(cond: var SysCond) {.inline.} = discard - proc waitSysCond(cond: var SysCond, lock: var SysLock) = + proc waitSysCond*(cond: var SysCond, lock: var SysLock) = discard sleepConditionVariableCS(cond, lock, -1'i32) elif defined(genode): const Header = "genode_cpp/syslocks.h" type - SysLock {.importcpp: "Nim::SysLock", pure, final, + SysLock* {.importcpp: "Nim::SysLock", pure, final, header: Header.} = object - SysCond {.importcpp: "Nim::SysCond", pure, final, + SysCond* {.importcpp: "Nim::SysCond", pure, final, header: Header.} = object - proc initSysLock(L: var SysLock) = discard - proc deinitSys(L: var SysLock) = discard - proc acquireSys(L: var SysLock) {.noSideEffect, importcpp.} - proc tryAcquireSys(L: var SysLock): bool {.noSideEffect, importcpp.} - proc releaseSys(L: var SysLock) {.noSideEffect, importcpp.} + proc initSysLock*(L: var SysLock) = discard + proc deinitSys*(L: var SysLock) = discard + proc acquireSys*(L: var SysLock) {.noSideEffect, importcpp.} + proc tryAcquireSys*(L: var SysLock): bool {.noSideEffect, importcpp.} + proc releaseSys*(L: var SysLock) {.noSideEffect, importcpp.} - proc initSysCond(L: var SysCond) = discard - proc deinitSysCond(L: var SysCond) = discard - proc waitSysCond(cond: var SysCond, lock: var SysLock) {. + proc initSysCond*(L: var SysCond) = discard + proc deinitSysCond*(L: var SysCond) = discard + proc waitSysCond*(cond: var SysCond, lock: var SysLock) {. noSideEffect, importcpp.} - proc signalSysCond(cond: var SysCond) {. + proc signalSysCond*(cond: var SysCond) {. noSideEffect, importcpp.} - proc broadcastSysCond(cond: var SysCond) {. + proc broadcastSysCond*(cond: var SysCond) {. noSideEffect, importcpp.} else: @@ -105,7 +105,7 @@ else: when defined(linux) and defined(amd64): abi: array[40 div sizeof(clong), clong] - SysLockAttr {.importc: "pthread_mutexattr_t", pure, final + SysLockAttr* {.importc: "pthread_mutexattr_t", pure, final header: """#include #include """.} = object when defined(linux) and defined(amd64): @@ -143,8 +143,8 @@ else: # to prevent this once and for all, we're doing an extra malloc when # initializing the primitive. type - SysLock = ptr SysLockObj - SysCond = ptr SysCondObj + SysLock* = ptr SysLockObj + SysCond* = ptr SysCondObj when not declared(c_malloc): proc c_malloc(size: csize_t): pointer {. @@ -152,83 +152,83 @@ else: proc c_free(p: pointer) {. importc: "free", header: "".} - proc initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) = + proc initSysLock*(L: var SysLock, attr: ptr SysLockAttr = nil) = L = cast[SysLock](c_malloc(csize_t(sizeof(SysLockObj)))) initSysLockAux(L[], attr) - proc deinitSys(L: var SysLock) = + proc deinitSys*(L: var SysLock) = deinitSysAux(L[]) c_free(L) - template acquireSys(L: var SysLock) = + template acquireSys*(L: var SysLock) = acquireSysAux(L[]) - template tryAcquireSys(L: var SysLock): bool = + template tryAcquireSys*(L: var SysLock): bool = tryAcquireSysAux(L[]) == 0'i32 - template releaseSys(L: var SysLock) = + template releaseSys*(L: var SysLock) = releaseSysAux(L[]) else: type - SysLock = SysLockObj - SysCond = SysCondObj + SysLock* = SysLockObj + SysCond* = SysCondObj - template initSysLock(L: var SysLock, attr: ptr SysLockAttr = nil) = + template initSysLock*(L: var SysLock, attr: ptr SysLockAttr = nil) = initSysLockAux(L, attr) - template deinitSys(L: var SysLock) = + template deinitSys*(L: var SysLock) = deinitSysAux(L) - template acquireSys(L: var SysLock) = + template acquireSys*(L: var SysLock) = acquireSysAux(L) - template tryAcquireSys(L: var SysLock): bool = + template tryAcquireSys*(L: var SysLock): bool = tryAcquireSysAux(L) == 0'i32 - template releaseSys(L: var SysLock) = + template releaseSys*(L: var SysLock) = releaseSysAux(L) - when insideRLocksModule: - let SysLockType_Reentrant {.importc: "PTHREAD_MUTEX_RECURSIVE", - header: "".}: SysLockType - proc initSysLockAttr(a: var SysLockAttr) {. - importc: "pthread_mutexattr_init", header: "", noSideEffect.} - proc setSysLockType(a: var SysLockAttr, t: SysLockType) {. - importc: "pthread_mutexattr_settype", header: "", noSideEffect.} + # rlocks + var SysLockType_Reentrant* {.importc: "PTHREAD_MUTEX_RECURSIVE", + header: "".}: SysLockType + proc initSysLockAttr*(a: var SysLockAttr) {. + importc: "pthread_mutexattr_init", header: "", noSideEffect.} + proc setSysLockType*(a: var SysLockAttr, t: SysLockType) {. + importc: "pthread_mutexattr_settype", header: "", noSideEffect.} + + # locks + proc initSysCondAux(cond: var SysCondObj, cond_attr: ptr SysCondAttr = nil) {. + importc: "pthread_cond_init", header: "", noSideEffect.} + proc deinitSysCondAux(cond: var SysCondObj) {.noSideEffect, + importc: "pthread_cond_destroy", header: "".} + + proc waitSysCondAux(cond: var SysCondObj, lock: var SysLockObj): cint {. + importc: "pthread_cond_wait", header: "", noSideEffect.} + proc signalSysCondAux(cond: var SysCondObj) {. + importc: "pthread_cond_signal", header: "", noSideEffect.} + proc broadcastSysCondAux(cond: var SysCondObj) {. + importc: "pthread_cond_broadcast", header: "", noSideEffect.} + when defined(ios): + proc initSysCond*(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) = + cond = cast[SysCond](c_malloc(csize_t(sizeof(SysCondObj)))) + initSysCondAux(cond[], cond_attr) + + proc deinitSysCond*(cond: var SysCond) = + deinitSysCondAux(cond[]) + c_free(cond) + + template waitSysCond*(cond: var SysCond, lock: var SysLock) = + discard waitSysCondAux(cond[], lock[]) + template signalSysCond*(cond: var SysCond) = + signalSysCondAux(cond[]) + template broadcastSysCond*(cond: var SysCond) = + broadcastSysCondAux(cond[]) else: - proc initSysCondAux(cond: var SysCondObj, cond_attr: ptr SysCondAttr = nil) {. - importc: "pthread_cond_init", header: "", noSideEffect.} - proc deinitSysCondAux(cond: var SysCondObj) {.noSideEffect, - importc: "pthread_cond_destroy", header: "".} - - proc waitSysCondAux(cond: var SysCondObj, lock: var SysLockObj): cint {. - importc: "pthread_cond_wait", header: "", noSideEffect.} - proc signalSysCondAux(cond: var SysCondObj) {. - importc: "pthread_cond_signal", header: "", noSideEffect.} - proc broadcastSysCondAux(cond: var SysCondObj) {. - importc: "pthread_cond_broadcast", header: "", noSideEffect.} - - when defined(ios): - proc initSysCond(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) = - cond = cast[SysCond](c_malloc(csize_t(sizeof(SysCondObj)))) - initSysCondAux(cond[], cond_attr) - - proc deinitSysCond(cond: var SysCond) = - deinitSysCondAux(cond[]) - c_free(cond) - - template waitSysCond(cond: var SysCond, lock: var SysLock) = - discard waitSysCondAux(cond[], lock[]) - template signalSysCond(cond: var SysCond) = - signalSysCondAux(cond[]) - template broadcastSysCond(cond: var SysCond) = - broadcastSysCondAux(cond[]) - else: - template initSysCond(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) = - initSysCondAux(cond, cond_attr) - template deinitSysCond(cond: var SysCond) = - deinitSysCondAux(cond) - - template waitSysCond(cond: var SysCond, lock: var SysLock) = - discard waitSysCondAux(cond, lock) - template signalSysCond(cond: var SysCond) = - signalSysCondAux(cond) - template broadcastSysCond(cond: var SysCond) = - broadcastSysCondAux(cond) + template initSysCond*(cond: var SysCond, cond_attr: ptr SysCondAttr = nil) = + initSysCondAux(cond, cond_attr) + template deinitSysCond*(cond: var SysCond) = + deinitSysCondAux(cond) + + template waitSysCond*(cond: var SysCond, lock: var SysLock) = + discard waitSysCondAux(cond, lock) + template signalSysCond*(cond: var SysCond) = + signalSysCondAux(cond) + template broadcastSysCond*(cond: var SysCond) = + broadcastSysCondAux(cond) {.pop.} diff --git a/lib/std/private/threadtypes.nim b/lib/std/private/threadtypes.nim new file mode 100644 index 0000000000000..a1cdf21dc0f77 --- /dev/null +++ b/lib/std/private/threadtypes.nim @@ -0,0 +1,176 @@ +include system/inclrtl + +const hasSharedHeap* = defined(boehmgc) or defined(gogc) # don't share heaps; every thread has its own + +when defined(windows): + type + Handle* = int + SysThread* = Handle + WinThreadProc* = proc (x: pointer): int32 {.stdcall.} + + proc createThread*(lpThreadAttributes: pointer, dwStackSize: int32, + lpStartAddress: WinThreadProc, + lpParameter: pointer, + dwCreationFlags: int32, + lpThreadId: var int32): SysThread {. + stdcall, dynlib: "kernel32", importc: "CreateThread".} + + proc winSuspendThread*(hThread: SysThread): int32 {. + stdcall, dynlib: "kernel32", importc: "SuspendThread".} + + proc winResumeThread*(hThread: SysThread): int32 {. + stdcall, dynlib: "kernel32", importc: "ResumeThread".} + + proc waitForSingleObject*(hHandle: SysThread, dwMilliseconds: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} + + proc waitForMultipleObjects*(nCount: int32, + lpHandles: ptr SysThread, + bWaitAll: int32, + dwMilliseconds: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".} + + proc terminateThread*(hThread: SysThread, dwExitCode: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "TerminateThread".} + + proc setThreadAffinityMask*(hThread: SysThread, dwThreadAffinityMask: uint) {. + importc: "SetThreadAffinityMask", stdcall, header: "".} + +elif defined(genode): + const + GenodeHeader* = "genode_cpp/threads.h" + type + SysThread* {.importcpp: "Nim::SysThread", + header: GenodeHeader, final, pure.} = object + GenodeThreadProc* = proc (x: pointer) {.noconv.} + + proc initThread*(s: var SysThread, + env: GenodeEnv, + stackSize: culonglong, + entry: GenodeThreadProc, + arg: pointer, + affinity: cuint) {. + importcpp: "#.initThread(@)".} + + +else: + when not (defined(macosx) or defined(haiku)): + {.passl: "-pthread".} + + when not defined(haiku): + {.passc: "-pthread".} + + const + schedh = "#define _GNU_SOURCE\n#include " + pthreadh* = "#define _GNU_SOURCE\n#include " + + when not declared(Time): + when defined(linux): + type Time = clong + else: + type Time = int + + when (defined(linux) or defined(nintendoswitch)) and defined(amd64): + type + SysThread* {.importc: "pthread_t", + header: "" .} = distinct culong + Pthread_attr* {.importc: "pthread_attr_t", + header: "".} = object + abi: array[56 div sizeof(clong), clong] + elif defined(openbsd) and defined(amd64): + type + SysThread* {.importc: "pthread_t", header: "".} = object + Pthread_attr* {.importc: "pthread_attr_t", + header: "".} = object + else: + type + SysThread* {.importc: "pthread_t", header: "".} = int + Pthread_attr* {.importc: "pthread_attr_t", + header: "".} = object + type + Timespec* {.importc: "struct timespec", header: "".} = object + tv_sec*: Time + tv_nsec*: clong + + proc pthread_attr_init*(a1: var Pthread_attr): cint {. + importc, header: pthreadh.} + proc pthread_attr_setstack*(a1: ptr Pthread_attr, a2: pointer, a3: int): cint {. + importc, header: pthreadh.} + proc pthread_attr_setstacksize*(a1: var Pthread_attr, a2: int): cint {. + importc, header: pthreadh.} + proc pthread_attr_destroy*(a1: var Pthread_attr): cint {. + importc, header: pthreadh.} + + proc pthread_create*(a1: var SysThread, a2: var Pthread_attr, + a3: proc (x: pointer): pointer {.noconv.}, + a4: pointer): cint {.importc: "pthread_create", + header: pthreadh.} + proc pthread_join*(a1: SysThread, a2: ptr pointer): cint {. + importc, header: pthreadh.} + + proc pthread_cancel*(a1: SysThread): cint {. + importc: "pthread_cancel", header: pthreadh.} + + type CpuSet* {.importc: "cpu_set_t", header: schedh.} = object + when defined(linux) and defined(amd64): + abi: array[1024 div (8 * sizeof(culong)), culong] + + proc cpusetZero*(s: var CpuSet) {.importc: "CPU_ZERO", header: schedh.} + proc cpusetIncl*(cpu: cint; s: var CpuSet) {. + importc: "CPU_SET", header: schedh.} + + when defined(android): + # libc of android doesn't implement pthread_setaffinity_np, + # it exposes pthread_gettid_np though, so we can use that in combination + # with sched_setaffinity to set the thread affinity. + type Pid* {.importc: "pid_t", header: "".} = int32 # From posix_other.nim + + proc setAffinityTID*(tid: Pid; setsize: csize_t; s: var CpuSet) {. + importc: "sched_setaffinity", header: schedh.} + + proc pthread_gettid_np*(thread: SysThread): Pid {. + importc: "pthread_gettid_np", header: pthreadh.} + + proc setAffinity*(thread: SysThread; setsize: csize_t; s: var CpuSet) = + setAffinityTID(pthread_gettid_np(thread), setsize, s) + else: + proc setAffinity*(thread: SysThread; setsize: csize_t; s: var CpuSet) {. + importc: "pthread_setaffinity_np", header: pthreadh.} + + +const + emulatedThreadVars* = compileOption("tlsEmulation") +# we preallocate a fixed size for thread local storage, so that no heap +# allocations are needed. Currently less than 16K are used on a 64bit machine. +# We use `float` for proper alignment: +const nimTlsSize {.intdefine.} = 16000 +type + ThreadLocalStorage* = array[0..(nimTlsSize div sizeof(float)), float] + PGcThread* = ptr GcThread + GcThread* {.pure, inheritable.} = object + when emulatedThreadVars: + tls*: ThreadLocalStorage + else: + nil + when hasSharedHeap: + next*, prev*: PGcThread + stackBottom*, stackTop*: pointer + stackSize*: int + else: + nil + +const hasAllocStack* = defined(zephyr) # maybe freertos too? + +type + Thread*[TArg] = object + core*: PGcThread + sys*: SysThread + when TArg is void: + dataFn*: proc () {.nimcall, gcsafe.} + else: + dataFn*: proc (m: TArg) {.nimcall, gcsafe.} + data*: TArg + when hasAllocStack: + rawStack*: pointer + +proc `=copy`*[TArg](x: var Thread[TArg], y: Thread[TArg]) {.error.} diff --git a/lib/system/threads.nim b/lib/std/threads.nim similarity index 66% rename from lib/system/threads.nim rename to lib/std/threads.nim index 4e190e443fe80..5726a5cdcf2eb 100644 --- a/lib/system/threads.nim +++ b/lib/std/threads.nim @@ -9,10 +9,6 @@ ## Thread support for Nim. ## -## **Note**: This is part of the system module. Do not import it directly. -## To activate thread support you need to compile -## with the `--threads:on`:option: command line switch. -## ## Nim's memory model for threads is quite different from other common ## programming languages (C, Pascal): Each thread has its own ## (garbage collected) heap and sharing of memory is restricted. This helps @@ -44,24 +40,17 @@ ## ## deinitLock(L) -when not declared(ThisIsSystem): - {.error: "You must not import this module explicitly".} +import std/private/[threadtypes] +export Thread + +import system/ansi_c when defined(nimPreviewSlimSystem): import std/assertions -const - hasAllocStack = defined(zephyr) # maybe freertos too? +when defined(genode): + import genode/env -when defined(gcDestructors): - proc allocThreadStorage(size: int): pointer = - result = c_malloc(csize_t size) - zeroMem(result, size) - - proc deallocThreadStorage(p: pointer) = c_free(p) -else: - template allocThreadStorage(size: untyped): untyped = allocShared0(size) - template deallocThreadStorage(p: pointer) = deallocShared(p) when hasAllocStack or defined(zephyr) or defined(freertos): const @@ -81,6 +70,14 @@ else: ThreadStackSize = ThreadStackMask+1 - StackGuardSize + +when defined(gcDestructors): + proc allocThreadStorage(size: int): pointer = + result = c_malloc(csize_t size) + zeroMem(result, size) +else: + template allocThreadStorage(size: untyped): untyped = allocShared0(size) + #const globalsSlot = ThreadVarSlot(0) #sysAssert checkSlot.int == globalsSlot.int @@ -90,43 +87,13 @@ when defined(zephyr): #include """.} -# create for the main thread. Note: do not insert this data into the list -# of all threads; it's not to be stopped etc. -when not defined(useNimRtl): - #when not defined(createNimRtl): initStackBottom() - when declared(initGC): - initGC() - when not emulatedThreadVars: - type ThreadType {.pure.} = enum - None = 0, - NimThread = 1, - ForeignThread = 2 - var - threadType {.rtlThreadVar.}: ThreadType - - threadType = ThreadType.NimThread # We jump through some hops here to ensure that Nim thread procs can have # the Nim calling convention. This is needed because thread procs are # ``stdcall`` on Windows and ``noconv`` on UNIX. Alternative would be to just # use ``stdcall`` since it is mapped to ``noconv`` on UNIX anyway. -type - Thread*[TArg] = object - core: PGcThread - sys: SysThread - when TArg is void: - dataFn: proc () {.nimcall, gcsafe.} - else: - dataFn: proc (m: TArg) {.nimcall, gcsafe.} - data: TArg - when hasAllocStack: - rawStack: pointer - -proc `=copy`*[TArg](x: var Thread[TArg], y: Thread[TArg]) {.error.} -var - threadDestructionHandlers {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}] proc onThreadDestruction*(handler: proc () {.closure, gcsafe, raises: [].}) = ## Registers a *thread local* handler that is called at the thread's @@ -135,101 +102,20 @@ proc onThreadDestruction*(handler: proc () {.closure, gcsafe, raises: [].}) = ## A thread is destructed when the `.thread` proc returns ## normally or when it raises an exception. Note that unhandled exceptions ## in a thread nevertheless cause the whole process to die. - threadDestructionHandlers.add handler - -template afterThreadRuns() = - for i in countdown(threadDestructionHandlers.len-1, 0): - threadDestructionHandlers[i]() - -when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions): - proc deallocOsPages() {.rtl, raises: [].} - -proc threadTrouble() {.raises: [], gcsafe.} - ## defined in system/excpt.nim - -when defined(boehmgc): - type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.} - proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer) - {.importc: "GC_call_with_stack_base", boehmGC.} - proc boehmGC_register_my_thread(sb: pointer) - {.importc: "GC_register_my_thread", boehmGC.} - proc boehmGC_unregister_my_thread() - {.importc: "GC_unregister_my_thread", boehmGC.} - - proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv, raises: [].} = - boehmGC_register_my_thread(sb) - try: - let thrd = cast[ptr Thread[TArg]](thrd) - when TArg is void: - thrd.dataFn() - else: - thrd.dataFn(thrd.data) - except: - threadTrouble() - finally: - afterThreadRuns() - boehmGC_unregister_my_thread() -else: - proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) {.raises: [].} = - try: - when TArg is void: - thrd.dataFn() - else: - when defined(nimV2): - thrd.dataFn(thrd.data) - else: - var x: TArg - deepCopy(x, thrd.data) - thrd.dataFn(x) - except: - threadTrouble() - finally: - afterThreadRuns() - when hasAllocStack: - deallocThreadStorage(thrd.rawStack) - -proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} = - when defined(boehmgc): - boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd) - elif not defined(nogc) and not defined(gogc) and not defined(gcRegions) and not usesDestructors: - var p {.volatile.}: pointer - # init the GC for refc/markandsweep - nimGC_setStackBottom(addr(p)) - initGC() - when declared(threadType): - threadType = ThreadType.NimThread - threadProcWrapDispatch[TArg](thrd) - when declared(deallocOsPages): deallocOsPages() - else: - threadProcWrapDispatch(thrd) - -template threadProcWrapperBody(closure: untyped): untyped = - var thrd = cast[ptr Thread[TArg]](closure) - var core = thrd.core - when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core) - threadProcWrapStackFrame(thrd) - # Since an unhandled exception terminates the whole process (!), there is - # no need for a ``try finally`` here, nor would it be correct: The current - # exception is tried to be re-raised by the code-gen after the ``finally``! - # However this is doomed to fail, because we already unmapped every heap - # page! - - # mark as not running anymore: - thrd.core = nil - thrd.dataFn = nil - deallocThreadStorage(cast[pointer](core)) + nimThreadDestructionHandlers.add handler + {.push stack_trace:off.} when defined(windows): proc threadProcWrapper[TArg](closure: pointer): int32 {.stdcall.} = - threadProcWrapperBody(closure) + nimThreadProcWrapperBody(closure) # implicitly return 0 elif defined(genode): proc threadProcWrapper[TArg](closure: pointer) {.noconv.} = - threadProcWrapperBody(closure) + nimThreadProcWrapperBody(closure) else: proc threadProcWrapper[TArg](closure: pointer): pointer {.noconv.} = - threadProcWrapperBody(closure) + nimThreadProcWrapperBody(closure) {.pop.} proc running*[TArg](t: Thread[TArg]): bool {.inline.} = @@ -387,4 +273,4 @@ proc createThread*(t: var Thread[void], tp: proc () {.thread, nimcall.}) = createThread[void](t, tp) when not defined(gcOrc): - include threadids + include system/threadids diff --git a/lib/system.nim b/lib/system.nim index c28479a44655e..ff2bff45f9270 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -1115,7 +1115,7 @@ elif defined(nimdoc): proc quit*(errorcode: int = QuitSuccess) {.magic: "Exit", noreturn.} elif defined(genode): - include genode/env + import genode/env var systemEnv {.exportc: runtimeEnvSym.}: GenodeEnvPtr @@ -1609,8 +1609,7 @@ when notJSnotNims: {.push stackTrace: off.} when not defined(js) and hasThreadSupport and hostOS != "standalone": - const insideRLocksModule = false - include "system/syslocks" + import std/private/syslocks include "system/threadlocalstorage" when not defined(js) and defined(nimV2): @@ -2092,7 +2091,12 @@ when not defined(js): when declared(initAllocator): initAllocator() when hasThreadSupport: - when hostOS != "standalone": include "system/threads" + when hostOS != "standalone": + include system/threadimpl + + import std/threads + export threads + elif not defined(nogc) and not defined(nimscript): when not defined(useNimRtl) and not defined(createNimRtl): initStackBottom() when declared(initGC): initGC() diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 88f680500434d..5350c1db828fe 100644 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -11,6 +11,7 @@ {.push profiler:off.} include osalloc +import std/private/syslocks template track(op, address, size) = when defined(memTracker): diff --git a/lib/system/channels_builtin.nim b/lib/system/channels_builtin.nim index 2ad589383e011..50d740b233189 100644 --- a/lib/system/channels_builtin.nim +++ b/lib/system/channels_builtin.nim @@ -139,6 +139,8 @@ when not declared(ThisIsSystem): {.error: "You must not import this module explicitly".} +import std/private/syslocks + type pbytes = ptr UncheckedArray[byte] RawChannel {.pure, final.} = object ## msg queue for a thread diff --git a/lib/system/gc_regions.nim b/lib/system/gc_regions.nim index 6ced04c998a75..d96de7eac5384 100644 --- a/lib/system/gc_regions.nim +++ b/lib/system/gc_regions.nim @@ -7,6 +7,7 @@ # # "Stack GC" for embedded devices or ultra performance requirements. +import std/private/syslocks when defined(memProfiler): proc nimProfile(requestedSize: int) {.benign.} diff --git a/lib/system/threadimpl.nim b/lib/system/threadimpl.nim new file mode 100644 index 0000000000000..73f2807d2cc7a --- /dev/null +++ b/lib/system/threadimpl.nim @@ -0,0 +1,107 @@ +var + nimThreadDestructionHandlers* {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}] +when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions): + proc deallocOsPages() {.rtl, raises: [].} +proc threadTrouble() {.raises: [], gcsafe.} +# create for the main thread. Note: do not insert this data into the list +# of all threads; it's not to be stopped etc. +when not defined(useNimRtl): + #when not defined(createNimRtl): initStackBottom() + when declared(initGC): + initGC() + when not emulatedThreadVars: + type ThreadType {.pure.} = enum + None = 0, + NimThread = 1, + ForeignThread = 2 + var + threadType {.rtlThreadVar.}: ThreadType + + threadType = ThreadType.NimThread + +when defined(gcDestructors): + proc allocThreadStorage(size: int): pointer = + result = c_malloc(csize_t size) + zeroMem(result, size) + + proc deallocThreadStorage(p: pointer) = c_free(p) +else: + template allocThreadStorage(size: untyped): untyped = allocShared0(size) + template deallocThreadStorage(p: pointer) = deallocShared(p) + +template afterThreadRuns() = + for i in countdown(nimThreadDestructionHandlers.len-1, 0): + nimThreadDestructionHandlers[i]() + +when defined(boehmgc): + type GCStackBaseProc = proc(sb: pointer, t: pointer) {.noconv.} + proc boehmGC_call_with_stack_base(sbp: GCStackBaseProc, p: pointer) + {.importc: "GC_call_with_stack_base", boehmGC.} + proc boehmGC_register_my_thread(sb: pointer) + {.importc: "GC_register_my_thread", boehmGC.} + proc boehmGC_unregister_my_thread() + {.importc: "GC_unregister_my_thread", boehmGC.} + + proc threadProcWrapDispatch[TArg](sb: pointer, thrd: pointer) {.noconv, raises: [].} = + boehmGC_register_my_thread(sb) + try: + let thrd = cast[ptr Thread[TArg]](thrd) + when TArg is void: + thrd.dataFn() + else: + thrd.dataFn(thrd.data) + except: + threadTrouble() + finally: + afterThreadRuns() + boehmGC_unregister_my_thread() +else: + proc threadProcWrapDispatch[TArg](thrd: ptr Thread[TArg]) {.raises: [].} = + try: + when TArg is void: + thrd.dataFn() + else: + when defined(nimV2): + thrd.dataFn(thrd.data) + else: + var x: TArg + deepCopy(x, thrd.data) + thrd.dataFn(x) + except: + threadTrouble() + finally: + afterThreadRuns() + when hasAllocStack: + deallocThreadStorage(thrd.rawStack) + +proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} = + when defined(boehmgc): + boehmGC_call_with_stack_base(threadProcWrapDispatch[TArg], thrd) + elif not defined(nogc) and not defined(gogc) and not defined(gcRegions) and not usesDestructors: + var p {.volatile.}: pointer + # init the GC for refc/markandsweep + nimGC_setStackBottom(addr(p)) + when declared(initGC): + initGC() + when declared(threadType): + threadType = ThreadType.NimThread + threadProcWrapDispatch[TArg](thrd) + when declared(deallocOsPages): deallocOsPages() + else: + threadProcWrapDispatch(thrd) + +template nimThreadProcWrapperBody*(closure: untyped): untyped = + var thrd = cast[ptr Thread[TArg]](closure) + var core = thrd.core + when declared(globalsSlot): threadVarSetValue(globalsSlot, thrd.core) + threadProcWrapStackFrame(thrd) + # Since an unhandled exception terminates the whole process (!), there is + # no need for a ``try finally`` here, nor would it be correct: The current + # exception is tried to be re-raised by the code-gen after the ``finally``! + # However this is doomed to fail, because we already unmapped every heap + # page! + + # mark as not running anymore: + thrd.core = nil + thrd.dataFn = nil + deallocThreadStorage(cast[pointer](core)) diff --git a/lib/system/threadlocalstorage.nim b/lib/system/threadlocalstorage.nim index b62c903c34a8c..977f42e723825 100644 --- a/lib/system/threadlocalstorage.nim +++ b/lib/system/threadlocalstorage.nim @@ -1,83 +1,33 @@ +import std/private/threadtypes when defined(windows): - type - SysThread* = Handle - WinThreadProc = proc (x: pointer): int32 {.stdcall.} - - proc createThread(lpThreadAttributes: pointer, dwStackSize: int32, - lpStartAddress: WinThreadProc, - lpParameter: pointer, - dwCreationFlags: int32, - lpThreadId: var int32): SysThread {. - stdcall, dynlib: "kernel32", importc: "CreateThread".} - - proc winSuspendThread(hThread: SysThread): int32 {. - stdcall, dynlib: "kernel32", importc: "SuspendThread".} - - proc winResumeThread(hThread: SysThread): int32 {. - stdcall, dynlib: "kernel32", importc: "ResumeThread".} - - proc waitForSingleObject(hHandle: SysThread, dwMilliseconds: int32): int32 {. - stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} - - proc waitForMultipleObjects(nCount: int32, - lpHandles: ptr SysThread, - bWaitAll: int32, - dwMilliseconds: int32): int32 {. - stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".} - - proc terminateThread(hThread: SysThread, dwExitCode: int32): int32 {. - stdcall, dynlib: "kernel32", importc: "TerminateThread".} - type ThreadVarSlot = distinct int32 - when true: - proc threadVarAlloc(): ThreadVarSlot {. - importc: "TlsAlloc", stdcall, header: "".} - proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {. - importc: "TlsSetValue", stdcall, header: "".} - proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {. - importc: "TlsGetValue", stdcall, header: "".} + proc threadVarAlloc(): ThreadVarSlot {. + importc: "TlsAlloc", stdcall, header: "".} + proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {. + importc: "TlsSetValue", stdcall, header: "".} + proc tlsGetValue(dwTlsIndex: ThreadVarSlot): pointer {. + importc: "TlsGetValue", stdcall, header: "".} - proc getLastError(): uint32 {. - importc: "GetLastError", stdcall, header: "".} - proc setLastError(x: uint32) {. - importc: "SetLastError", stdcall, header: "".} + proc getLastError(): uint32 {. + importc: "GetLastError", stdcall, header: "".} + proc setLastError(x: uint32) {. + importc: "SetLastError", stdcall, header: "".} - proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer = - let realLastError = getLastError() - result = tlsGetValue(dwTlsIndex) - setLastError(realLastError) - else: - proc threadVarAlloc(): ThreadVarSlot {. - importc: "TlsAlloc", stdcall, dynlib: "kernel32".} - proc threadVarSetValue(dwTlsIndex: ThreadVarSlot, lpTlsValue: pointer) {. - importc: "TlsSetValue", stdcall, dynlib: "kernel32".} - proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer {. - importc: "TlsGetValue", stdcall, dynlib: "kernel32".} - - proc setThreadAffinityMask(hThread: SysThread, dwThreadAffinityMask: uint) {. - importc: "SetThreadAffinityMask", stdcall, header: "".} + proc threadVarGetValue(dwTlsIndex: ThreadVarSlot): pointer = + let realLastError = getLastError() + result = tlsGetValue(dwTlsIndex) + setLastError(realLastError) elif defined(genode): - import genode/env const GenodeHeader = "genode_cpp/threads.h" + type - SysThread* {.importcpp: "Nim::SysThread", - header: GenodeHeader, final, pure.} = object - GenodeThreadProc = proc (x: pointer) {.noconv.} ThreadVarSlot = int - proc initThread(s: var SysThread, - env: GenodeEnv, - stackSize: culonglong, - entry: GenodeThreadProc, - arg: pointer, - affinity: cuint) {. - importcpp: "#.initThread(@)".} - proc threadVarAlloc(): ThreadVarSlot = 0 proc offMainThread(): bool {. @@ -113,62 +63,18 @@ else: when not defined(haiku): {.passc: "-pthread".} - const - schedh = "#define _GNU_SOURCE\n#include " - pthreadh = "#define _GNU_SOURCE\n#include " - - when not declared(Time): - when defined(linux): - type Time = clong - else: - type Time = int - when (defined(linux) or defined(nintendoswitch)) and defined(amd64): type - SysThread* {.importc: "pthread_t", - header: "" .} = distinct culong - Pthread_attr {.importc: "pthread_attr_t", - header: "".} = object - abi: array[56 div sizeof(clong), clong] ThreadVarSlot {.importc: "pthread_key_t", header: "".} = distinct cuint elif defined(openbsd) and defined(amd64): type - SysThread* {.importc: "pthread_t", header: "".} = object - Pthread_attr {.importc: "pthread_attr_t", - header: "".} = object ThreadVarSlot {.importc: "pthread_key_t", header: "".} = cint else: type - SysThread* {.importc: "pthread_t", header: "".} = int - Pthread_attr {.importc: "pthread_attr_t", - header: "".} = object ThreadVarSlot {.importc: "pthread_key_t", header: "".} = object - type - Timespec {.importc: "struct timespec", header: "".} = object - tv_sec: Time - tv_nsec: clong - - proc pthread_attr_init(a1: var Pthread_attr): cint {. - importc, header: pthreadh.} - proc pthread_attr_setstack*(a1: ptr Pthread_attr, a2: pointer, a3: int): cint {. - importc, header: pthreadh.} - proc pthread_attr_setstacksize(a1: var Pthread_attr, a2: int): cint {. - importc, header: pthreadh.} - proc pthread_attr_destroy(a1: var Pthread_attr): cint {. - importc, header: pthreadh.} - - proc pthread_create(a1: var SysThread, a2: var Pthread_attr, - a3: proc (x: pointer): pointer {.noconv.}, - a4: pointer): cint {.importc: "pthread_create", - header: pthreadh.} - proc pthread_join(a1: SysThread, a2: ptr pointer): cint {. - importc, header: pthreadh.} - - proc pthread_cancel(a1: SysThread): cint {. - importc: "pthread_cancel", header: pthreadh.} proc pthread_getspecific(a1: ThreadVarSlot): pointer {. importc: "pthread_getspecific", header: pthreadh.} @@ -188,59 +94,13 @@ else: proc threadVarGetValue(s: ThreadVarSlot): pointer {.inline.} = result = pthread_getspecific(s) - type CpuSet {.importc: "cpu_set_t", header: schedh.} = object - when defined(linux) and defined(amd64): - abi: array[1024 div (8 * sizeof(culong)), culong] - - proc cpusetZero(s: var CpuSet) {.importc: "CPU_ZERO", header: schedh.} - proc cpusetIncl(cpu: cint; s: var CpuSet) {. - importc: "CPU_SET", header: schedh.} - - when defined(android): - # libc of android doesn't implement pthread_setaffinity_np, - # it exposes pthread_gettid_np though, so we can use that in combination - # with sched_setaffinity to set the thread affinity. - type Pid {.importc: "pid_t", header: "".} = int32 # From posix_other.nim - - proc setAffinityTID(tid: Pid; setsize: csize_t; s: var CpuSet) {. - importc: "sched_setaffinity", header: schedh.} - - proc pthread_gettid_np(thread: SysThread): Pid {. - importc: "pthread_gettid_np", header: pthreadh.} - - proc setAffinity(thread: SysThread; setsize: csize_t; s: var CpuSet) = - setAffinityTID(pthread_gettid_np(thread), setsize, s) - else: - proc setAffinity(thread: SysThread; setsize: csize_t; s: var CpuSet) {. - importc: "pthread_setaffinity_np", header: pthreadh.} - - -const - emulatedThreadVars = compileOption("tlsEmulation") when emulatedThreadVars: # the compiler generates this proc for us, so that we can get the size of # the thread local var block; we use this only for sanity checking though proc nimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".} -# we preallocate a fixed size for thread local storage, so that no heap -# allocations are needed. Currently less than 16K are used on a 64bit machine. -# We use `float` for proper alignment: -const nimTlsSize {.intdefine.} = 16000 -type - ThreadLocalStorage = array[0..(nimTlsSize div sizeof(float)), float] - PGcThread = ptr GcThread - GcThread {.pure, inheritable.} = object - when emulatedThreadVars: - tls: ThreadLocalStorage - else: - nil - when hasSharedHeap: - next, prev: PGcThread - stackBottom, stackTop: pointer - stackSize: int - else: - nil + when emulatedThreadVars: var globalsSlot: ThreadVarSlot diff --git a/tools/kochdocs.nim b/tools/kochdocs.nim index ee9c29499bace..c0bdb0503f8ad 100644 --- a/tools/kochdocs.nim +++ b/tools/kochdocs.nim @@ -138,7 +138,6 @@ mm.md """.splitWhitespace().mapIt("doc" / it) doc0 = """ -lib/system/threads.nim lib/system/channels_builtin.nim """.splitWhitespace() # ran by `nim doc0` instead of `nim doc`