dhewm · DanielGibson · Nov 7, 2024 · Oct 31, 2024 · Jan 21, 2023 · Jan 28, 2023
diff --git a/neo/framework/Common.cpp b/neo/framework/Common.cpp
@@ -2688,13 +2688,27 @@ void idCommonLocal::LoadGameDLL( void ) {
 	// there was no gamelib for this mod, use default one from base game
 	if (!gameDLL) {
 		common->Printf( "\n" );
-		common->Warning( "couldn't load mod-specific %s, defaulting to base game's library!\n", dll );
-		sys->DLL_GetFileName(BASE_GAMEDIR, dll, sizeof(dll));
-		LoadGameDLLbyName(dll, s);
+
+		const char *fs_base = cvarSystem->GetCVarString("fs_game_base");
+		if (fs_base && fs_base[0]) {
+			common->Warning( "couldn't load mod-specific %s, defaulting to library of fs_game_base (%s)!\n", dll, fs_base);
+			sys->DLL_GetFileName(fs_base, dll, sizeof(dll));
+			LoadGameDLLbyName(dll, s);
+			if ( !gameDLL ) {
+				common->Warning( "couldn't load fs_game_base lib %s either, defaulting to base game's library!\n", dll);
+			}
+		} else {
+			common->Warning( "couldn't load mod-specific %s, defaulting to base game's library!\n", dll );
+		}
+
+		if ( !gameDLL ) {
+			sys->DLL_GetFileName(BASE_GAMEDIR, dll, sizeof(dll));
+			LoadGameDLLbyName(dll, s);
+		}
 	}
 
 	if ( !gameDLL ) {
-		common->FatalError( "couldn't load game dynamic library" );
+		common->FatalError( "couldn't load game dynamic library '%s'", dll );
 		return;
 	}
 

diff --git a/neo/idlib/Heap.h b/neo/idlib/Heap.h
@@ -142,6 +142,24 @@ __inline void operator delete[]( void *p ) {
 #endif /* ID_DEBUG_MEMORY */
 
 
+// Allocate SIZE bytes, aligned to 16 bytes: on the stack via _alloca16() if
+// SIZE < ID_MAX_ALLOCA_SIZE, otherwise (SIZE >= ID_MAX_ALLOCA_SIZE) on the heap via Mem_Alloc16().
+// ON_STACK must be an assignable bool; it is set to true for a stack allocation and
+// to false for a heap allocation. SIZE is expanded twice, so it must not have side effects.
+// If ON_STACK is false, you must free the memory with Mem_FreeA() or Mem_Free16()!
+// (just pass your ON_STACK bool to Mem_FreeA() and it will do the right thing)
+#define Mem_MallocA( SIZE, ON_STACK ) \
+	( (SIZE) < ID_MAX_ALLOCA_SIZE ? ( ON_STACK=true, _alloca16(SIZE) ) : ( ON_STACK=false, Mem_Alloc16(SIZE) ) )
+
+// free memory allocated with Mem_MallocA(): calls Mem_Free16( ptr ) unless onStack is true (then it does nothing)
+ID_INLINE void Mem_FreeA( void* ptr, bool onStack )
+{
+	if( !onStack ) {
+		Mem_Free16( ptr );
+	}
+}
+
+
 /*
 ===============================================================================
 

diff --git a/neo/renderer/ModelOverlay.cpp b/neo/renderer/ModelOverlay.cpp
@@ -117,8 +117,16 @@ void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPl
 	}
 
 	// make temporary buffers for the building process
-	overlayVertex_t	*overlayVerts = (overlayVertex_t *)_alloca( maxVerts * sizeof( *overlayVerts ) );
-	glIndex_t *overlayIndexes = (glIndex_t *)_alloca16( maxIndexes * sizeof( *overlayIndexes ) );
+	// DG: using Mem_MallocA() instead of alloca() to avoid stack overflows with large models
+	size_t vertSize = maxVerts * sizeof( overlayVertex_t );
+	bool vertsOnStack;
+	overlayVertex_t	*overlayVerts = (overlayVertex_t *)Mem_MallocA( vertSize, vertsOnStack );
+
+	// Note: here we have two Mem_MallocA() calls, this relies on the stack being
+	//       big enough for two alloca(ID_MAX_ALLOCA_SIZE) calls!
+	size_t idxSize = maxIndexes * sizeof( glIndex_t );
+	bool idxOnStack;
+	glIndex_t *overlayIndexes = (glIndex_t *)Mem_MallocA( idxSize, idxOnStack );
 
 	// pull out the triangles we need from the base surfaces
 	for ( surfNum = 0; surfNum < model->NumBaseSurfaces(); surfNum++ ) {
@@ -224,6 +232,9 @@ void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPl
 			materials[i]->surfaces.RemoveIndex( 0 );
 		}
 	}
+
+	Mem_FreeA(overlayVerts, vertsOnStack);
+	Mem_FreeA(overlayIndexes, idxOnStack);
 }
 
 /*

diff --git a/neo/renderer/Model_md5.cpp b/neo/renderer/Model_md5.cpp
@@ -235,13 +235,16 @@ void idMD5Mesh::ParseMesh( idLexer &parser, int numJoints, const idJointMat *joi
 	// build the information that will be common to all animations of this mesh:
 	// silhouette edge connectivity and normal / tangent generation information
 	//
-	idDrawVert *verts = (idDrawVert *) _alloca16( texCoords.Num() * sizeof( idDrawVert ) );
+	bool onStack;
+	idDrawVert *verts = (idDrawVert*)Mem_MallocA( texCoords.Num()*sizeof(idDrawVert), onStack );
+
 	for ( i = 0; i < texCoords.Num(); i++ ) {
 		verts[i].Clear();
 		verts[i].st = texCoords[i];
 	}
 	TransformVerts( verts, joints );
 	deformInfo = R_BuildDeformInfo( texCoords.Num(), verts, tris.Num(), tris.Ptr(), shader->UseUnsmoothedTangents() );
+	Mem_FreeA( verts, onStack );
 }
 
 /*
@@ -352,12 +355,15 @@ idMD5Mesh::CalcBounds
 */
 idBounds idMD5Mesh::CalcBounds( const idJointMat *entJoints ) {
 	idBounds	bounds;
-	idDrawVert *verts = (idDrawVert *) _alloca16( texCoords.Num() * sizeof( idDrawVert ) );
+	bool onStack;
+	idDrawVert *verts = (idDrawVert*)Mem_MallocA( texCoords.Num()*sizeof(idDrawVert), onStack );
 
 	TransformVerts( verts, entJoints );
 
 	SIMDProcessor->MinMax( bounds[0], bounds[1], verts, texCoords.Num() );
 
+	Mem_FreeA( verts, onStack );
+
 	return bounds;
 }
 

diff --git a/neo/renderer/tr_light.cpp b/neo/renderer/tr_light.cpp
@@ -164,7 +164,9 @@ void R_CreateVertexProgramShadowCache( srfTriangles_t *tri ) {
 		return;
 	}
 
-	shadowCache_t *temp = (shadowCache_t *)_alloca16( tri->numVerts * 2 * sizeof( shadowCache_t ) );
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+	bool tempOnStack;
+	shadowCache_t *temp = (shadowCache_t *)Mem_MallocA( tri->numVerts * 2 * sizeof( shadowCache_t ), tempOnStack );
 
 #if 1
 
@@ -189,6 +191,7 @@ void R_CreateVertexProgramShadowCache( srfTriangles_t *tri ) {
 #endif
 
 	vertexCache.Alloc( temp, tri->numVerts * 2 * sizeof( shadowCache_t ), &tri->shadowCache );
+	Mem_FreeA( temp, tempOnStack );
 }
 
 /*

diff --git a/neo/renderer/tr_trisurf.cpp b/neo/renderer/tr_trisurf.cpp
@@ -790,7 +790,9 @@ R_CreateDupVerts
 void R_CreateDupVerts( srfTriangles_t *tri ) {
 	int i;
 
-	int *remap = (int *) _alloca16( tri->numVerts * sizeof( remap[0] ) );
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+	bool remapOnStack;
+	int *remap = (int *)Mem_MallocA( tri->numVerts * sizeof( remap[0] ), remapOnStack );
 
 	// initialize vertex remap in case there are unused verts
 	for ( i = 0; i < tri->numVerts; i++ ) {
@@ -803,7 +805,9 @@ void R_CreateDupVerts( srfTriangles_t *tri ) {
 	}
 
 	// create duplicate vertex index based on the vertex remap
-	int * tempDupVerts = (int *) _alloca16( tri->numVerts * 2 * sizeof( tempDupVerts[0] ) );
+	bool tempDupVertsOnStack;
+	int *tempDupVerts = (int *)Mem_MallocA( tri->numVerts * 2 * sizeof( tempDupVerts[0] ), tempDupVertsOnStack );
+
 	tri->numDupVerts = 0;
 	for ( i = 0; i < tri->numVerts; i++ ) {
 		if ( remap[i] != i ) {
@@ -819,6 +823,9 @@ void R_CreateDupVerts( srfTriangles_t *tri ) {
 	} else {
 		tri->dupVerts = NULL;
 	}
+
+	Mem_FreeA( remap, remapOnStack );
+	Mem_FreeA( tempDupVerts, tempDupVertsOnStack );
 }
 
 /*
@@ -1279,7 +1286,10 @@ static void	R_DuplicateMirroredVertexes( srfTriangles_t *tri ) {
 	int				totalVerts;
 	int				numMirror;
 
-	tverts = (tangentVert_t *)_alloca16( tri->numVerts * sizeof( *tverts ) );
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+	bool tvertsOnStack;
+	tverts = (tangentVert_t *)Mem_MallocA( tri->numVerts * sizeof( *tverts ), tvertsOnStack );
+
 	memset( tverts, 0, tri->numVerts * sizeof( *tverts ) );
 
 	// determine texture polarity of each surface
@@ -1309,6 +1319,7 @@ static void	R_DuplicateMirroredVertexes( srfTriangles_t *tri ) {
 	// now create the new list
 	if ( totalVerts == tri->numVerts ) {
 		tri->mirroredVerts = NULL;
+		Mem_FreeA( tverts, tvertsOnStack );
 		return;
 	}
 
@@ -1344,6 +1355,8 @@ static void	R_DuplicateMirroredVertexes( srfTriangles_t *tri ) {
 	}
 
 	tri->numVerts = totalVerts;
+
+	Mem_FreeA( tverts, tvertsOnStack );
 }
 
 /*
@@ -1386,14 +1399,10 @@ void R_DeriveTangentsWithoutNormals( srfTriangles_t *tri ) {
 	faceTangents_t	*ft;
 	idDrawVert		*vert;
 
-	// DG: windows only has a 1MB stack and it could happen that we try to allocate >1MB here
-	//     (in lost mission mod, game/le_hell map), causing a stack overflow
-	//     to prevent that, use heap allocation if it's >600KB
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
 	size_t allocaSize = sizeof(faceTangents[0]) * tri->numIndexes/3;
-	if(allocaSize < 600000)
-		faceTangents = (faceTangents_t *)_alloca16( allocaSize );
-	else
-		faceTangents = (faceTangents_t *)Mem_Alloc16( allocaSize );
+	bool faceTangentsOnStack;
+	faceTangents = (faceTangents_t *)Mem_MallocA( allocaSize, faceTangentsOnStack );
 
 	R_DeriveFaceTangents( tri, faceTangents );
 
@@ -1451,8 +1460,7 @@ void R_DeriveTangentsWithoutNormals( srfTriangles_t *tri ) {
 
 	tri->tangentsCalculated = true;
 
-	if(allocaSize >= 600000)
-		Mem_Free16( faceTangents );
+	Mem_FreeA( faceTangents, faceTangentsOnStack );
 }
 
 static ID_INLINE void VectorNormalizeFast2( const idVec3 &v, idVec3 &out) {
@@ -1685,8 +1693,12 @@ void R_DeriveTangents( srfTriangles_t *tri, bool allocFacePlanes ) {
 
 #if 1
 
+	// planesOnStack also stays true when planes were not allocated on the stack but come
+	// from tri->facePlanes (either way, Mem_FreeA() mustn't free() them)
+	bool planesOnStack = true;
 	if ( !planes ) {
-		planes = (idPlane *)_alloca16( ( tri->numIndexes / 3 ) * sizeof( planes[0] ) );
+		// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+		planes = (idPlane *)Mem_MallocA( ( tri->numIndexes / 3 ) * sizeof( planes[0] ), planesOnStack );
 	}
 
 	SIMDProcessor->DeriveTangents( planes, tri->verts, tri->numVerts, tri->indexes, tri->numIndexes );
@@ -1846,6 +1858,8 @@ void R_DeriveTangents( srfTriangles_t *tri, bool allocFacePlanes ) {
 
 	tri->tangentsCalculated = true;
 	tri->facePlanesCalculated = true;
+
+	Mem_FreeA( planes, planesOnStack );
 }
 
 /*

diff --git a/neo/sys/platform.h b/neo/sys/platform.h
@@ -38,9 +38,11 @@ If you have questions concerning this license or the applicable additional terms
 
 // NOTE: By default Win32 uses a 1MB stack. Doom3 1.3.1 uses 4MB (probably set after compiling with EDITBIN /STACK
 // dhewm3 now uses a 8MB stack, set with a linker flag in CMakeLists.txt (/STACK:8388608 for MSVC, -Wl,--stack,8388608 for mingw)
-// Linux has a 8MB stack by default, and so does macOS, at least for the main thread
-// anyway, a 2MB limit alloca should be safe even when using it multiple times in the same function
-#define ID_MAX_ALLOCA_SIZE 2097152 // 2MB
+// Linux has an 8MB stack by default, and so does macOS, at least for the main thread.
+// Anyway, a 1MB limit for _alloca() should be safe even when using it multiple times
+// in the same function or call stack.
+// If there's a risk of bigger stack allocations, Mem_MallocA() should be used instead.
+#define ID_MAX_ALLOCA_SIZE 1048576 // 1MB (1024 * 1024 bytes)
 
 /*
 ===============================================================================

diff --git a/neo/sys/win32/win_main.cpp b/neo/sys/win32/win_main.cpp
@@ -53,6 +53,8 @@ If you have questions concerning this license or the applicable additional terms
 
 #include "tools/edit_public.h"
 
+#undef strcmp // get rid of "#define strcmp idStr::Cmp", it conflicts with SDL headers
+
 #include "sys/sys_sdl.h"
 
 #ifdef D3_SDL3
@@ -646,6 +648,24 @@ uintptr_t Sys_DLL_Load( const char *dllName ) {
 		}
 	} else {
 		DWORD e = GetLastError();
+
+		if ( e ==  0x7E ) {
+			// 0x7E is "The specified module could not be found."
+			// don't print a warning for that error, it's expected
+			// when trying different possible paths for a DLL
+			return 0;
+		}
+
+		if ( e == 0xC1) {
+			// "[193 (0xC1)] is not a valid Win32 application"
+			// probably going to be common. Let's try to be less cryptic.
+			common->Warning( "LoadLibrary( \"%s\" ) Failed ! [%i (0x%X)]\tprobably the DLL is of the wrong architecture, "
+			                 "like x64 instead of x86 (this build of dhewm3 expects %s)",
+			                 dllName, e, e, D3_ARCH );
+			return 0;
+		}
+
+		// for all other errors, print whatever FormatMessage() gives us
 		LPVOID msgBuf = NULL;
 
 		FormatMessage(
@@ -658,17 +678,7 @@ uintptr_t Sys_DLL_Load( const char *dllName ) {
 			(LPTSTR)&msgBuf,
 			0, NULL);
 
-		idStr errorStr = va( "[%i (0x%X)]\t%s", e, e, msgBuf );
-
-		// common, skipped.
-		if ( e == 0x7E ) // [126 (0x7E)] The specified module could not be found.
-			errorStr = "";
-		// probably going to be common. Lets try to be less cryptic.
-		else if ( e == 0xC1 ) // [193 (0xC1)] is not a valid Win32 application.
-			errorStr = va( "[%i (0x%X)]\t%s", e, e, "probably the DLL is of the wrong architecture, like x64 instead of x86" );
-
-		if ( errorStr.Length() )
-			common->Warning( "LoadLibrary(%s) Failed ! %s", dllName, errorStr.c_str() );
+		common->Warning( "LoadLibrary( \"%s\" ) Failed ! [%i (0x%X)]\t%s", dllName, e, e, msgBuf );
 
 		::LocalFree( msgBuf );
 	}