CIS565-Fall-2015 · nadaOuf · Sep 8, 2015 · Sep 8, 2015 · Sep 8, 2015 · Sep 8, 2015
diff --git a/Project1-Part1/src/kernel.cu b/Project1-Part1/src/kernel.cu
@@ -28,7 +28,7 @@ void checkCUDAError(const char *msg, int line = -1) {
  *****************/
 
 /*! Block size used for CUDA kernel launch. */
-#define blockSize 128
+#define blockSize 64
 
 /*! Mass of one "planet." */
 #define planetMass 3e8f
@@ -39,7 +39,6 @@ void checkCUDAError(const char *msg, int line = -1) {
 /*! Size of the starting area in simulation space. */
 const float scene_scale = 1e2;
 
-
 /***********************************************
  * Kernel state (pointers are device pointers) *
  ***********************************************/
@@ -113,6 +112,8 @@ void Nbody::initSimulation(int N) {
     numObjects = N;
     dim3 fullBlocksPerGrid((N + blockSize - 1) / blockSize);
 
+    printf("number of blocks per grid %u, %u, %u \n", fullBlocksPerGrid.x, fullBlocksPerGrid.y, fullBlocksPerGrid.z); // dim3 fields are unsigned
+
     cudaMalloc((void**)&dev_pos, N * sizeof(glm::vec3));
     checkCUDAErrorWithLine("cudaMalloc dev_pos failed!");
 
@@ -168,29 +169,42 @@ void Nbody::copyPlanetsToVBO(float *vbodptr) {
 /******************
  * stepSimulation *
  ******************/
+/**
+ * Acceleration felt at `position` due to a point mass `mass` at `bodyPosition`
+ * (g = G * mass / r^2, directed from `position` toward the body).
+ * Returns zero when the squared distance is below 0.01 to avoid dividing by ~0.
+ */
+__device__ glm::vec3 gravity(glm::vec3 position, glm::vec3 bodyPosition, float mass) {
+    // Vector from this point to the attracting body, and its squared length.
+    glm::vec3 direction = bodyPosition - position;
+    float rSquared = glm::dot(direction, direction);
+
+    // Guard first: normalizing a (near-)zero vector would produce NaN/Inf,
+    // and the sqrt would be wasted work for a contribution that is skipped.
+    if (rSquared < 0.01f) {
+        return glm::vec3(0.0f);
+    }
+
+    float g = (G * mass) / rSquared;
+    return glm::normalize(direction) * g;
+}
 
 /**
  * Compute the acceleration on a body at `my_pos` due to the `N` bodies in the array `other_planets`.
  */
 __device__  glm::vec3 accelerate(int N, int iSelf, glm::vec3 this_planet, const glm::vec3 *other_planets) {
-    // TODO: Compute the acceleration on `my_pos` due to:
-    //   * The star at the origin (with mass `starMass`)
-    //   * All of the *other* planets (with mass `planetMass`)
-    // Return the sum of all of these contributions.
-
-    // HINT: You may want to write a helper function that will compute the acceleration at
-    //   a single point due to a single other mass. Be careful that you protect against
-    //   division by very small numbers.
-    // HINT: Use Newtonian gravitational acceleration:
-    //       G M
-    //  g = -----
-    //       r^2
-    //  where:
-    //    * G is the universal gravitational constant (already defined for you)
-    //    * M is the mass of the other object
-    //    * r is the distance between this object and the other object
 
-    return glm::vec3(0.0f);
+    // Begin with the pull of the star, which is placed at the origin.
+    glm::vec3 total(0.0f);
+    total += gravity(this_planet, glm::vec3(0.0f), starMass);
+
+    // Add the pull of every planet except the one being evaluated.
+    for (int other = 0; other < N; ++other) {
+        if (other != iSelf)
+            total += gravity(this_planet, other_planets[other], planetMass);
+    }
+
+    return total;
 }
 
 /**
@@ -201,20 +215,45 @@ __global__ void kernUpdateAcc(int N, float dt, const glm::vec3 *pos, glm::vec3 *
     // TODO: implement updateAccArray.
     // This function body runs once on each CUDA thread.
     // To avoid race conditions, each instance should only write ONE value to `acc`!
+    // One thread per body: flat index built from the x block/thread coordinates.
+    const int body = blockIdx.x * blockDim.x + threadIdx.x;
+    if (body >= N) return;  // surplus threads past the last body do nothing
+
+    acc[body] = accelerate(N, body, pos[body], pos);
 }
 
 /**
  * For each of the `N` bodies, update its velocity, then update its position, using a
  * simple Euler integration scheme. Acceleration must be updated before calling this kernel.
  */
 __global__ void kernUpdateVelPos(int N, float dt, glm::vec3 *pos, glm::vec3 *vel, const glm::vec3 *acc) {
-    // TODO: implement updateVelocityPosition
+    // Velocity is advanced first; the updated velocity then advances the position.
+    const int body = blockIdx.x * blockDim.x + threadIdx.x;
+    if (body >= N) return;  // surplus threads past the last body do nothing
+
+    vel[body] += acc[body] * dt;
+    pos[body] += vel[body] * dt;
+
 }
 
 /**
  * Step the entire N-body simulation by `dt` seconds.
  */
-void Nbody::stepSimulation(float dt) {
-    // TODO: Using the CUDA kernels you wrote above, write a function that
-    // calls the kernels to perform a full simulation step.
-}
+float Nbody::stepSimulation(float dt) {
+    // Returns the time, in milliseconds, between the two CUDA events that bracket the kernels.
+    dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize);
+    cudaEvent_t start, stop;
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+    cudaEventRecord(start, 0);
+    kernUpdateAcc<<<fullBlocksPerGrid, blockSize>>>(numObjects, dt, dev_pos, dev_acc);
+    kernUpdateVelPos<<<fullBlocksPerGrid, blockSize>>>(numObjects, dt, dev_pos, dev_vel, dev_acc);
+    cudaEventRecord(stop, 0);
+    cudaEventSynchronize(stop);  // wait for both kernels so the elapsed time is valid
+    checkCUDAErrorWithLine("stepSimulation kernels failed!");
+    float elapsedTime = 0.0f;
+    cudaEventElapsedTime(&elapsedTime, start, stop);
+    cudaEventDestroy(start);  // release the per-call events so they do not leak
+    cudaEventDestroy(stop);
+    return elapsedTime;
+}
diff --git a/Project1-Part1/src/kernel.h b/Project1-Part1/src/kernel.h
@@ -7,6 +7,6 @@
 
 namespace Nbody {
 void initSimulation(int N);
-void stepSimulation(float dt);
+float stepSimulation(float dt); // returns the elapsed GPU time of the step, in milliseconds
 void copyPlanetsToVBO(float *vbodptr);
 }
diff --git a/Project1-Part1/src/main.cpp b/Project1-Part1/src/main.cpp
@@ -14,7 +14,7 @@
 
 #define VISUALIZE 1
 
-const int N_FOR_VIS = 5000;
+const int N_FOR_VIS = 500;
 const float DT = 0.2f;
 
 /**
@@ -183,6 +183,7 @@ void initShaders(GLuint * program) {
 //====================================
 // Main loop
 //====================================
+float kernalElapsedTime = 0;
 void runCUDA() {
     // Map OpenGL buffer object for writing from CUDA on a single GPU
     // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not
@@ -193,7 +194,7 @@ void runCUDA() {
     cudaGLMapBufferObject((void**)&dptrvert, planetVBO);
 
     // execute the kernel
-    Nbody::stepSimulation(DT);
+    kernalElapsedTime += Nbody::stepSimulation(DT);
 #if VISUALIZE
     Nbody::copyPlanetsToVBO(dptrvert);
 #endif
@@ -204,12 +205,13 @@ void runCUDA() {
 void mainLoop() {
     double fps = 0;
     double timebase = 0;
-    int frame = 0;
+    int frame = 0, totalFrames = 0;
 
     while (!glfwWindowShouldClose(window)) {
         glfwPollEvents();
 
-        frame++;
+        ++frame;
+		++totalFrames;
         double time = glfwGetTime();
 
         if (time - timebase > 1.0) {
@@ -237,12 +239,17 @@ void mainLoop() {
 
         glUseProgram(0);
         glBindVertexArray(0);
+		glfwSwapBuffers(window);
 #endif
 
-        glfwSwapBuffers(window);
+
     }
     glfwDestroyWindow(window);
     glfwTerminate();
+    // Report the accumulated kernel time; skip the division when no frame ran (0/0 would print NaN).
+    // NOTE(review): the extra factor of 100 in the divisor is unexplained here -- confirm the intended units.
+    std::cout << "Kernal timing : " << (totalFrames > 0 ? kernalElapsedTime/(totalFrames*100) : 0.0f) << std::endl;
+    std::cout << "Number of frames : " << totalFrames << std::endl;
 }
 
 

diff --git a/Project1-Part2/.gitignore b/Project1-Part2/.gitignore
diff --git a/Project1-Part2/CMakeLists.txt b/Project1-Part2/CMakeLists.txt
@@ -0,0 +1,86 @@
+cmake_minimum_required(VERSION 3.0)
+
+project(cis565_matrix_math)
+
+set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+
+# Set up include and lib paths
+set(EXTERNAL "external")
+include_directories("${EXTERNAL}/include")
+include_directories("${EXTERNAL}/src")
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/osx")
+elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/linux" "/usr/lib64")
+elseif(WIN32)
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/win")
+endif()
+link_directories(${EXTERNAL_LIB_PATH})
+list(APPEND CMAKE_LIBRARY_PATH "${EXTERNAL_LIB_PATH}")
+
+# Find and set up core dependency libs
+
+set(GLFW_INCLUDE_DIR "${EXTERNAL}/include")
+set(GLFW_LIBRARY_DIR "${CMAKE_LIBRARY_PATH}")
+find_library(GLFW_LIBRARY "glfw3" HINTS "${GLFW_LIBRARY_DIR}")
+
+set(GLEW_INCLUDE_DIR "${EXTERNAL}/include")
+set(GLEW_LIBRARY_DIR "${CMAKE_LIBRARY_PATH}")
+add_definitions(-DGLEW_STATIC)
+find_package(GLEW)
+
+find_package(OpenGL)
+
+set(CORELIBS
+    "${GLFW_LIBRARY}"
+    "${OPENGL_LIBRARY}"
+    "${GLEW_LIBRARY}"
+    )
+
+# Enable C++11 for host code
+set(CMAKE_CXX_STANDARD 11)
+
+list(APPEND CUDA_NVCC_FLAGS -G -g)
+
+# OSX-specific hacks/fixes
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    list(APPEND CORELIBS "-framework IOKit")
+    list(APPEND CORELIBS "-framework Cocoa")
+    list(APPEND CORELIBS "-framework CoreVideo")
+endif()
+
+# Linux-specific fix: extend the flag *string*; list(APPEND) would insert a ';' into it
+if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lX11 -lXxf86vm -lXrandr -lpthread -lXi")
+endif()
+
+# Crucial magic for CUDA linking
+find_package(Threads REQUIRED)
+find_package(CUDA REQUIRED)
+
+set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)
+set(CUDA_SEPARABLE_COMPILATION ON)
+
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+endif()
+
+add_subdirectory(src)
+
+cuda_add_executable(${CMAKE_PROJECT_NAME}
+    "src/main.hpp"
+    "src/main.cpp"
+    )
+
+target_link_libraries(${CMAKE_PROJECT_NAME}
+    src
+    ${CORELIBS}
+    )
+
+add_custom_command(
+    TARGET ${CMAKE_PROJECT_NAME}
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_directory
+        ${CMAKE_SOURCE_DIR}/shaders
+        ${CMAKE_BINARY_DIR}/shaders
+    )
diff --git a/Project1-Part2/GNUmakefile b/Project1-Part2/GNUmakefile
@@ -0,0 +1,31 @@
+CMAKE_ALT1 := /usr/local/bin/cmake
+CMAKE_ALT2 := /Applications/CMake.app/Contents/bin/cmake
+CMAKE := $(shell \
+	which cmake 2>/dev/null || \
+	([ -e ${CMAKE_ALT1} ] && echo "${CMAKE_ALT1}") || \
+	([ -e ${CMAKE_ALT2} ] && echo "${CMAKE_ALT2}") \
+	)
+
+all: RelWithDebugInfo
+
+
+Debug: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make)
+
+MinSizeRel: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make)
+
+Release: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make)
+
+RelWithDebugInfo: build # target name kept; CMake spells this build type "RelWithDebInfo"
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=RelWithDebInfo .. && make)
+
+
+build:
+	(mkdir -p build && cd build)
+
+clean:
+	((cd build && make clean) 2>&- || true)
+
+.PHONY: all Debug MinSizeRel Release RelWithDebugInfo clean