Skip to content

Commit

Permalink
[feat] return trajectories data if -M S or cfg.debuglevel contains S
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Mar 7, 2024
1 parent 10ea5ea commit ebefe98
Show file tree
Hide file tree
Showing 11 changed files with 338 additions and 62 deletions.
35 changes: 32 additions & 3 deletions mmclab/mmclab.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
%#############################################################################%
%
% Format:
% [fluence,detphoton,ncfg,seeds]=mmclab(cfg);
% [fluence,detphoton,ncfg,seeds,traj]=mmclab(cfg);
% or
% fluence=mmclab(cfg);
% newcfg=mmclab(cfg,'prep');
% [fluence,detphoton,ncfg,seeds]=mmclab(cfg, options);
% [fluence,detphoton,ncfg,seeds,traj]=mmclab(cfg, options);
%
% Input:
% cfg: a struct, or struct array. Each element in cfg defines
Expand Down Expand Up @@ -166,6 +166,9 @@
% 'wp'- weighted scattering counts to build mus Jacobian (replay mode)
% cfg.debuglevel: debug flag string, a subset of [MCBWDIOXATRPE], no space
% cfg.debugphoton: print the photon movement debug info only for a specified photon ID
% cfg.maxjumpdebug: [10000000|int] when trajectory is requested in the output,
% use this parameter to set the maximum position stored. By default,
% only the first 1e6 positions are stored.
%
% fields marked with * are required; options in [] are the default values
% fields marked with - are calculated if not given (can be faster if precomputed)
Expand Down Expand Up @@ -205,6 +208,14 @@
% seeds: (optional), if give, mmclab returns the seeds, in the form of
% a byte array (uint8) for each detected photon. The column number
% of seed equals that of detphoton.
% trajectory: (optional), if given, mmclab returns the trajectory data for
% each simulated photon. The output has 6 rows, the meanings are
% id: 1: index of the photon packet
% pos: 2-4: x/y/z/ of each trajectory position
% 5: current photon packet weight
% 6: enclosing element's ID
% By default, mcxlab only records the first 1e7 positions along all
% simulated photons; change cfg.maxjumpdebug to define a different limit.
%
% Example:
% cfg.nphoton=1e5;
Expand Down Expand Up @@ -457,7 +468,25 @@
end
end

if (mmcout >= 3 || (~isempty(cfg) && isstruct(cfg) && isfield(cfg, 'debuglevel') && ~isempty(regexp(cfg(1).debuglevel, '[sS]', 'once'))))
for i = 1:length(varargout{4})
data = varargout{4}.data;
if (isempty(data))
continue
end
traj.pos = data(2:4, :).';
traj.id = typecast(data(1, :), 'uint32').';
[traj.id, idx] = sort(traj.id);
traj.pos = traj.pos(idx, :);
traj.eid = typecast(data(6, :), 'uint32').';
traj.data = [single(traj.id)'; data(2:end, idx)];
newtraj(i) = traj;
end
if (exist('newtraj', 'var'))
varargout{4} = newtraj;
end
end

if(nargout>=4)
[varargout{3:end}]=deal(varargout{[end 3:end-1]});
end

59 changes: 55 additions & 4 deletions src/mmc_cl_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
cl_uint* progress = NULL;
cl_uint detected = 0, workdev;

cl_uint tic, tic0, tic1, toc = 0, fieldlen;
cl_uint tic, tic0, tic1, toc = 0, fieldlen, debuglen = MCX_DEBUG_REC_LEN;

cl_context mcxcontext; // compute mcxcontext
cl_command_queue* mcxqueue; // compute command queue
Expand All @@ -89,7 +89,7 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
cl_uint devid = 0;
cl_mem* gnode = NULL, *gelem = NULL, *gtype = NULL, *gfacenb = NULL, *gsrcelem = NULL, *gnormal = NULL;
cl_mem* gproperty = NULL, *gparam = NULL, *gsrcpattern = NULL, *greplayweight = NULL, *greplaytime = NULL, *greplayseed = NULL; /*read-only buffers*/
cl_mem* gweight, *gdref, *gdetphoton, *gseed, *genergy, *greporter; /*read-write buffers*/
cl_mem* gweight, *gdref, *gdetphoton, *gseed, *genergy, *greporter, *gdebugdata; /*read-write buffers*/
cl_mem* gprogress = NULL, *gdetected = NULL, *gphotonseed = NULL; /*write-only buffers*/

cl_uint meshlen = ((cfg->method == rtBLBadouelGrid) ? cfg->crop0.z : mesh->ne) << cfg->nbuffer; // use 4 copies to reduce racing
Expand Down Expand Up @@ -124,10 +124,10 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
mesh->srcelemlen, {{cfg->bary0.x, cfg->bary0.y, cfg->bary0.z, cfg->bary0.w}},
cfg->e0, cfg->isextdet, meshlen, cfg->nbuffer, (mesh->prop + 1 + cfg->isextdet) + cfg->detnum,
(MIN((MAX_PROP - param.maxpropdet), ((mesh->ne) << 2)) >> 2), /*max count of elem normal data in const mem*/
cfg->issaveseed, cfg->seed
cfg->issaveseed, cfg->seed, cfg->maxjumpdebug
};

MCXReporter reporter = {0.f};
MCXReporter reporter = {0.f, 0};
platform = mcx_list_cl_gpu(cfg, &workdev, devices, &gpu);

if (workdev > MAX_DEVICE) {
Expand Down Expand Up @@ -155,6 +155,7 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
gdetected = (cl_mem*)malloc(workdev * sizeof(cl_mem));
gphotonseed = (cl_mem*)malloc(workdev * sizeof(cl_mem));
greporter = (cl_mem*)malloc(workdev * sizeof(cl_mem));
gdebugdata = (cl_mem*)malloc(workdev * sizeof(cl_mem));

gnode = (cl_mem*)malloc(workdev * sizeof(cl_mem));
gelem = (cl_mem*)malloc(workdev * sizeof(cl_mem));
Expand Down Expand Up @@ -251,6 +252,10 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
srand(time(0));
}

if (cfg->debuglevel & dlTraj && cfg->exportdebugdata == NULL) {
cfg->exportdebugdata = (float*)calloc(sizeof(float), (debuglen * cfg->maxjumpdebug));
}

cl_mem (*clCreateBufferNV)(cl_context, cl_mem_flags, cl_mem_flags_NV, size_t, void*, cl_int*) = (cl_mem (*)(cl_context, cl_mem_flags, cl_mem_flags_NV, size_t, void*, cl_int*)) clGetExtensionFunctionAddressForPlatform(platform, "clCreateBufferNV");

if (clCreateBufferNV == NULL) {
Expand Down Expand Up @@ -310,6 +315,10 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
gphotonseed[i] = NULL;
}

if (cfg->debuglevel & dlTraj) {
OCL_ASSERT(((gdebugdata[i] = clCreateBuffer(mcxcontext, RW_MEM, sizeof(float) * (debuglen * cfg->maxjumpdebug), cfg->exportdebugdata, &status), status)));
}

OCL_ASSERT(((genergy[i] = clCreateBuffer(mcxcontext, RW_MEM, sizeof(float) * (gpu[i].autothread << 1), energy, &status), status)));
OCL_ASSERT(((gdetected[i] = clCreateBuffer(mcxcontext, RW_MEM, sizeof(cl_uint), &detected, &status), status)));
OCL_ASSERT(((greporter[i] = clCreateBuffer(mcxcontext, RW_MEM, sizeof(MCXReporter), &reporter, &status), status)));
Expand Down Expand Up @@ -427,6 +436,7 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
IPARAM_TO_MACRO(opt, param, issaveref);
IPARAM_TO_MACRO(opt, param, isspecular);
IPARAM_TO_MACRO(opt, param, maxdetphoton);
IPARAM_TO_MACRO(opt, param, maxjumpdebug);
IPARAM_TO_MACRO(opt, param, maxmedia);
IPARAM_TO_MACRO(opt, param, maxgate);
IPARAM_TO_MACRO(opt, param, maxpropdet);
Expand Down Expand Up @@ -508,6 +518,7 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 21, sizeof(cl_mem), (void*)(greplaytime + i))));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 22, sizeof(cl_mem), (void*)(greplayseed + i))));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 23, sizeof(cl_mem), (void*)(gphotonseed + i))));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 24, sizeof(cl_mem), ((cfg->debuglevel & dlTraj) ? (void*)(gdebugdata + i) : NULL) )));
}

MMC_FPRINTF(cfg->flog, "set kernel arguments complete : %d ms %d\n", GetTimeMillis() - tic, param.method);
Expand All @@ -525,6 +536,10 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
cfg->exportseed = (unsigned char*)malloc(cfg->maxdetphoton * (sizeof(RandType) * RAND_BUF_LEN));
}

if (cfg->debuglevel & dlTraj && cfg->exportdebugdata == NULL) {
cfg->exportdebugdata = (float*)calloc(sizeof(float), (debuglen * cfg->maxjumpdebug));
}

cfg->energytot = 0.f;
cfg->energyesc = 0.f;
cfg->runtime = 0;
Expand Down Expand Up @@ -603,6 +618,27 @@ void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer) {
OCL_ASSERT((clEnqueueReadBuffer(mcxqueue[devid], greporter[devid], CL_TRUE, 0, sizeof(MCXReporter),
&rep, 0, NULL, waittoread + devid)));
reporter.raytet += rep.raytet;
reporter.jumpdebug += rep.jumpdebug;

if (cfg->debuglevel & dlTraj) {
uint debugrec = reporter.jumpdebug;

if (debugrec > 0) {
if (debugrec > cfg->maxdetphoton) {
MMC_FPRINTF(cfg->flog, S_RED "WARNING: the saved trajectory positions (%d) \
are more than what your have specified (%d), please use the --maxjumpdebug option to specify a greater number\n" S_RESET
, debugrec, cfg->maxjumpdebug);
} else {
MMC_FPRINTF(cfg->flog, "saved %u trajectory positions, total: %d\t", debugrec, cfg->maxjumpdebug + debugrec);
}

debugrec = MIN(debugrec, cfg->maxjumpdebug);
cfg->exportdebugdata = (float*)realloc(cfg->exportdebugdata, (cfg->debugdatalen + debugrec) * debuglen * sizeof(float));
OCL_ASSERT((clEnqueueReadBuffer(mcxqueue[devid], gdebugdata[devid], CL_FALSE, 0, sizeof(float)*debuglen * debugrec,
cfg->exportdebugdata + cfg->debugdatalen, 0, NULL, waittoread + devid)));
cfg->debugdatalen += debugrec;
}
}

if (cfg->issavedet) {
OCL_ASSERT((clEnqueueReadBuffer(mcxqueue[devid], gdetected[devid], CL_FALSE, 0, sizeof(uint),
Expand Down Expand Up @@ -752,12 +788,22 @@ is more than what your have specified (%d), please use the -H option to specify
}

if (cfg->issavedet && cfg->parentid == mpStandalone && cfg->exportdetected) {
cfg->his.totalphoton = cfg->nphoton;
cfg->his.unitinmm = cfg->unitinmm;
cfg->his.savedphoton = cfg->detectedcount;
cfg->his.detected = cfg->detectedcount;
cfg->his.colcount = (2 + (cfg->ismomentum > 0)) * cfg->his.maxmedia + (cfg->issaveexit > 0) * 6 + 2; /*column count=maxmedia+3*/
mesh_savedetphoton(cfg->exportdetected, (void*)(cfg->exportseed), cfg->detectedcount, (sizeof(RandType)*RAND_BUF_LEN), cfg);
}

if ((cfg->debuglevel & dlTraj) && cfg->parentid == mpStandalone && cfg->exportdebugdata) {
cfg->his.colcount = MCX_DEBUG_REC_LEN;
cfg->his.savedphoton = cfg->debugdatalen;
cfg->his.totalphoton = cfg->nphoton;
cfg->his.detected = 0;
mesh_savedetphoton(cfg->exportdebugdata, NULL, cfg->debugdatalen, 0, cfg);
}

if (cfg->issaveref) {
MMC_FPRINTF(cfg->flog, "saving surface diffuse reflectance ...");
mesh_saveweight(mesh, cfg, 1);
Expand Down Expand Up @@ -805,6 +851,10 @@ is more than what your have specified (%d), please use the -H option to specify
OCL_ASSERT(clReleaseMemObject(gsrcpattern[i]));
}

if (cfg->debuglevel & dlTraj) {
OCL_ASSERT(clReleaseMemObject(gdebugdata[i]));
}

if (greplayweight[i]) {
OCL_ASSERT(clReleaseMemObject(greplayweight[i]));
}
Expand All @@ -828,6 +878,7 @@ is more than what your have specified (%d), please use the -H option to specify
free(gprogress);
free(gdetected);
free(gphotonseed);
free(gdebugdata);
free(greporter);

free(gnode);
Expand Down
4 changes: 3 additions & 1 deletion src/mmc_cl_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,12 @@ typedef struct PRE_ALIGN(32) GPU_mcconfig {
cl_uint normbuf;
cl_int issaveseed;
cl_uint seed;
cl_uint maxjumpdebug; /**< max number of positions to be saved to save photon trajectory when -D M is used */
} MCXParam POST_ALIGN(32);

typedef struct POST_ALIGN(32) GPU_reporter {
float raytet;
float raytet;
cl_uint jumpdebug;
} MCXReporter POST_ALIGN(32);

void mmc_run_cl(mcconfig* cfg, tetmesh* mesh, raytracer* tracer);
Expand Down
53 changes: 48 additions & 5 deletions src/mmc_core.cl
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ inline __device__ __host__ float4 convert_float4_rte(float4 v) {
#define MCX_DEBUG_MOVE 1
#define MCX_DEBUG_PROGRESS 2048

#define MCX_DEBUG_REC_LEN 6 /**< number of floating points per position saved when -D M is used for trajectory */

#define MMC_UNDEFINED (3.40282347e+38F)
#define SEED_FROM_FILE -999 /**< special flag indicating to load seeds from history file */

Expand Down Expand Up @@ -318,7 +320,7 @@ typedef struct MMC_Parameter {
float nout;
float roulettesize;
int srcnum;
uint4 crop0;
uint4 crop0;
int srcelemlen;
float4 bary0;
int e0;
Expand All @@ -329,10 +331,12 @@ typedef struct MMC_Parameter {
uint normbuf;
int issaveseed;
int seed;
uint maxjumpdebug; /**< max number of positions to be saved to save photon trajectory when -D M is used */
} MCXParam __attribute__ ((aligned (16)));

typedef struct MMC_Reporter {
float raytet;
uint jumpdebug;
} MCXReporter __attribute__ ((aligned (16)));

typedef struct MCX_medium {
Expand Down Expand Up @@ -372,7 +376,7 @@ __constant__ int ifacemap[] = {1, 2, 0, 3};
#ifndef __NVCC__
enum TDebugLevel {dlMove = 1, dlTracing = 2, dlBary = 4, dlWeight = 8, dlDist = 16, dlTracingEnter = 32,
dlTracingExit = 64, dlEdge = 128, dlAccum = 256, dlTime = 512, dlReflect = 1024,
dlProgress = 2048, dlExit = 4096
dlProgress = 2048, dlExit = 4096, dlTraj = 8192
};

enum TRTMethod {rtPlucker, rtHavel, rtBadouel, rtBLBadouel, rtBLBadouelGrid};
Expand Down Expand Up @@ -561,6 +565,27 @@ __device__ void savedetphoton(__global float* n_det, __global uint* detectedphot
}
#endif

/**
* @brief Saving photon trajectory data for debugging purposes
* @param[in] p: the position/weight of the current photon packet
* @param[in] id: the global index of the photon
* @param[in] gdebugdata: pointer to the global-memory buffer to store the trajectory info
*/

__device__ void savedebugdata(ray* r, uint id, __global MCXReporter* reporter, __global float* gdebugdata, __constant MCXParam* gcfg) {
uint pos = atomic_inc(&reporter->jumpdebug);

if (pos < GPU_PARAM(gcfg, maxjumpdebug)) {
pos *= MCX_DEBUG_REC_LEN;
((uint*)gdebugdata)[pos++] = id;
gdebugdata[pos++] = r->p0.x;
gdebugdata[pos++] = r->p0.y;
gdebugdata[pos++] = r->p0.z;
gdebugdata[pos++] = r->weight;
((uint*)gdebugdata)[pos++] = r->eid;
}
}

/**
* \brief Branch-less Badouel-based SSE4 ray-tracer to advance photon by one step
*
Expand Down Expand Up @@ -1342,7 +1367,7 @@ __device__ void launchphoton(__constant MCXParam* gcfg, ray* r, __global float3*
__device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXParam* gcfg, __global float3* node, __global int* elem, __global float* weight, __global float* dref,
__global int* type, __global int* facenb, __global int* srcelem, __global float4* normal, __constant Medium* gmed,
__global float* n_det, __global uint* detectedphoton, float* energytot, float* energyesc, __private RandType* ran, int* raytet, __global float* srcpattern,
__global float* replayweight, __global float* replaytime, __global RandType* photonseed) {
__global float* replayweight, __global float* replaytime, __global RandType* photonseed, __global MCXReporter* reporter, __global float* gdebugdata) {

int oldeid, fixcount = 0;
ray r = {gcfg->srcpos, gcfg->srcdir, {MMC_UNDEFINED, 0.f, 0.f}, GPU_PARAM(gcfg, e0), 0, 0, 1.f, 0.f, 0.f, 0.f, ID_UNDEFINED, 0.f};
Expand Down Expand Up @@ -1371,6 +1396,11 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
}

#endif

if (GPU_PARAM(gcfg, debuglevel) & dlTraj) {
savedebugdata(&r, id, reporter, gdebugdata, gcfg);
}

/*use Kahan summation to accumulate weight, otherwise, counter stops at 16777216*/
/*http://stackoverflow.com/questions/2148149/how-to-sum-a-large-number-of-float-number*/

Expand Down Expand Up @@ -1555,9 +1585,18 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
}
}

if (GPU_PARAM(gcfg, debuglevel) & dlTraj) {
savedebugdata(&r, id, reporter, gdebugdata, gcfg);
}

float mom = 0.f;
r.slen0 = mc_next_scatter(gmed[type[r.eid - 1]].g, &r.vec, ran, gcfg, &mom);
r.slen = r.slen0;

if (GPU_PARAM(gcfg, debuglevel) & dlTraj) {
savedebugdata(&r, id, reporter, gdebugdata, gcfg);
}

#ifdef MCX_SAVE_DETECTORS

if (GPU_PARAM(gcfg, issavedet)) {
Expand All @@ -1573,6 +1612,10 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
#endif
}

if (GPU_PARAM(gcfg, debuglevel) & dlTraj) {
savedebugdata(&r, id, reporter, gdebugdata, gcfg);
}

*energyesc += r.weight;
}

Expand All @@ -1583,7 +1626,7 @@ __kernel void mmc_main_loop(const int nphoton, const int ophoton,
__global float3* node, __global int* elem, __global float* weight, __global float* dref, __global int* type, __global int* facenb, __global int* srcelem, __global float4* normal,
__global float* n_det, __global uint* detectedphoton,
__global uint* n_seed, __global int* progress, __global float* energy, __global MCXReporter* reporter, __global float* srcpattern,
__global float* replayweight, __global float* replaytime, __global RandType* replayseed, __global RandType* photonseed) {
__global float* replayweight, __global float* replaytime, __global RandType* replayseed, __global RandType* photonseed, __global float* gdebugdata) {

RandType t[RAND_BUF_LEN];
int idx = get_global_id(0);
Expand All @@ -1607,7 +1650,7 @@ __kernel void mmc_main_loop(const int nphoton, const int ophoton,

onephoton(idx * nphoton + MIN(idx, ophoton) + i, sharedmem + get_local_id(0)*GPU_PARAM(gcfg, reclen), gcfg, node, elem,
weight, dref, type, facenb, srcelem, normal, gmed, n_det, detectedphoton, &energytot, &energyesc, t, &raytet,
srcpattern, replayweight, replaytime, photonseed);
srcpattern, replayweight, replaytime, photonseed, reporter, gdebugdata);
}

energy[idx << 1] = energyesc;
Expand Down
Loading

0 comments on commit ebefe98

Please sign in to comment.