Snapshotting config branch

DeepLearnPhysics · Dec 11, 2020 · 89d591a · 89d591a
1 parent 547f35a
commit 89d591a
Show file tree

Hide file tree

Showing 21 changed files with 619 additions and 205 deletions.
diff --git a/data_loader_api.py b/data_loader_api.py
@@ -0,0 +1,26 @@
+import larcv
+
+
+# This script is an example of what a larcv dataloader should look like.
+
+
+dl = larcv.dataloader()
+
+# Add several input streams.
+# Each input stream reads from a file (or possibly a set of files, if distributed).
+dl.add_input_stream(key="train", file="example_train_file.h5")
+dl.add_input_stream(key="test", file="example_test_file.h5")
+
+
+# Set target outputs for each stream.
+dl.add_output_stream(
+    keys="all",             # This is default, but can be configured
+    input_format="image2d", # This needs to be a larcv3 dataformat
+    input_label="sbndwire", # This is the larcv key under which this is stored.
+    output_format="dense",  # This can be "dense" for dense images, "sparse" for SCN, "graph" for torch_geometric
+    framework = "numpy",    # This can be "numpy", "torch", "tf"/"tensorflow"
+    preload = True          # If set, it will preload data to the GPU/accelerator
+)
+
+
+dl.configure() # Calling this will configure the C++ code and begin loading data
diff --git a/larcv/dataloader.py b/larcv/dataloader.py
@@ -0,0 +1,43 @@
+import sys,time,os,signal
+import numpy
+import larcv
+import random
+
+class dataloader(object)
+
+    '''Dataloader for larcv3
+    
+    This class manages the data loading from larcv, including preprocessing
+
+    To configure this, you create the dataloader with a configuration dictionary like so:
+    {
+        'train': {
+            'data' : {
+                'input_data' : 'sparse2d',
+                'preprocess' : {
+                    'Downsample' : ....
+                }
+            }
+        }
+    }
+
+    '''
+
+    def __init__(self, verbose=False, random_access_mode="random_blocks", seed=None):
+        '''init function
+
+        Not much to store here, just a dict of dataloaders and the keys to access their data.
+
+        Queue loaders are manually triggered IO; they are not always running.
+        '''
+        object.__init__(self)
+
+        # Hold a config for each process stream (for example, "train" or "test")
+        self.config = {}
+        self._queue_processor = {}
+        self._batch_holder    = {}
+
+
+    def add_data_stream(self, configure):
+
+
diff --git a/src/larcv3/app/CMakeLists.txt b/src/larcv3/app/CMakeLists.txt
@@ -1,4 +1,4 @@
 add_subdirectory(filter)
 add_subdirectory(queueio)
-add_subdirectory(imagemod)
+# add_subdirectory(imagemod)
 # add_subdirectory(sbnd_imagemod)
diff --git a/src/larcv3/app/queueio/BatchData.cxx b/src/larcv3/app/queueio/BatchData.cxx
@@ -4,6 +4,7 @@
 #include "BatchData.h"
 #include "larcv3/core/base/larcv_logger.h"
 #include "larcv3/core/base/larbys.h"
+#include "larcv3/core/dataformat/Particle.h"
 #include <sstream>
 
 namespace larcv3 {
@@ -19,30 +20,30 @@ namespace larcv3 {
     return _data;
   }
 
-  template<class T>
-  pybind11::array_t<T> BatchData<T>::pydata()
-  {
-    if (_state != BatchDataState_t::kBatchStateFilled) {
-      LARCV_SCRITICAL() << "Current batch state: " << (int)this->state()
-                        << " not ready to expose data!" << std::endl;
-      throw larbys();
-    }
-
-    // First, create the buffer object:
-    // Cast the dimensions to std::array:
-    std::array<size_t, 1> dimensions;
-    dimensions[0] = _data.size();
-    // Allocate a spot to store the data:
-    auto array = pybind11::array_t<T>(
-        // _meta.number_of_voxels()[0],
-        dimensions,
-        {},
-        &(_data[0])
-      );
+  // template<class T>
+  // pybind11::array_t<T> BatchData<T>::pydata()
+  // {
+  //   if (_state != BatchDataState_t::kBatchStateFilled) {
+  //     LARCV_SCRITICAL() << "Current batch state: " << (int)this->state()
+  //                       << " not ready to expose data!" << std::endl;
+  //     throw larbys();
+  //   }
+
+  //   // First, create the buffer object:
+  //   // Cast the dimensions to std::array:
+  //   std::array<size_t, 1> dimensions;
+  //   dimensions[0] = _data.size();
+  //   // Allocate a spot to store the data:
+  //   auto array = pybind11::array_t<T>(
+  //       // _meta.number_of_voxels()[0],
+  //       dimensions,
+  //       {},
+  //       &(_data[0])
+  //     );
 
-    return array;
+  //   return array;
 
-  }
+  // }
 
   template<class T>
   size_t BatchData<T>::data_size(bool calculate) const
@@ -186,6 +187,7 @@ template class larcv3::BatchData<short>;
 template class larcv3::BatchData<int>;
 template class larcv3::BatchData<float>;
 template class larcv3::BatchData<double>;
+template class larcv3::BatchData<larcv3::Particle>;
 
 void init_batchdata(pybind11::module m){
 
@@ -211,7 +213,7 @@ void init_batchdata_(pybind11::module m){
 
 
 
-    batch_data.def("pydata",             &Class::pydata);
+    // batch_data.def("pydata",             &Class::pydata);
     batch_data.def("data",               &Class::data);
     batch_data.def("dim",                &Class::dim);
     batch_data.def("dense_dim",          &Class::dense_dim);

diff --git a/src/larcv3/app/queueio/BatchData.h b/src/larcv3/app/queueio/BatchData.h
@@ -50,7 +50,7 @@ namespace larcv3 {
     std::vector<T> & writeable_data() {return _data;}
 
 #ifdef LARCV_INTERNAL
-    pybind11::array_t<T> pydata();
+    // pybind11::array_t<T> pydata();
 #endif
 
     inline const std::vector<int>& dim() const { return _dim; }

diff --git a/src/larcv3/app/queueio/BatchDataQueue.cxx b/src/larcv3/app/queueio/BatchDataQueue.cxx
@@ -4,6 +4,7 @@
 #include "BatchDataQueue.h"
 #include "larcv3/core/base/larcv_logger.h"
 #include "larcv3/core/base/larbys.h"
+#include "larcv3/core/dataformat/Particle.h"
 
 namespace larcv3 {
 
@@ -62,6 +63,7 @@ template class larcv3::BatchDataQueue<short>;
 template class larcv3::BatchDataQueue<int>;
 template class larcv3::BatchDataQueue<float>;
 template class larcv3::BatchDataQueue<double>;
+template class larcv3::BatchDataQueue<larcv3::Particle>;
 // template class larcv3::BatchDataQueue<larcv3::SparseTensor<2>>;
 
 
@@ -86,6 +88,8 @@ void init_batchdataqueue(pybind11::module m){
   init_batchdataqueue<int>(m);
   init_batchdataqueue<float>(m);
   init_batchdataqueue<double>(m);
+  init_batchdataqueue<larcv3::Particle>(m);
+
 
 }
 

diff --git a/src/larcv3/app/queueio/BatchDataQueueFactory.cxx b/src/larcv3/app/queueio/BatchDataQueueFactory.cxx
@@ -3,6 +3,8 @@
 
 #include "BatchDataQueueFactory.h"
 
+#include "larcv3/core/dataformat/Particle.h"
+
 namespace larcv3 {
 
 
@@ -64,6 +66,7 @@ template<> larcv3::BatchDataQueueFactory<short>*  larcv3::BatchDataQueueFactory<
 template<> larcv3::BatchDataQueueFactory<int>*    larcv3::BatchDataQueueFactory<int>::_me    = nullptr;
 template<> larcv3::BatchDataQueueFactory<float>*  larcv3::BatchDataQueueFactory<float>::_me  = nullptr;
 template<> larcv3::BatchDataQueueFactory<double>* larcv3::BatchDataQueueFactory<double>::_me = nullptr;
+template<> larcv3::BatchDataQueueFactory<larcv3::Particle>* larcv3::BatchDataQueueFactory<larcv3::Particle>::_me = nullptr;
 // template<> larcv3::BatchDataQueueFactory<larcv3::SparseTensor<2>>* larcv3::BatchDataQueueFactory<larcv3::SparseTensor<2>>::_me = nullptr;
 // template<> larcv3::BatchDataQueueFactory<std::string>* larcv3::BatchDataQueueFactory<std::string>::_me = nullptr;
 
@@ -72,6 +75,7 @@ template class larcv3::BatchDataQueueFactory<short>;
 template class larcv3::BatchDataQueueFactory<int>;
 template class larcv3::BatchDataQueueFactory<float>;
 template class larcv3::BatchDataQueueFactory<double>;
+template class larcv3::BatchDataQueueFactory<larcv3::Particle>;
 // template class larcv3::BatchDataQueueFactory<larcv3::SparseTensor<2>>;
 // template class larcv3::BatchDataQueueFactory<std::string>;
 
@@ -96,6 +100,7 @@ void init_batchdataqueuefactory(pybind11::module m){
   init_batchdataqueuefactory<int>(m);
   init_batchdataqueuefactory<float>(m);
   init_batchdataqueuefactory<double>(m);
+  init_batchdataqueuefactory<larcv3::Particle>(m);
   // init_batchdataqueuefactory<larcv3::SparseTensor<2>>(m);
 
 }

diff --git a/src/larcv3/app/queueio/BatchFillerPIDLabel.cxx b/src/larcv3/app/queueio/BatchFillerPIDLabel.cxx
@@ -42,23 +42,7 @@ namespace larcv3 {
 
   void BatchFillerPIDLabel::_batch_end_()
   {
-    // if (logger().level() <= msg::kINFO) {
-    //   LARCV_INFO() << "Total data size: " << batch_data().data_size() << std::endl;
-
-    //   std::vector<size_t> ctr_v(_num_class, 0);
-    //   auto const& data = batch_data().data();
-    //   for (size_t i = 0; i < data.size(); ++i) {
-    //     if (data[i] < 1.) continue;
-    //     ctr_v[i % _num_class] += 1;
-    //   }
-    //   std::stringstream ss;
-    //   ss << "Class fractions (0";
-    //   for (size_t i = 1; i < _num_class; ++i) ss << "," << i;
-    //   ss << ") ... (" << ctr_v[0];
-    //   for (size_t i = 1; i < _num_class; ++i) ss << "," << ctr_v[i];
-    //   ss << ")";
-    //   LARCV_INFO() << ss.str() << std::endl;
-    // }
+
   }
 
   void BatchFillerPIDLabel::finalize()

diff --git a/src/larcv3/app/queueio/BatchFillerParticle.cxx b/src/larcv3/app/queueio/BatchFillerParticle.cxx
@@ -0,0 +1,71 @@
+#ifndef __LARCV3THREADIO_BATCHFILLERPARTICLE_CXX__
+#define __LARCV3THREADIO_BATCHFILLERPARTICLE_CXX__
+
+#include "BatchFillerParticle.h"
+#include <random>
+
+namespace larcv3 {
+
+  static BatchFillerParticleProcessFactory __global_BatchFillerParticleProcessFactory__;
+
+  BatchFillerParticle::BatchFillerParticle(const std::string name)
+    : BatchFillerTemplate<larcv3::Particle>(name)
+  {}
+
+  void BatchFillerParticle::configure(const json& cfg){
+    config = this -> default_config();
+    config = augment_default_config(config, cfg);
+  }
+
+  void BatchFillerParticle::initialize(){}
+
+  bool BatchFillerParticle::process(IOManager& mgr){
+
+    std::string producer = config["ParticleProducer"].get<std::string>();
+    // Fetch the particles:
+    auto const& event_part = mgr.get_data<larcv3::EventParticle>(producer);
+
+    // Refresh the dimension:
+    std::vector<int> dim(2);
+    dim[0] = batch_size();
+    dim[1] = event_part.size();
+    set_dim(dim);
+
+
+    // Fetch the particle list; exactly one particle per event is supported:
+    auto const& part_v = event_part.as_vector();
+    if (part_v.size() != 1) {
+      LARCV_CRITICAL() << "Only support single particle label now: EventParticle size != 1" << std::endl;
+      throw larbys();
+    }
+    // class
+    size_t label = kINVALID_SIZE;
+    int pdg = 0;
+
+    _entry_data.resize(1);
+    _entry_data.at(0) = part_v.front();
+
+
+    set_entry_data(_entry_data);
+
+    return true;
+
+
+  }
+
+  void BatchFillerParticle::_batch_begin_(){
+    if (!batch_data().dim().empty() && (int)(batch_size()) != batch_data().dim().front()) {
+      auto dim = batch_data().dim();
+      dim[0] = batch_size();
+      this->set_dim(dim);
+    }
+
+  }
+
+  void BatchFillerParticle::_batch_end_(){}
+
+  void BatchFillerParticle::finalize(){}
+
+}
+
+#endif