Skip to content

Commit

Permalink
Replace hash map
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Mar 16, 2024
1 parent 90c1ee4 commit 7c74c91
Show file tree
Hide file tree
Showing 13 changed files with 2,065 additions and 2,116 deletions.
6 changes: 2 additions & 4 deletions src/database/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
#include "hdf5_funcs.hpp"
#include "random/random_match.hpp"

#include "robin_hood.h"

// const int deflate_level = 9;

// Helper function prototypes
Expand Down Expand Up @@ -200,9 +198,9 @@ RandomMC Database::load_random(const bool use_rc_default) {
HighFive::Group random_group = _h5_file.getGroup("/random");

// Flattened hashes
robin_hood::unordered_node_map<std::string, uint16_t> cluster_table =
ankerl::unordered_dense::map<std::string, uint16_t> cluster_table =
load_hash<std::string, uint16_t>(random_group, "table");
robin_hood::unordered_node_map<size_t, NumpyMatrix> matches =
ankerl::unordered_dense::map<size_t, NumpyMatrix> matches =
load_hash<size_t, NumpyMatrix>(random_group, "matches");

// Centroid matrix
Expand Down
2 changes: 1 addition & 1 deletion src/database/database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <cstring>
#include <vector>
#include <string>
#include "robin_hood.h"
#include "unordered_dense.hpp"

#include <highfive/H5File.hpp>

Expand Down
14 changes: 7 additions & 7 deletions src/database/hdf5_funcs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include "dist/matrix.hpp"

#include "robin_hood.h"
#include "unordered_dense.hpp"
#include <highfive/H5File.hpp>

// HighFive does have support for reading/writing Eigen::Matrix
Expand All @@ -18,7 +18,7 @@
// code)
// Save a hash into a HDF5 file by saving as array of keys and values
template <typename T, typename U>
void save_hash(const robin_hood::unordered_node_map<T, U> &hash,
void save_hash(const ankerl::unordered_dense::map<T, U> &hash,
HighFive::Group &group,
const std::string &dataset_name)
{
Expand All @@ -38,7 +38,7 @@ void save_hash(const robin_hood::unordered_node_map<T, U> &hash,

// Specialisation for saving hashes whose values are Eigen matrices (keys are size_t)
template <>
void save_hash<size_t, NumpyMatrix>(const robin_hood::unordered_node_map<size_t, NumpyMatrix> &hash,
void save_hash<size_t, NumpyMatrix>(const ankerl::unordered_dense::map<size_t, NumpyMatrix> &hash,
HighFive::Group &group,
const std::string &dataset_name)
{
Expand Down Expand Up @@ -74,15 +74,15 @@ void save_hash<size_t, NumpyMatrix>(const robin_hood::unordered_node_map<size_t,
// Load a hash from a HDF5 file by reading arrays of keys and values
// and re-inserting into a new hash
template <typename T, typename U>
robin_hood::unordered_node_map<T, U> load_hash(HighFive::Group &group,
ankerl::unordered_dense::map<T, U> load_hash(HighFive::Group &group,
const std::string &dataset_name)
{
std::vector<T> hash_keys;
std::vector<U> hash_values;
group.getDataSet(dataset_name + "_keys").read(hash_keys);
group.getDataSet(dataset_name + "_values").read(hash_values);

robin_hood::unordered_node_map<T, U> hash;
ankerl::unordered_dense::map<T, U> hash;
for (size_t i = 0; i < hash_keys.size(); i++)
{
hash[hash_keys[i]] = hash_values[i];
Expand All @@ -92,7 +92,7 @@ robin_hood::unordered_node_map<T, U> load_hash(HighFive::Group &group,

// Specialisation for reading in Eigen matrices
template <>
robin_hood::unordered_node_map<size_t, NumpyMatrix> load_hash(
ankerl::unordered_dense::map<size_t, NumpyMatrix> load_hash(
HighFive::Group &group,
const std::string &dataset_name)
{
Expand All @@ -104,7 +104,7 @@ robin_hood::unordered_node_map<size_t, NumpyMatrix> load_hash(
values.read(buffer);
values.getAttribute("dims").read(dims);

robin_hood::unordered_node_map<size_t, NumpyMatrix> hash;
ankerl::unordered_dense::map<size_t, NumpyMatrix> hash;
float *buffer_pos = buffer.data();
for (size_t i = 0; i < hash_keys.size(); i++)
{
Expand Down
2 changes: 1 addition & 1 deletion src/gpu/gpu_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ std::vector<Reference> create_sketches_cuda(

// Run the sketch on the GPU (serially over the batch)
for (size_t j = 0; j < batch_size; j++) {
robin_hood::unordered_map<int, std::vector<uint64_t>> usigs;
ankerl::unordered_dense::map<int, std::vector<uint64_t>> usigs;
size_t seq_length;
bool densified;
try {
Expand Down
6 changes: 3 additions & 3 deletions src/random/random_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const int kmeans_seed = 3019;
// Functions used in construction
double csrs(const size_t k, const bool use_rc, const size_t l1,
const size_t l2);
std::tuple<robin_hood::unordered_node_map<std::string, uint16_t>, NumpyMatrix>
std::tuple<ankerl::unordered_dense::map<std::string, uint16_t>, NumpyMatrix>
cluster_frequencies(const std::vector<Reference> &sketches,
const unsigned int n_clusters);
std::vector<double> apply_rc(const Reference &ref);
Expand Down Expand Up @@ -359,7 +359,7 @@ uint16_t nearest_neighbour(const Reference &ref,
}

// k-means
std::tuple<robin_hood::unordered_node_map<std::string, uint16_t>, NumpyMatrix>
std::tuple<ankerl::unordered_dense::map<std::string, uint16_t>, NumpyMatrix>
cluster_frequencies(const std::vector<Reference> &sketches,
const unsigned int n_clusters) {

Expand All @@ -385,7 +385,7 @@ cluster_frequencies(const std::vector<Reference> &sketches,
centroids_matrix_d(means.data(), n_clusters, N_BASES);
NumpyMatrix centroids_matrix = centroids_matrix_d.cast<float>();

robin_hood::unordered_node_map<std::string, uint16_t> cluster_map;
ankerl::unordered_dense::map<std::string, uint16_t> cluster_map;
for (size_t sketch_idx = 0; sketch_idx < sketches.size(); sketch_idx++) {
cluster_map[sketches[sketch_idx].name()] =
nearest_neighbour(sketches[sketch_idx], centroids_matrix);
Expand Down
14 changes: 7 additions & 7 deletions src/random/random_match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include "dist/matrix.hpp"
#include "gpu/gpu.hpp"
#include "robin_hood.h"
#include "unordered_dense.h"

const unsigned int default_max_k = 101;
const unsigned int default_n_clusters = 3;
Expand All @@ -37,9 +37,9 @@ class RandomMC {
// load MC from database (see database.cpp)
RandomMC(const bool use_rc, const unsigned int min_k,
const unsigned int max_k,
const robin_hood::unordered_node_map<std::string, uint16_t>
const ankerl::unordered_dense::map<std::string, uint16_t>
&cluster_table,
const robin_hood::unordered_node_map<size_t, NumpyMatrix> &matches,
const ankerl::unordered_dense::map<size_t, NumpyMatrix> &matches,
const NumpyMatrix &cluster_centroids)
: _n_clusters(cluster_centroids.rows()), _no_adjustment(false),
_no_MC(false), _use_rc(use_rc), _min_k(min_k), _max_k(max_k),
Expand Down Expand Up @@ -80,10 +80,10 @@ class RandomMC {
const size_t default_length) const;

// functions for saving
robin_hood::unordered_node_map<std::string, uint16_t> cluster_table() const {
ankerl::unordered_dense::map<std::string, uint16_t> cluster_table() const {
return _cluster_table;
}
robin_hood::unordered_node_map<size_t, NumpyMatrix> matches() const {
ankerl::unordered_dense::map<size_t, NumpyMatrix> matches() const {
return _matches;
}
NumpyMatrix cluster_centroids() const { return _cluster_centroids; }
Expand All @@ -110,9 +110,9 @@ class RandomMC {
unsigned int _max_k;

// sketch name -> cluster ID
robin_hood::unordered_node_map<std::string, uint16_t> _cluster_table;
ankerl::unordered_dense::map<std::string, uint16_t> _cluster_table;
// k-mer idx -> square matrix of matches, idx = cluster
robin_hood::unordered_node_map<size_t, NumpyMatrix> _matches;
ankerl::unordered_dense::map<size_t, NumpyMatrix> _matches;

NumpyMatrix _cluster_centroids;
};
2 changes: 1 addition & 1 deletion src/reference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Reference::Reference(const std::string &name, const size_t bbits,
// Initialise from GPU sketch
Reference::Reference(
const std::string &name,
robin_hood::unordered_map<int, std::vector<uint64_t>> &sketch,
ankerl::unordered_dense::map<int, std::vector<uint64_t>> &sketch,
const size_t bbits, const size_t sketchsize64, const size_t seq_size,
const BaseComp<double> &bases, const unsigned long int missing_bases,
const bool use_rc, const bool densified, const bool reads)
Expand Down
6 changes: 3 additions & 3 deletions src/reference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include <tuple>

#include <Eigen/Dense>
#include "robin_hood.h"
#include "unordered_dense.hpp"

const size_t def_bbits = 14; // = log2(sketch size) where sketch size = 64 * sketchsize64
const size_t def_sketchsize64 = 156;
Expand Down Expand Up @@ -47,7 +47,7 @@ class Reference

// Initialise from GPU sketch
Reference(const std::string &name,
robin_hood::unordered_map<int, std::vector<uint64_t>> &sketch,
ankerl::unordered_dense::map<int, std::vector<uint64_t>> &sketch,
const size_t bbits,
const size_t sketchsize64,
const size_t seq_size,
Expand Down Expand Up @@ -105,7 +105,7 @@ class Reference
BaseComp<double> _bases;

// sketch - map keys are k-mer lengths
robin_hood::unordered_map<int, std::vector<uint64_t>> usigs;
ankerl::unordered_dense::map<int, std::vector<uint64_t>> usigs;
std::vector<size_t> _kmers;

void set_kmers() {
Expand Down
Loading

0 comments on commit 7c74c91

Please sign in to comment.