Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cgo params #29

Merged
merged 1 commit into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions internal/core/src/clustering/KmeansClustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,16 @@ KmeansClustering::Run(const milvus::proto::clustering::AnalyzeInfo& config) {
false; // all data are used for training, no need to random sampling
trained_segments_num = segment_ids.size();
}
if (train_num < num_clusters) {
LOG_WARN(msg_header_ +
"kmeans train num: {} less than num_clusters: {}, skip "
"clustering",
train_num,
num_clusters);
throw SegcoreError(ErrorCode::ClusterSkip,
"sample data num less than num clusters");
}

size_t train_size_final = train_num * dim * sizeof(T);
knowhere::TimeRecorder rc(msg_header_ + "kmeans clustering",
2 /* log level: info */);
Expand Down
110 changes: 0 additions & 110 deletions internal/core/src/clustering/analyze_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,116 +123,6 @@ DeleteAnalyze(CAnalyze analyze) {
return status;
}

CStatus
NewAnalyzeInfo(CAnalyzeInfo* c_analyze_info, CStorageConfig c_storage_config) {
try {
auto analyze_info = std::make_unique<AnalyzeInfo>();
auto& storage_config = analyze_info->storage_config;
storage_config.address = std::string(c_storage_config.address);
storage_config.bucket_name = std::string(c_storage_config.bucket_name);
storage_config.access_key_id =
std::string(c_storage_config.access_key_id);
storage_config.access_key_value =
std::string(c_storage_config.access_key_value);
storage_config.root_path = std::string(c_storage_config.root_path);
storage_config.storage_type =
std::string(c_storage_config.storage_type);
storage_config.cloud_provider =
std::string(c_storage_config.cloud_provider);
storage_config.iam_endpoint =
std::string(c_storage_config.iam_endpoint);
storage_config.cloud_provider =
std::string(c_storage_config.cloud_provider);
storage_config.useSSL = c_storage_config.useSSL;
storage_config.useIAM = c_storage_config.useIAM;
storage_config.region = c_storage_config.region;
storage_config.useVirtualHost = c_storage_config.useVirtualHost;
storage_config.requestTimeoutMs = c_storage_config.requestTimeoutMs;

*c_analyze_info = analyze_info.release();
auto status = CStatus();
status.error_code = Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
auto status = CStatus();
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
return status;
}
}

void
DeleteAnalyzeInfo(CAnalyzeInfo c_analyze_info) {
auto info = (AnalyzeInfo*)c_analyze_info;
delete info;
}

CStatus
AppendAnalyzeInfo(CAnalyzeInfo c_analyze_info,
int64_t collection_id,
int64_t partition_id,
int64_t field_id,
int64_t task_id,
int64_t version,
const char* field_name,
enum CDataType field_type,
int64_t dim,
int64_t num_clusters,
int64_t train_size) {
try {
auto analyze_info = (AnalyzeInfo*)c_analyze_info;
analyze_info->collection_id = collection_id;
analyze_info->partition_id = partition_id;
analyze_info->field_id = field_id;
analyze_info->task_id = task_id;
analyze_info->version = version;
analyze_info->field_type = milvus::DataType(field_type);
analyze_info->field_name = field_name;
analyze_info->dim = dim;
analyze_info->num_clusters = num_clusters;
analyze_info->train_size = train_size;
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}

CStatus
AppendSegmentInsertFile(CAnalyzeInfo c_analyze_info,
int64_t segID,
const char* c_file_path) {
try {
auto analyze_info = (AnalyzeInfo*)c_analyze_info;
std::string insert_file_path(c_file_path);
analyze_info->insert_files[segID].emplace_back(insert_file_path);

auto status = CStatus();
status.error_code = Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}

CStatus
AppendSegmentNumRows(CAnalyzeInfo c_analyze_info,
int64_t segID,
int64_t num_rows) {
try {
auto analyze_info = (AnalyzeInfo*)c_analyze_info;
analyze_info->num_rows[segID] = num_rows;

auto status = CStatus();
status.error_code = Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}

CStatus
GetAnalyzeResultMeta(CAnalyze analyze,
char** centroid_path,
Expand Down
29 changes: 0 additions & 29 deletions internal/core/src/clustering/analyze_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,6 @@ Analyze(CAnalyze* res_analyze,
CStatus
DeleteAnalyze(CAnalyze analyze);

CStatus
NewAnalyzeInfo(CAnalyzeInfo* c_analyze_info, CStorageConfig c_storage_config);

void
DeleteAnalyzeInfo(CAnalyzeInfo c_analyze_info);

CStatus
AppendAnalyzeInfo(CAnalyzeInfo c_analyze_info,
int64_t collection_id,
int64_t partition_id,
int64_t field_id,
int64_t task_id,
int64_t version,
const char* field_name,
enum CDataType field_type,
int64_t dim,
int64_t num_clusters,
int64_t train_size);

CStatus
AppendSegmentInsertFile(CAnalyzeInfo c_analyze_info,
int64_t segID,
const char* file_path);

CStatus
AppendSegmentNumRows(CAnalyzeInfo c_analyze_info,
int64_t segID,
int64_t num_rows);

CStatus
GetAnalyzeResultMeta(CAnalyze analyze,
char** centroid_path,
Expand Down
2 changes: 1 addition & 1 deletion internal/core/src/clustering/type_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
#include "common/type_c.h"

typedef void* CAnalyze;
typedef void* CAnalyzeInfo;
typedef void* CAnalyzeInfo;
19 changes: 19 additions & 0 deletions internal/core/unittest/test_kmeans_clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,25 @@ test_run() {
config["num_clusters"],
true);
}
// num clusters larger than train num
{
EXPECT_THROW(
try {
config["min_cluster_ratio"] = 0.01;
config["insert_files"] = remote_files;
config["num_clusters"] = 100000;
config["train_size"] = 25L * 1024 * 1024 * 1024; // 25GB
config["dim"] = dim;
config["num_rows"] = num_rows;
auto clusteringJob =
std::make_unique<clustering::KmeansClustering>(ctx);
clusteringJob->Run<T>(transforConfigToPB(config));
} catch (SegcoreError& e) {
ASSERT_EQ(e.get_error_code(), ErrorCode::ClusterSkip);
throw e;
},
SegcoreError);
}

// data skew
{
Expand Down
127 changes: 0 additions & 127 deletions internal/util/analyzecgowrapper/analyze_info.go

This file was deleted.

Loading