From 0540d7c7e3f5366686ff110ae1073ff129d7240a Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 30 Jan 2025 01:45:18 -0500 Subject: [PATCH 1/2] [api/compat] PR #465 redefined the language classifier response struct in the API and was casting between incompatible pointer types. Using the exported struct throughout. --- src/expand.c | 4 ++-- src/language_classifier.c | 8 ++++---- src/language_classifier.h | 15 +++++---------- src/language_classifier_cli.c | 2 +- src/language_classifier_test.c | 2 +- src/libpostal.c | 2 +- src/near_dupe.c | 2 +- src/place.c | 4 ++-- src/place.h | 4 ++-- 9 files changed, 19 insertions(+), 24 deletions(-) diff --git a/src/expand.c b/src/expand.c index 898c17d12..2e1dc977c 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1567,7 +1567,7 @@ cstring_array *expand_address_phrase_option(char *input, libpostal_normalize_opt size_t len = strlen(input); - language_classifier_response_t *lang_response = NULL; + libpostal_language_classifier_response_t *lang_response = NULL; if (options.num_languages == 0) { lang_response = classify_languages(input); @@ -1627,7 +1627,7 @@ cstring_array *expand_address_phrase_option(char *input, libpostal_normalize_opt kh_destroy(str_set, unique_strings); if (lang_response != NULL) { - language_classifier_response_destroy(lang_response); + libpostal_language_classifier_response_destroy(lang_response); } char_array_destroy(temp_string); diff --git a/src/language_classifier.c b/src/language_classifier.c index 25273d920..d107d5586 100644 --- a/src/language_classifier.c +++ b/src/language_classifier.c @@ -46,7 +46,7 @@ language_classifier_t *get_language_classifier(void) { return language_classifier; } -void language_classifier_response_destroy(language_classifier_response_t *self) { +void language_classifier_response_destroy(libpostal_language_classifier_response_t *self) { if (self == NULL) return; if (self->languages != NULL) { free(self->languages); @@ -59,7 +59,7 @@ void language_classifier_response_destroy(language_classifier_response_t *self) free(self); } -language_classifier_response_t *classify_languages(char *address) { +libpostal_language_classifier_response_t *classify_languages(char *address) { language_classifier_t *classifier = get_language_classifier(); if (classifier == NULL) { @@ -88,7 +88,7 @@ language_classifier_response_t *classify_languages(char *address) { size_t n = classifier->num_labels; double_matrix_t *p_y = double_matrix_new_zeros(1, n); - language_classifier_response_t *response = NULL; + libpostal_language_classifier_response_t *response = NULL; bool model_exp = false; if (classifier->weights_type == MATRIX_DENSE) { model_exp = logistic_regression_model_expectation(classifier->weights.dense, x, p_y); @@ -129,7 +129,7 @@ language_classifier_response_t *classify_languages(char *address) { free(indices); - response = malloc(sizeof(language_classifier_response_t)); + response = malloc(sizeof(libpostal_language_classifier_response_t)); response->num_languages = num_languages; response->languages = languages; response->probs = probs; diff --git a/src/language_classifier.h b/src/language_classifier.h index c5402b390..2a638e9c1 100644 --- a/src/language_classifier.h +++ b/src/language_classifier.h @@ -6,6 +6,8 @@ #include #include +#include "libpostal.h" + #include "collections.h" #include "language_features.h" #include "logistic_regression.h" @@ -29,21 +31,14 @@ typedef struct language_classifier { } weights; } language_classifier_t; - -typedef struct language_classifier_response { - size_t num_languages; - char **languages; - double *probs; -} language_classifier_response_t; - // General usage language_classifier_t *language_classifier_new(void); language_classifier_t *get_language_classifier(void); language_classifier_t *get_language_classifier_country(void); -language_classifier_response_t *classify_languages(char *address); -void language_classifier_response_destroy(language_classifier_response_t *self); +libpostal_language_classifier_response_t *classify_languages(char *address); +void language_classifier_response_destroy(libpostal_language_classifier_response_t *self); void language_classifier_destroy(language_classifier_t *self); @@ -58,4 +53,4 @@ bool language_classifier_module_setup(char *dir); void language_classifier_module_teardown(void); -#endif \ No newline at end of file +#endif diff --git a/src/language_classifier_cli.c b/src/language_classifier_cli.c index e67be84a8..d612132bb 100644 --- a/src/language_classifier_cli.c +++ b/src/language_classifier_cli.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) { } - language_classifier_response_t *response = classify_languages(address); + libpostal_language_classifier_response_t *response = classify_languages(address); if (response == NULL) { printf("Could not classify language\n"); exit(EXIT_FAILURE); diff --git a/src/language_classifier_test.c b/src/language_classifier_test.c index b795be99c..262020eed 100644 --- a/src/language_classifier_test.c +++ b/src/language_classifier_test.c @@ -34,7 +34,7 @@ double test_accuracy(char *filename) { continue; } - language_classifier_response_t *response = classify_languages(address); + libpostal_language_classifier_response_t *response = classify_languages(address); if (response == NULL || response->num_languages == 0) { printf("%s\tNULL\t%s\n", language, address); continue; diff --git a/src/libpostal.c b/src/libpostal.c index 066a3015d..a2fa9e0a7 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -119,7 +119,7 @@ char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels char **libpostal_place_languages(size_t num_components, char **labels, char **values, size_t *num_languages) { - language_classifier_response_t *lang_response = place_languages(num_components, labels, values); + libpostal_language_classifier_response_t *lang_response = place_languages(num_components, labels, values); if (lang_response == NULL) { *num_languages = 0; return NULL; diff --git a/src/near_dupe.c b/src/near_dupe.c index 06a89ac4e..f28b2140c 100644 --- a/src/near_dupe.c +++ b/src/near_dupe.c @@ -670,7 +670,7 @@ cstring_array *near_dupe_hashes_languages(size_t num_components, char **labels, libpostal_normalize_options_t normalize_options = libpostal_get_default_options(); - language_classifier_response_t *lang_response = NULL; + libpostal_language_classifier_response_t *lang_response = NULL; if (num_languages == 0) { lang_response = place_languages(num_components, labels, values); diff --git a/src/place.c b/src/place.c index 549f1f48c..f5f05037a 100644 --- a/src/place.c +++ b/src/place.c @@ -17,10 +17,10 @@ static inline bool is_address_text_component(char *label) { ); } -language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values) { +libpostal_language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values) { if (num_components == 0 || values == NULL || labels == NULL) return NULL; - language_classifier_response_t *lang_response = NULL; + libpostal_language_classifier_response_t *lang_response = NULL; char *label; char *value; diff --git a/src/place.h b/src/place.h index 88920582c..22d28b621 100644 --- a/src/place.h +++ b/src/place.h @@ -32,7 +32,7 @@ typedef struct place { char *website; } place_t; -language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values); +libpostal_language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values); place_t *place_new(void); @@ -40,4 +40,4 @@ place_t *place_from_components(size_t num_components, char **labels, char **valu void place_destroy(place_t *place); -#endif \ No newline at end of file +#endif From d491739b21089dc24f01dbaef6bba73e86130039 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 30 Jan 2025 13:13:29 -0500 Subject: [PATCH 2/2] [fix] sparse_matrix_utils also had an incompatible pointer --- src/sparse_matrix_utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sparse_matrix_utils.c b/src/sparse_matrix_utils.c index 53fcaf979..70be9d141 100644 --- a/src/sparse_matrix_utils.c +++ b/src/sparse_matrix_utils.c @@ -94,15 +94,15 @@ inline bool sparse_matrix_add_unique_columns_alias(sparse_matrix_t *matrix, khas } uint32_array *sparse_matrix_unique_columns(sparse_matrix_t *matrix) { - khash_t(int_set) *unique_columns = kh_init(int_set); + khash_t(int_uint32) *unique_columns = kh_init(int_uint32); uint32_array *ret = uint32_array_new(); if (sparse_matrix_add_unique_columns(matrix, unique_columns, ret)) { - kh_destroy(int_set, unique_columns); + kh_destroy(int_uint32, unique_columns); return ret; } - kh_destroy(int_set, unique_columns); + kh_destroy(int_uint32, unique_columns); uint32_array_destroy(ret); return NULL; }