diff --git a/pyclustering/cluster/xmeans.py b/pyclustering/cluster/xmeans.py
index 2a9705ad..860a1711 100755
--- a/pyclustering/cluster/xmeans.py
+++ b/pyclustering/cluster/xmeans.py
@@ -602,26 +602,26 @@ def __bayesian_information_criterion(self, clusters, centers):
         dimension = len(self.__pointer_data[0])
 
         # estimation of the noise variance in the data set
-        sigma_sqrt = 0.0
+        sigma_sq = 0.0
         K = len(clusters)
         N = 0.0
 
         for index_cluster in range(0, len(clusters), 1):
             for index_object in clusters[index_cluster]:
-                sigma_sqrt += self.__metric(self.__pointer_data[index_object], centers[index_cluster])
+                sigma_sq += self.__metric(self.__pointer_data[index_object], centers[index_cluster])
 
             N += len(clusters[index_cluster])
 
         if N - K > 0:
-            sigma_sqrt /= (N - K)
+            sigma_sq /= (N - K)
             p = (K - 1) + dimension * K + 1
 
-            # in case of the same points, sigma_sqrt can be zero (issue: #407)
+            # in case of the same points, sigma_sq can be zero (issue: #407)
             sigma_multiplier = 0.0
-            if sigma_sqrt <= 0.0:
+            if sigma_sq <= 0.0:
                 sigma_multiplier = float('-inf')
             else:
-                sigma_multiplier = dimension * 0.5 * log(sigma_sqrt)
+                sigma_multiplier = dimension * 0.5 * log(sigma_sq)
 
             # splitting criterion
             for index_cluster in range(0, len(clusters), 1):
@@ -629,10 +629,10 @@ def __bayesian_information_criterion(self, clusters, centers):
 
                 L = n * log(n) - n * log(N) - n * 0.5 * log(2.0 * numpy.pi) - n * sigma_multiplier - (n - K) * 0.5
 
-                # BIC calculation
-                scores[index_cluster] = L - p * 0.5 * log(N)
+                scores[index_cluster] = L
 
-        return sum(scores)
+        # BIC calculation
+        return sum(scores) - p * 0.5 * log(N)
 
 
     def __verify_arguments(self):