diff --git a/DESCRIPTION b/DESCRIPTION
index 5fe64b4..da1a8c3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,14 +22,17 @@ Imports:
     survey
 Suggests:
     testthat
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Collate:
     'anthro-package.R'
     'utils.R'
+    'prevalence-survey.R'
+    'prevalence-simple.R'
     'assertions.R'
     'prevalence.R'
     'z-score-helper.R'
     'api.R'
+    'prevalence-compute-api.R'
     'z-score-arm-circumference-for-age.R'
     'z-score-bmi-for-age.R'
     'z-score-head-circumference-for-age.R'
diff --git a/NAMESPACE b/NAMESPACE
index fbf96f3..d004407 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,15 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(column_names,simple_design)
+S3method(column_names,survey_design)
+S3method(column_values,simple_design)
+S3method(column_values,survey_design)
+S3method(compute_prevalence_estimates_for_column_by,simple_design)
+S3method(compute_prevalence_estimates_for_column_by,survey_design)
+S3method(compute_prevalence_sample_size_by,simple_design)
+S3method(compute_prevalence_sample_size_by,survey_design)
+S3method(compute_prevalence_zscore_summaries_by,simple_design)
+S3method(compute_prevalence_zscore_summaries_by,survey_design)
 export(anthro_api_compute_prevalence)
 export(anthro_api_compute_zscore)
 export(anthro_api_compute_zscore_adjusted)
@@ -7,8 +17,14 @@ export(anthro_api_standardize_oedema_var)
 export(anthro_api_standardize_sex_var)
 export(anthro_prevalence)
 export(anthro_zscores)
+importFrom(stats,aggregate)
 importFrom(stats,as.formula)
 importFrom(stats,confint)
+importFrom(stats,glm)
+importFrom(stats,plogis)
+importFrom(stats,qt)
+importFrom(stats,quasibinomial)
+importFrom(stats,sd)
 importFrom(stats,setNames)
 importFrom(survey,degf)
 importFrom(survey,svyby)
diff --git a/NEWS.md b/NEWS.md
index 3bb93bd..126f29b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,10 @@
 # anthro (development version)
 
+## Performance
+
+* For inputs with `cluster/strata = NULL` and `sw = NULL or 1` a faster
+  method was added for this special case to compute `anthro_prevalence`.
+
 ## Bugfix
 
 * Fixed a bug during prevalence computation when a value used as a group
diff --git a/R/prevalence-compute-api.R b/R/prevalence-compute-api.R
new file mode 100644
index 0000000..8dd4d05
--- /dev/null
+++ b/R/prevalence-compute-api.R
@@ -0,0 +1,40 @@
+#' In order to support different types of computation for special cases of the
+#' data we define here a compute API that different backends can hook into.
+#' @noRd
+NULL
+
+#' Computes the sample size by indicator and subset
+#' @noRd
+compute_prevalence_sample_size_by <- function(
+    data, indicator, subset_col_name
+) {
+  UseMethod("compute_prevalence_sample_size_by")
+}
+
+#' Computes prevalence of rates for a given indicator, subset and cutoff
+#' @noRd
+compute_prevalence_estimates_for_column_by <- function(
+    data, indicator_name, subset_col_name, prev_col_name
+){
+  UseMethod("compute_prevalence_estimates_for_column_by")
+}
+
+#' Computes prevalence of zscores by indicator and subset
+#' @noRd
+compute_prevalence_zscore_summaries_by <- function(
+    data, indicator, subset_col_name
+) {
+  UseMethod("compute_prevalence_zscore_summaries_by")
+}
+
+#' Returns the values for a specific column
+#' @noRd
+column_values <- function(x, col) {
+  UseMethod("column_values")
+}
+
+#' Returns the column names of the data
+#' @noRd
+column_names <- function(x) {
+  UseMethod("column_names")
+}
diff --git a/R/prevalence-simple.R b/R/prevalence-simple.R
new file mode 100644
index 0000000..6d694b5
--- /dev/null
+++ b/R/prevalence-simple.R
@@ -0,0 +1,168 @@
+#' The following is a fast approximation of what the survey package
+#' computes given cluster/strata/sw is NULL.
+#'
+#' T. Lumley (2024) "survey: analysis of complex survey samples".
+#' R package version 4.4.
+#'
+#' The speedup and memory savings can be substantial in particular for very
+#' large surveys with > 100.000 observations.
+#'
+#' The simple here refers to the absence of cluster/strata/sw, making the
+#' structure a bit more simple.
+#' @noRd
+
+#' @export
+column_names.simple_design <- function(x) {
+  colnames(x$data)
+}
+
+#' @export
+column_values.simple_design <- function(x, col) {
+  x$data[[col]]
+}
+
+#' @export
+compute_prevalence_zscore_summaries_by.simple_design <- function(
+    data, indicator, subset_col_name) {
+  zscore_col_name <- prev_zscore_value_column(indicator)
+  compute_and_aggregate(
+    data,
+    zscore_col_name,
+    subset_col_name,
+    compute = zscore_estimate,
+    empty_data_prototype = data.frame(
+      r = NA_real_,
+      se = NA_real_,
+      ll = NA_real_,
+      ul = NA_real_,
+      stdev = NA_real_
+    )
+  )
+}
+
+#' @export
+compute_prevalence_estimates_for_column_by.simple_design <- function(
+    data, indicator_name, subset_col_name, prev_col_name) {
+  compute_and_aggregate(
+    data,
+    prev_col_name,
+    subset_col_name,
+    compute = logit_rate_estimate,
+    empty_data_prototype = data.frame(
+      r = NA_real_,
+      se = NA_real_,
+      ll = NA_real_,
+      ul = NA_real_
+    )
+  )
+}
+
+#' @export
+compute_prevalence_sample_size_by.simple_design <- function(
+    data, indicator, subset_col_name) {
+  column_name <- prev_prevalence_column_name(indicator)
+  compute_and_aggregate(
+    data,
+    column_name,
+    subset_col_name,
+    compute = sample_size,
+    empty_data_prototype = data.frame(
+      pop = 0,
+      unwpop = 0
+    )
+  )
+}
+
+#' @importFrom stats aggregate
+compute_and_aggregate <- function(
+    data, value_column, subset_column, compute, empty_data_prototype) {
+  col_values <- column_values(data, value_column)
+  N <- length(col_values)
+  values <- aggregate(
+    col_values,
+    by = list(Group = column_values(data, subset_column)),
+    FUN = compute,
+    simplify = FALSE,
+    drop = FALSE,
+    N = N,
+    empty_data_prototype = empty_data_prototype
+  )
+  Group <- values$Group
+  values <- lapply(values$x, function(df) {
+    if (is.null(df)) {
+      return(empty_data_prototype)
+    }
+    df
+  })
+  data <- do.call(rbind, values)
+  cbind(Group, data)
+}
+
+#' @importFrom stats glm plogis qt quasibinomial
+logit_rate_estimate <- function(x, N, empty_data_prototype) {
+  x_orig <- x
+  x <- x[!is.na(x)]
+  if (length(x) == 0) {
+    return(empty_data_prototype)
+  }
+  n <- length(x)
+  r <- mean(x == 1)
+  se <- sample_se(x, r, n, N)
+
+  cis <- if (r == 1) {
+    c(1, 1)
+  } else if (r == 0) {
+    c(0, 0)
+  } else {
+    glm_model <- glm(x_orig ~ 1, family = quasibinomial(link = "logit"))
+    summary_model <- summary(glm_model)
+    r_var <- summary_model$cov.unscaled[1, 1]
+    logit_r <- summary_model$coefficients[1, "Estimate"]
+    scale <- N / (N - 1)
+    se_logit <- sqrt(scale * r_var)
+    t_value <- qt(1 - prevalence_significance_level / 2, df = N - 1)
+    cis <- plogis(
+      logit_r + se_logit * c(-t_value, t_value)
+    )
+    as.numeric(cis)
+  }
+
+  data.frame(
+    r = r * 100,
+    se = se * 100,
+    ll = cis[1L] * 100,
+    ul = cis[2L] * 100
+  )
+}
+
+#' @importFrom stats sd plogis qt
+zscore_estimate <- function(x, N, empty_data_prototype) {
+  x <- x[!is.na(x)]
+  if (length(x) == 0) {
+    return(empty_data_prototype)
+  }
+  n <- length(x)
+  r <- mean(x)
+  stdev <- sd(x)
+  se <- sample_se(x, r, n, N)
+  t_value <- qt(1 - prevalence_significance_level / 2, N - 1)
+  cis <- r + se * c(-t_value, t_value)
+  data.frame(
+    r = r,
+    se = se,
+    ll = cis[1L],
+    ul = cis[2L],
+    stdev = stdev
+  )
+}
+
+sample_size <- function(x, N, empty_data_prototype) {
+  n <- as.numeric(sum(!is.na(x)))
+  data.frame(pop = n, unwpop = n)
+}
+
+sample_se <- function(x, x_mean, n, N) {
+  scale <- N / (N - 1)
+  x_centered_and_scaled <- (x - x_mean) / n * sqrt(scale)
+  sqrt(sum(x_centered_and_scaled^2))
+}
diff --git a/R/prevalence-survey.R b/R/prevalence-survey.R
new file mode 100644
index 0000000..eced62a
--- /dev/null
+++ b/R/prevalence-survey.R
@@ -0,0 +1,142 @@
+#' @export
+column_values.survey_design <- function(x, col) {
+  x$design$variables[[col]]
+}
+
+#' @export
+column_names.survey_design <- function(x) {
+  colnames(x$design$variables)
+}
+
+#' @export
+compute_prevalence_zscore_summaries_by.survey_design <- function(
+    data, indicator, subset_col_name) {
+  zscore_col_name <- prev_zscore_value_column(indicator)
+  zscore_formula <- as.formula(paste0("~", zscore_col_name))
+  subset_formula <- as.formula(paste0("~", subset_col_name))
+  mean_est_summary <- svyby(
+    zscore_formula,
+    subset_formula,
+    data$design,
+    svymean,
+    na.rm = TRUE,
+    na.rm.all = TRUE,
+    drop.empty.groups = FALSE
+  )
+  mean_est_ci_summary <- confint(
+    mean_est_summary,
+    df = degf(data$design),
+    level = 1 - prevalence_significance_level
+  )
+
+  # when there is only one observation in the subset, svyvar returns a
+  # length 2 object which breaks svyby. We thus provide a robust
+  # version of svyvar that returns NA if a length 2 object is returned.
+  # this will be fixed in survey >= 4.2
+  robust_svyvar <- function(x, design, na.rm = FALSE, ...) {
+    res <- svyvar(x, design, na.rm = na.rm, ...)
+    if (length(res) == 2) {
+      new_res <- NA_real_
+      attr(new_res, "names") <- prev_zscore_value_column(indicator)
+      attr(new_res, "var") <- NA_real_
+      attr(new_res, "statistic") <- "variance"
+      class(new_res) <- c("svyvar", "svystat", "numeric")
+      return(new_res)
+    }
+    res
+  }
+  mean_est_sd_summary <- svyby(
+    zscore_formula,
+    subset_formula,
+    data$design,
+    robust_svyvar,
+    na.rm = TRUE,
+    na.rm.all = TRUE,
+    drop.empty.groups = FALSE
+  )
+  data.frame(
+    Group = as.character(mean_est_summary[[subset_col_name]]),
+    r = mean_est_summary[[prev_zscore_value_column(indicator)]],
+    se = survey::SE(mean_est_summary),
+    ll = mean_est_ci_summary[, 1L, drop = TRUE],
+    ul = mean_est_ci_summary[, 2L, drop = TRUE],
+    stdev = sqrt(mean_est_sd_summary[, 2L, drop = TRUE]),
+    stringsAsFactors = FALSE
+  )
+}
+
+
+#' @export
+compute_prevalence_sample_size_by.survey_design <- function(
+    data, indicator, subset_col_name) {
+  expr_name <- paste0("I(!is.na(", prev_prevalence_column_name(indicator), "))")
+  prev_formula <- as.formula(paste0("~", expr_name))
+  subset_formula <- as.formula(paste0("~", subset_col_name))
+  prev <- svyby(
+    prev_formula,
+    subset_formula,
+    data$design,
+    svytotal,
+    drop.empty.groups = FALSE
+  )
+  unweighted_prev <- svyby(
+    prev_formula,
+    subset_formula,
+    data$design_unweighted,
+    svytotal,
+    drop.empty.groups = FALSE
+  )
+  pop_weighted <- prev[[paste0(expr_name, "TRUE")]]
+  pop_unweighted <- unweighted_prev[[paste0(expr_name, "TRUE")]]
+
+  # syby returns NA for empty levels. We set the count to 0 for these.
+  pop_weighted[is.na(pop_weighted)] <- 0
+  pop_unweighted[is.na(pop_unweighted)] <- 0
+
+  stopifnot( # check that subsets come in the right order
+    all(prev[[subset_col_name]] == unweighted_prev[[subset_col_name]])
+  )
+  data.frame(
+    Group = as.character(prev[[subset_col_name]]),
+    pop = pop_weighted,
+    unwpop = pop_unweighted,
+    stringsAsFactors = FALSE
+  )
+}
+
+
+#' @export
+compute_prevalence_estimates_for_column_by.survey_design <- function(
+    data, indicator_name, subset_col_name, prev_col_name) {
+  subset_formula <- as.formula(paste0("~", subset_col_name))
+  prev_col_formula <- as.formula(paste0("~", prev_col_name))
+  mean_est_prev <- svyby(
+    prev_col_formula,
+    subset_formula,
+    data$design,
+    svymean,
+    na.rm = TRUE,
+    na.rm.all = TRUE,
+    drop.empty.groups = FALSE
+  )
+  mean_est_ci_prev <- svyby(
+    prev_col_formula,
+    subset_formula,
+    data$design,
+    svyciprop,
+    vartype = "ci",
+    df = degf(data$design),
+    method = "logit",
+    drop.empty.groups = FALSE,
+    na.rm.all = TRUE,
+    level = 1 - prevalence_significance_level
+  )[, 3L:4L]
+  data.frame(
+    Group = as.character(mean_est_prev[[subset_col_name]]),
+    r = mean_est_prev[[prev_col_name]] * 100,
+    se = survey::SE(mean_est_prev) * 100,
+    ll = mean_est_ci_prev$ci_l * 100,
+    ul = mean_est_ci_prev$ci_u * 100,
+    stringsAsFactors = FALSE
+  )
+}
diff --git a/R/prevalence.R b/R/prevalence.R
index 3b60427..805ba6a 100644
--- a/R/prevalence.R
+++ b/R/prevalence.R
@@ -131,7 +131,6 @@
 #'  \item{_se}{Standard error}
 #' }
 #'
-#'
 #' \strong{For example:}
 #' \describe{
 #'   \item{WHZ_pop}{Weight-for-height weighted sample size}
@@ -147,8 +146,26 @@
 #'   interval limit}
 #' }
 #'
+#' \strong{Methods:}
+#'
+#' The \code{survey} package is used to compute all estimates.
+#' Confidence intervals are computed with a significance level of \code{0.05}.
+#' \link[survey]{svymean} is used the z-score mean and CIs, while for the
+#' prevalence estimates \link[survey]{svyciprop} is used with the \code{logit}
+#' method.
+#'
+#' For cases where \code{cluster/strata/sw is NULL} a fast approximation
+#' is used that does not use the \code{survey} package but computes the same
+#' results within bounds (maximum empirical difference is < 0.0001).
+#'
+#' @references
+#' T. Lumley (2024) "survey: analysis of complex survey samples".
+#' R package version 4.4.
+#'
 #' @include anthro-package.R
 #' @include assertions.R
+#' @include prevalence-simple.R
+#' @include prevalence-survey.R
 #' @importFrom stats confint
 #' @importFrom stats as.formula
 #' @importFrom stats setNames
@@ -382,6 +399,7 @@ anthro_prevalence <- function(sex,
   )
 }
 
+prevalence_significance_level <- 0.05
 
 #' @importFrom survey svytotal
 #' @importFrom survey svyby
@@ -460,7 +478,8 @@ compute_prevalence_of_zscores <- function(data,
   res <- set_flagged_zscores_to_NA(zscores_to_compute, data)
   res <- create_zscore_auxiliary_columns(zscores_to_compute, res)
   res <- create_cutoff_columns_for_each_zscore(zscores_to_compute, res)
-  survey_design <- build_survey_design(res)
+
+  survey_design <- build_design(res)
 
   final_result <- compute_prevalence_for_each_subset_and_indicator(
     subset_col_names = survey_subsets,
@@ -486,43 +505,22 @@ generate_cutoffs <- function(data_column) {
   })
 }
 
-#' @importFrom survey svydesign
-build_survey_design <- function(survey_data) {
-  has_cluster_info <- !is.null(survey_data[["cluster"]])
-  has_strata_info <- !is.null(survey_data[["strata"]])
-  has_sampling_weights <- !is.null(survey_data[["sampling_weights"]])
-  just_one_cluster <- has_cluster_info && length(unique(survey_data[["cluster"]])) == 1L
-  any_strata_na <- anyNA(survey_data[["strata"]])
-  stopifnot(!any_strata_na)
-  cluster_formula <- if (just_one_cluster || !has_cluster_info) {
-    ~1
-  } else {
-    ~cluster
-  }
-  strata_formula <- if (has_strata_info) {
-    ~strata
+build_design <- function(survey_data) {
+  # we can override the choice here using internal options.
+  # these options are not part of the public api and can change at any time.
+  if (isTRUE(getOption("anthro.internal.compute_method") == "survey")) {
+    return(build_survey_design(survey_data))
   }
-  sampling_weights_formula <- if (has_sampling_weights) {
-    ~sampling_weights
+  is_simple_problem <- is.null(survey_data[["cluster"]]) &&
+    is.null(survey_data[["strata"]]) &&
+    (
+      is.null(survey_data[["sampling_weights"]]) ||
+        all(survey_data[["sampling_weights"]] == 1, na.rm = TRUE)
+    )
+  if (is_simple_problem) {
+    return(build_simple_design(survey_data))
   }
-  design <- svydesign(
-    ids = cluster_formula,
-    strata = strata_formula,
-    weights = sampling_weights_formula,
-    data = survey_data,
-    nest = TRUE
-  )
-  design_unweighted <- svydesign(
-    ids = cluster_formula,
-    strata = strata_formula,
-    weights = NULL,
-    data = survey_data,
-    nest = TRUE
-  )
-  list(
-    design = design,
-    design_unweighted = design_unweighted
-  )
+  build_survey_design(survey_data)
 }
 
 compute_prevalence_stunting_wasting <- function(survey_design, subset_col_name) {
@@ -535,7 +533,7 @@ compute_prevalence_stunting_wasting <- function(survey_design, subset_col_name)
   list(
     compute_prevalence_sample_size(survey_design, indicator, subset_col_name),
     compute_prevalence_estimates_for_column(
-      survey_design$design, indicator$name,
+      survey_design, indicator$name,
       subset_col_name, prev_prevalence_column_name(indicator)
     )
   )
@@ -551,7 +549,7 @@ compute_prevalence_stunting_overweight <- function(survey_design, subset_col_nam
   list(
     compute_prevalence_sample_size(survey_design, indicator, subset_col_name),
     compute_prevalence_estimates_for_column(
-      survey_design$design, indicator$name,
+      survey_design, indicator$name,
       subset_col_name, prev_prevalence_column_name(indicator)
     )
   )
@@ -562,7 +560,7 @@ compute_prevalence_for_each_subset_and_indicator <- function(subset_col_names,
                                                              survey_design,
                                                              indicators) {
   has_zlen_and_wfl <- all(
-    c("zlen", "zwfl_aux") %in% colnames(survey_design$design$variables)
+    c("zlen", "zwfl_aux") %in% column_names(survey_design)
   )
   # This double apply could be moved down to the survey package as it
   # also supports calculating multiple values at once.
@@ -576,10 +574,10 @@ compute_prevalence_for_each_subset_and_indicator <- function(subset_col_names,
             survey_design, indicator, subset_col_name
           )),
           compute_prevalence_cutoff_summaries(
-            survey_design$design, indicator, subset_col_name
+            survey_design, indicator, subset_col_name
           ),
           list(compute_prevalence_zscore_summaries(
-            survey_design$design, indicator, subset_col_name
+            survey_design, indicator, subset_col_name
           ))
         )
       })
@@ -655,17 +653,15 @@ create_zscore_auxiliary_columns <- function(zscores_to_compute, dataframe) {
   }
   has_zlen_and_wfl <- all(c("zlen", "zwfl_aux") %in% colnames(dataframe))
   if (has_zlen_and_wfl) {
+    zlen <- dataframe[["zlen"]]
+    zwfl_aux <- dataframe[["zwfl_aux"]]
+    is_na <- is.na(zlen) | is.na(zwfl_aux)
+    res <- rep.int(0L, length(zlen))
+    res[is_na] <- NA_integer_
     make_combined_aux_col <- function(flag_fun) {
-      as.integer(
-        with(
-          dataframe,
-          ifelse(
-            is.na(zlen) | is.na(zwfl_aux),
-            NA_integer_,
-            ifelse(zlen < -2 & flag_fun(zwfl_aux), 1L, 0L)
-          )
-        )
-      )
+      res_local <- res
+      res_local[zlen < -2 & flag_fun(zwfl_aux)] <- 1L
+      res_local
     }
     dataframe[["zlen_zwfl_aux_stunting_wasting"]] <- make_combined_aux_col(
       flag_fun = function(x) x < -2
@@ -677,6 +673,7 @@ create_zscore_auxiliary_columns <- function(zscores_to_compute, dataframe) {
   dataframe
 }
 
+
 set_flagged_zscores_to_NA <- function(indicators, dataframe) {
   for (indicator in indicators) {
     zscore_col <- prev_zscore_value_column(indicator)
@@ -703,43 +700,11 @@ create_cutoff_columns_for_each_zscore <- function(indicators, dataframe) {
 }
 
 compute_prevalence_sample_size <- function(survey_design, indicator, subset_col_name) {
-  indicator_name <- indicator$name
-  expr_name <- paste0("I(!is.na(", prev_prevalence_column_name(indicator), "))")
-  prev_formula <- as.formula(paste0("~", expr_name))
-  subset_formula <- as.formula(paste0("~", subset_col_name))
-  prev <- svyby(
-    prev_formula,
-    subset_formula,
-    survey_design$design,
-    svytotal,
-    drop.empty.groups = FALSE
-  )
-  unweighted_prev <- svyby(
-    prev_formula,
-    subset_formula,
-    survey_design$design_unweighted,
-    svytotal,
-    drop.empty.groups = FALSE
-  )
-  pop_weighted <- prev[[paste0(expr_name, "TRUE")]]
-  pop_unweighted <- unweighted_prev[[paste0(expr_name, "TRUE")]]
-
-  # syby returns NA for empty levels. We set the count to 0 for these.
-  pop_weighted[is.na(pop_weighted)] <- 0
-  pop_unweighted[is.na(pop_unweighted)] <- 0
-
-  stopifnot( # check that subsets come in the right order
-    all(prev[[subset_col_name]] == unweighted_prev[[subset_col_name]])
-  )
-  res <- data.frame(
-    Group = as.character(prev[[subset_col_name]]),
-    pop = pop_weighted,
-    unwpop = pop_unweighted,
-    stringsAsFactors = FALSE
-  )
+  res <- compute_prevalence_sample_size_by(survey_design, indicator, subset_col_name)
 
   # due to backwards compatibility, columns HA_2_WH_2 and HA_2_WH2 will not
   # have a suffix Z for now
+  indicator_name <- indicator$name
   suffix <- if (indicator_name %in% c("HA_2_WH_2", "HA_2_WH2")) {
     c("_pop", "_unwpop")
   } else {
@@ -767,13 +732,10 @@ compute_prevalence_cutoff_summaries <- function(survey_design, indicator, subset
 }
 
 compute_prevalence_estimates_for_column <- function(survey_design, indicator_name, subset_col_name, prev_col_name) {
-  subset_formula <- as.formula(paste0("~", subset_col_name))
-  prev_col_formula <- as.formula(paste0("~", prev_col_name))
-  all_na <- all(is.na(survey_design$variables[[prev_col_name]]))
-
+  all_na <- all(is.na(column_values(survey_design, prev_col_name)))
   res <- if (all_na) {
     data.frame(
-      Group = unique_groups(survey_design$variables[[subset_col_name]]),
+      Group = unique_groups(column_values(survey_design, subset_col_name)),
       r = NA_real_,
       se = NA_real_,
       ll = NA_real_,
@@ -781,36 +743,7 @@ compute_prevalence_estimates_for_column <- function(survey_design, indicator_nam
       stringsAsFactors = FALSE
     )
   } else {
-    mean_est_prev <- svyby(
-      prev_col_formula,
-      subset_formula,
-      survey_design,
-      svymean,
-      na.rm = TRUE,
-      na.rm.all = TRUE,
-      drop.empty.groups = FALSE
-    )
-
-    mean_est_ci_prev <- svyby(
-      prev_col_formula,
-      subset_formula,
-      survey_design,
-      svyciprop,
-      vartype = "ci",
-      df = degf(survey_design),
-      method = "logit",
-      drop.empty.groups = FALSE,
-      na.rm.all = TRUE,
-      level = 0.95
-    )[, 3L:4L]
-    data.frame(
-      Group = as.character(mean_est_prev[[subset_col_name]]),
-      r = mean_est_prev[[prev_col_name]] * 100,
-      se = survey::SE(mean_est_prev) * 100,
-      ll = mean_est_ci_prev$ci_l * 100,
-      ul = mean_est_ci_prev$ci_u * 100,
-      stringsAsFactors = FALSE
-    )
+    compute_prevalence_estimates_for_column_by(survey_design, indicator_name, subset_col_name, prev_col_name)
   }
   value_col_names <- paste0(
     indicator_name, c("_r", "_se", "_ll", "_ul")
@@ -820,16 +753,15 @@ compute_prevalence_estimates_for_column <- function(survey_design, indicator_nam
   res
 }
 
+
+
 compute_prevalence_zscore_summaries <- function(survey_design,
                                                 indicator, subset_col_name) {
-  indicator_name <- indicator$name
   zscore_col_name <- prev_zscore_value_column(indicator)
-  zscore_formula <- as.formula(paste0("~", zscore_col_name))
-  subset_formula <- as.formula(paste0("~", subset_col_name))
-  all_na <- all(is.na(survey_design$variables[[zscore_col_name]]))
+  all_na <- all(is.na(column_values(survey_design, zscore_col_name)))
   res <- if (all_na) {
     data.frame(
-      Group = unique_groups(survey_design$variables[[subset_col_name]]),
+      Group = unique_groups(column_values(survey_design, subset_col_name)),
       r = NA_real_,
       se = NA_real_,
       ll = NA_real_,
@@ -838,57 +770,10 @@ compute_prevalence_zscore_summaries <- function(survey_design,
       stringsAsFactors = FALSE
     )
   } else {
-    mean_est_summary <- svyby(
-      zscore_formula,
-      subset_formula,
-      survey_design,
-      svymean,
-      na.rm = TRUE,
-      na.rm.all = TRUE,
-      drop.empty.groups = FALSE
-    )
-    mean_est_ci_summary <- confint(
-      mean_est_summary,
-      df = degf(survey_design),
-      level = 0.95
-    )
-
-    # when there is only one observation in the subset, svyvar returns a
-    # length 2 object which breaks svyby. We thus provide a robust
-    # version of svyvar that returns NA if a length 2 object is returned.
-    # this will be fixed in survey >= 4.2
-    robust_svyvar <- function(x, design, na.rm = FALSE, ...) {
-      res <- svyvar(x, design, na.rm = na.rm, ...)
-      if (length(res) == 2) {
-        new_res <- NA_real_
-        attr(new_res, "names") <- prev_zscore_value_column(indicator)
-        attr(new_res, "var") <- NA_real_
-        attr(new_res, "statistic") <- "variance"
-        class(new_res) <- c("svyvar", "svystat", "numeric")
-        return(new_res)
-      }
-      res
-    }
-    mean_est_sd_summary <- svyby(
-      zscore_formula,
-      subset_formula,
-      survey_design,
-      robust_svyvar,
-      na.rm = TRUE,
-      na.rm.all = TRUE,
-      drop.empty.groups = FALSE
-    )
-    data.frame(
-      Group = as.character(mean_est_summary[[subset_col_name]]),
-      r = mean_est_summary[[prev_zscore_value_column(indicator)]],
-      se = survey::SE(mean_est_summary),
-      ll = mean_est_ci_summary[, 1L, drop = TRUE],
-      ul = mean_est_ci_summary[, 2L, drop = TRUE],
-      stdev = sqrt(mean_est_sd_summary[, 2L, drop = TRUE]),
-      stringsAsFactors = FALSE
-    )
+    compute_prevalence_zscore_summaries_by(survey_design, indicator, subset_col_name)
   }
 
+  indicator_name <- indicator$name
   value_col_names <- paste0(
     indicator_name, c("_r", "_se", "_ll", "_ul", "_stdev")
   )
@@ -899,3 +784,53 @@ compute_prevalence_zscore_summaries <- function(survey_design,
 unique_groups <- function(values) {
   as.character(unique(values[!is.na(values)]))
 }
+
+
+build_simple_design <- function(survey_data) {
+  structure(
+    list(data = survey_data),
+    class = "simple_design"
+  )
+}
+
+#' @importFrom survey svydesign
+build_survey_design <- function(survey_data) {
+  has_cluster_info <- !is.null(survey_data[["cluster"]])
+  has_strata_info <- !is.null(survey_data[["strata"]])
+  has_sampling_weights <- !is.null(survey_data[["sampling_weights"]])
+  just_one_cluster <- has_cluster_info && length(unique(survey_data[["cluster"]])) == 1L
+  any_strata_na <- anyNA(survey_data[["strata"]])
+  stopifnot(!any_strata_na)
+  cluster_formula <- if (just_one_cluster || !has_cluster_info) {
+    ~1
+  } else {
+    ~cluster
+  }
+  strata_formula <- if (has_strata_info) {
+    ~strata
+  }
+  sampling_weights_formula <- if (has_sampling_weights) {
+    ~sampling_weights
+  }
+  design <- svydesign(
+    ids = cluster_formula,
+    strata = strata_formula,
+    weights = sampling_weights_formula,
+    data = survey_data,
+    nest = TRUE
+  )
+  design_unweighted <- svydesign(
+    ids = cluster_formula,
+    strata = strata_formula,
+    weights = NULL,
+    data = survey_data,
+    nest = TRUE
+  )
+  structure(
+    list(
+      design = design,
+      design_unweighted = design_unweighted
+    ),
+    class = "survey_design"
+  )
+}
diff --git a/man/anthro_prevalence.Rd b/man/anthro_prevalence.Rd
index 89354c7..bf3c712 100644
--- a/man/anthro_prevalence.Rd
+++ b/man/anthro_prevalence.Rd
@@ -175,7 +175,6 @@ The resulting columns are coded with a \emph{prefix},
  \item{_se}{Standard error}
 }
 
-
 \strong{For example:}
 \describe{
   \item{WHZ_pop}{Weight-for-height weighted sample size}
@@ -190,6 +189,18 @@ The resulting columns are coded with a \emph{prefix},
   weight-for-height combined (stunted & wasted) lower 95\% confidence
   interval limit}
 }
+
+\strong{Methods:}
+
+The \code{survey} package is used to compute all estimates.
+Confidence intervals are computed with a significance level of \code{0.05}.
+\link[survey]{svymean} is used the z-score mean and CIs, while for the
+prevalence estimates \link[survey]{svyciprop} is used with the \code{logit}
+method.
+
+For cases where \code{cluster/strata/sw is NULL} a fast approximation
+is used that does not use the \code{survey} package but computes the same
+results within bounds (maximum empirical difference is < 0.0001).
 }
 \description{
 Prevalence estimates according to the WHO recommended standard analysis:
@@ -243,3 +254,7 @@ colnames(res) <- c("Group", "Unweighted N", "-3SD", "-2SD", "z-score mean ")
 res
 }
 }
+\references{
+T. Lumley (2024) "survey: analysis of complex survey samples".
+R package version 4.4.
+}
diff --git a/tests/testthat/test-prevalence-simple-estimates.R b/tests/testthat/test-prevalence-simple-estimates.R
new file mode 100644
index 0000000..61f87b4
--- /dev/null
+++ b/tests/testthat/test-prevalence-simple-estimates.R
@@ -0,0 +1,41 @@
+test_that("survey and approximation yield are equal within tolerance", {
+  set.seed(1)
+  data <- data.frame(
+    sex = sample(1:2, 100, replace = TRUE),
+    age = sample(0:50, 100, replace = TRUE),
+    weight = pmax(rnorm(100, 20, 10), 1),
+    lenhei = pmax(rnorm(100, 80, 20), 30)
+  )
+  res_simple <- anthro_prevalence(
+    data$sex,
+    data$age,
+    is_age_in_month = TRUE,
+    weight = data$weight,
+    lenhei = data$lenhei
+  )
+  opts <- options("anthro.internal.compute_method" = "survey")
+  on.exit(options(opts))
+  res_survey <- anthro_prevalence(
+    data$sex,
+    data$age,
+    is_age_in_month = TRUE,
+    weight = data$weight,
+    lenhei = data$lenhei
+  )
+  expect_equal(colnames(res_simple), colnames(res_survey))
+  expect_equal(nrow(res_simple), nrow(res_survey))
+  for (col in colnames(res_simple)) {
+    expect_equal(res_simple[[!!col]], res_survey[[!!col]], tolerance = 0.0001)
+  }
+})
+
+describe("sample size", {
+  it("counts non na values", {
+    res <- sample_size(c(1, 2, NA_real_))
+    expect_s3_class(res, "data.frame")
+    expect_equal(res$pop, 2)
+    expect_equal(res$unwpop, 2)
+    expect_false(is.integer(res$pop))
+    expect_false(is.integer(res$unwpop))
+  })
+})