From 955cd8aa8d9068319d26906c93391e1cc268fc3b Mon Sep 17 00:00:00 2001
From: Ernest Guevarra <ernest@guevarra.io>
Date: Thu, 26 Dec 2024 08:37:29 +0000
Subject: [PATCH] add checks for input values

---
 R/01-sample_size.R                   | 123 ++++++++++++++++++++++++---
 README.Rmd                           |   2 +-
 man/get_n_cases.Rd                   |   6 +-
 man/get_n_clusters.Rd                |  15 ++--
 pkgdown/_pkgdown.yml                 |   3 +-
 tests/testthat/test-01-sample_size.R | 105 ++++++++++++++++-------
 tests/testthat/test-utils.R          |  36 ++++++++
 7 files changed, 235 insertions(+), 55 deletions(-)
 create mode 100644 tests/testthat/test-utils.R

diff --git a/R/01-sample_size.R b/R/01-sample_size.R
index 9864544..9736899 100644
--- a/R/01-sample_size.R
+++ b/R/01-sample_size.R
@@ -22,6 +22,49 @@
 #'
 
 get_sample_n <- function(N, dLower, dUpper, alpha = 0.1, beta = 0.1) {
+  ## Check that dLower and dUpper are within 0 to 1 ----
+  if (dLower < 0 | dLower > 1) {
+    stop(
+      "The value for `dLower` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  if (dUpper < 0 | dUpper > 1) {
+    stop(
+      "The value for `dUpper` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  ## Check that dLower is less than dUpper ----
+  if (dLower > dUpper) {
+    stop(
+      "Value for `dLower` should be less than `dUpper`. Check values ",
+      "and try again.",
+      call. = TRUE
+    )
+  }
+
+  ## Check that alpha and beta error values are within 0 and 1 ----
+  if (alpha < 0 | alpha > 1) {
+    stop(
+      "The value for `alpha` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  if (beta < 0 | beta > 1) {
+    stop(
+      "The value for `beta` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }  
+
   low <- ceiling(dLower  * N)
   high <- ceiling(dUpper * N)
 
@@ -73,6 +116,49 @@ get_sample_n <- function(N, dLower, dUpper, alpha = 0.1, beta = 0.1) {
 #'
 
 get_sample_d <- function(N, n, dLower, dUpper, alpha = 0.1, beta = 0.1) {
+  ## Check that dLower and dUpper are within 0 to 1 ----
+  if (dLower < 0 | dLower > 1) {
+    stop(
+      "The value for `dLower` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  if (dUpper < 0 | dUpper > 1) {
+    stop(
+      "The value for `dUpper` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  ## Check that dLower is less than dUpper ----
+  if (dLower > dUpper) {
+    stop(
+      "Value for `dLower` should be less than `dUpper`. Check values ",
+      "and try again.",
+      call. = TRUE
+    )
+  }
+
+  ## Check that alpha and beta error values are within 0 and 1 ----
+  if (alpha < 0 | alpha > 1) {
+    stop(
+      "The value for `alpha` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  if (beta < 0 | beta > 1) {
+    stop(
+      "The value for `beta` should be from 0 and 1. ",
+      "Check value and try again.",
+      call. = TRUE
+    )
+  }
+
   low <- ceiling(dLower  * N)
   high <- ceiling(dUpper * N)
 
@@ -123,10 +209,10 @@ get_sample_d <- function(N, n, dLower, dUpper, alpha = 0.1, beta = 0.1) {
 #' Calculate estimated number of cases for a condition affecting children under
 #' 5 years old in a specified survey area
 #'
-#' @param N Population for all ages in the specified survey area
+#' @param N Population for all ages in the specified survey area.
 #' @param u5 Proportion (value from 0 to 1) of population that are aged 6-59
-#'   months
-#' @param p Prevalence of condition that is to be assessed
+#'   months.
+#' @param p Prevalence (value from 0 to 1) of condition that is to be assessed.
 #'
 #' @returns A numeric value of the estimated number of cases in the specified
 #'   survey area
@@ -140,6 +226,20 @@ get_sample_d <- function(N, n, dLower, dUpper, alpha = 0.1, beta = 0.1) {
 #'
 
 get_n_cases <- function(N, u5, p) {
+  if (u5 < 0 | u5 > 1) {
+    stop(
+      "The value for `u5` should be from 0 to 1. Check value and try again.",
+      call. = TRUE
+    )
+  }
+
+  if (p < 0 | p > 1) {
+    stop(
+      "The value for `p` should be from 0 to 1. Check value and try again.",
+      call. = TRUE
+    )
+  }
+
   floor(N * u5 * p)
 }
 
@@ -147,25 +247,26 @@ get_n_cases <- function(N, u5, p) {
 #' 
 #' Calculate number of clusters to sample to reach target sample size
 #'
-#' @param n Target sample size of cases for the coverage survey
-#' @param N Average cluster population for all ages in the specified survey area
+#' @param n Target sample size of cases for the coverage survey.
+#' @param n_cluster Average cluster population for all ages in the specified
+#'   survey area.
 #' @param u5 Proportion (value from 0 to 1) of population that are aged 6-59
-#'   months
-#' @param p Prevalence of condition that is to be assessed
+#'   months.
+#' @param p Prevalence (value from 0 to 1) of condition that is to be assessed.
 #'
 #' @returns A numeric value of the estimated number of clusters to sample to 
-#'   reach target sample size
+#'   reach target sample size.
 #'
 #' @examples
 #' ## Calculate number of villages to sample given an average village population
 #' ## of 600 persons of all ages with an under-5 population of 17% and a
 #' ## prevalence of SAM of 2% if the target sample size is 40
-#' get_n_clusters(n = 40, N = 600, u5 = 0.17, p = 0.02)
+#' get_n_clusters(n = 40, n_cluster = 600, u5 = 0.17, p = 0.02)
 #'
 #' @export
 #'
 
-get_n_clusters <- function(n, N, u5, p) {
-  ceiling(n / floor(N * u5 * p))
+get_n_clusters <- function(n, n_cluster, u5, p) {
+  ceiling(n / get_n_cases(N = n_cluster, u5 = u5, p = p))
 }
 
diff --git a/README.Rmd b/README.Rmd
index 8351a68..6f94af0 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -52,7 +52,7 @@ install.packages(
 
 ### Lot quality assurance sampling frame
 
-To setup an LQAS sampling frame, a target sample size is first estimated. For example, if the survey area has an estimated population of about 600 severe acute malnourished (SAME) children and you want to assess whether coverage is reaching at least 50%, the sample size can be calculated as follows:
+To setup an LQAS sampling frame, a target sample size is first estimated. For example, if the survey area has an estimated population of about 600 severe acute malnourished (SAM) children and you want to assess whether coverage is reaching at least 50%, the sample size can be calculated as follows:
 
 ```{r samp-size-1, eval = FALSE}
 get_sample_n(N = 600, dLower = 0.5, dUpper = 0.8)
diff --git a/man/get_n_cases.Rd b/man/get_n_cases.Rd
index efc290a..89e882e 100644
--- a/man/get_n_cases.Rd
+++ b/man/get_n_cases.Rd
@@ -8,12 +8,12 @@
 get_n_cases(N, u5, p)
 }
 \arguments{
-\item{N}{Population for all ages in the specified survey area}
+\item{N}{Population for all ages in the specified survey area.}
 
 \item{u5}{Proportion (value from 0 to 1) of population that are aged 6-59
-months}
+months.}
 
-\item{p}{Prevalence of condition that is to be assessed}
+\item{p}{Prevalence (value from 0 to 1) of condition that is to be assessed.}
 }
 \value{
 A numeric value of the estimated number of cases in the specified
diff --git a/man/get_n_clusters.Rd b/man/get_n_clusters.Rd
index fda6384..7be2788 100644
--- a/man/get_n_clusters.Rd
+++ b/man/get_n_clusters.Rd
@@ -4,21 +4,22 @@
 \alias{get_n_clusters}
 \title{Calculate number of clusters to sample to reach target sample size}
 \usage{
-get_n_clusters(n, N, u5, p)
+get_n_clusters(n, n_cluster, u5, p)
 }
 \arguments{
-\item{n}{Target sample size of cases for the coverage survey}
+\item{n}{Target sample size of cases for the coverage survey.}
 
-\item{N}{Average cluster population for all ages in the specified survey area}
+\item{n_cluster}{Average cluster population for all ages in the specified
+survey area.}
 
 \item{u5}{Proportion (value from 0 to 1) of population that are aged 6-59
-months}
+months.}
 
-\item{p}{Prevalence of condition that is to be assessed}
+\item{p}{Prevalence (value from 0 to 1) of condition that is to be assessed.}
 }
 \value{
 A numeric value of the estimated number of clusters to sample to
-reach target sample size
+reach target sample size.
 }
 \description{
 Calculate number of clusters to sample to reach target sample size
@@ -27,6 +28,6 @@ Calculate number of clusters to sample to reach target sample size
 ## Calculate number of villages to sample given an average village population
 ## of 600 persons of all ages with an under-5 population of 17\% and a
 ## prevalence of SAM of 2\% if the target sample size is 40
-get_n_clusters(n = 40, N = 600, u5 = 0.17, p = 0.02)
+get_n_clusters(n = 40, n_cluster = 600, u5 = 0.17, p = 0.02)
 
 }
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index 275af93..d82c7e5 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -58,7 +58,8 @@ reference:
 
   - title: Tests for SLEAC classifier performance
     contents:
-    - starts_with("lqas")
+    - starts_with("lqas_simulate")
+    - lqas_get_class_prob
     - print.lqasClass
     - plot.lqasSim
 
diff --git a/tests/testthat/test-01-sample_size.R b/tests/testthat/test-01-sample_size.R
index 21fe711..b96cef1 100644
--- a/tests/testthat/test-01-sample_size.R
+++ b/tests/testthat/test-01-sample_size.R
@@ -1,47 +1,88 @@
-## Test that outputs are numeric
+# Tests for sample size functions ----------------------------------------------
 
-test_that("output is numeric", {
-  expect_type(get_binom_hypergeom(n = 600, k = 40), "double")
-  expect_true(is.numeric(get_binom_hypergeom(n = 600, k = 40)))
-})
+test_that("get_sample_n works as expected", {
+  samp_plan <- get_sample_n(N = 600, dLower = 0.7, dUpper = 0.9)
 
-test_that("output is numeric", {
-  expect_type(get_hypergeom(k = 5, m = 600, n = 25, N = 10000), "double")
-  expect_true(is.numeric(get_hypergeom(k = 5, m = 600, n = 25, N = 10000)))
-})
+  expect_type(samp_plan, "list")
+  expect_named(samp_plan, expected = c("n", "d", "alpha", "beta"))
+  
+  expect_error(get_sample_n(N = 600, dLower = -0.7, dUpper = 0.9))
+  expect_error(get_sample_n(N = 600, dLower = 0.7, dUpper = -0.9))
+  expect_error(get_sample_n(N = 600, dLower = 7, dUpper = 0.9))
+  expect_error(get_sample_n(N = 600, dLower = 0.7, dUpper = 9))
+  expect_error(get_sample_n(N = 600, dLower = 0.9, dUpper = 0.7))
 
-test_that("output is numeric", {
-  expect_type(
-    get_hypergeom_cumulative(k = 5, m = 600, n = 25, N = 10000), "double"
+  expect_error(
+    get_sample_n(N = 600, dLower = 0.7, dUpper = 0.9, alpha = -0.1, beta = 0.1)
+  )
+  expect_error(
+    get_sample_n(N = 600, dLower = 0.7, dUpper = 0.9, alpha = 0.1, beta = -0.1)
   )
-  expect_true(
-    is.numeric(get_hypergeom_cumulative(k = 5, m = 600, n = 25, N = 10000))
+  expect_error(
+    get_sample_n(N = 600, dLower = 0.7, dUpper = 0.9, alpha = 1.1, beta = 0.1)
+  )
+  expect_error(
+    get_sample_n(N = 600, dLower = 0.7, dUpper = 0.9, alpha = 0.1, beta = 1.1)
   )
 })
 
-## Test that output is a list
 
-test_that("output is list", {
-  expect_type(get_sample_n(N = 600, dLower = 0.7, dUpper = 0.9), "list")
-})
+test_that("get_sample_d works as expected", {
+  samp_plan <- get_sample_d(N = 600, n = 19, dLower = 0.7, dUpper = 0.9)
 
-test_that("output is list", {
-  expect_type(get_sample_d(N = 600, n = 40, dLower = 0.7, dUpper = 0.9), "list")
+  expect_type(samp_plan, "list")
+  expect_named(samp_plan, expected = c("n", "d", "alpha", "beta"))
+  
+  expect_error(get_sample_d(N = 600, n = 19, dLower = -0.7, dUpper = 0.9))
+  expect_error(get_sample_d(N = 600, n = 19, dLower = 0.7, dUpper = -0.9))
+  expect_error(get_sample_d(N = 600, n = 19, dLower = 7, dUpper = 0.9))
+  expect_error(get_sample_d(N = 600, n = 19, dLower = 0.7, dUpper = 9))
+  expect_error(get_sample_d(N = 600, n = 19, dLower = 0.9, dUpper = 0.7))
+
+  expect_error(
+    get_sample_d(
+      N = 600, n = 19, dLower = 0.7, dUpper = 0.9, alpha = -0.1, beta = 0.1
+    )
+  )
+  expect_error(
+    get_sample_d(
+      N = 600, n = 19, dLower = 0.7, dUpper = 0.9, alpha = 0.1, beta = -0.1
+    )
+  )
+  expect_error(
+    get_sample_d(
+      N = 600, n = 19, dLower = 0.7, dUpper = 0.9, alpha = 1.1, beta = 0.1
+    )
+  )
+  expect_error(
+    get_sample_d(
+      N = 600, n = 19, dLower = 0.7, dUpper = 0.9, alpha = 0.1, beta = 1.1
+    )
+  )
 })
 
-## Test that output is an numeric
 
-test_that("output is numeric", {
-  expect_type(get_n_cases(N = 100000, u5 = 0.17, p = 0.02), "double")
-  expect_true(is.numeric(get_n_cases(N = 100000, u5 = 0.17, p = 0.02)))
-})
+test_that("get_n_cases works as expected", {
+  n_cases <- get_n_cases(N = 10000, u5 = 0.17, p = 0.02)
 
-test_that("output is numeric", {
-  expect_type(
-    get_n_clusters(n = 40, N = 100000, u5 = 0.17, p = 0.02), "double"
-  )
-  expect_true(
-    is.numeric(get_n_clusters(n = 40, N = 100000, u5 = 0.17, p = 0.02))
-  )
+  expect_type(n_cases, "double")
+  expect_true(is.numeric(n_cases))
+
+  expect_error(get_n_cases(N = 10000, u5 = -0.17, p = 0.02))
+  expect_error(get_n_cases(N = 10000, u5 = 17, p = 0.02))
+  expect_error(get_n_cases(N = 10000, u5 = 0.17, p = -0.02))
+  expect_error(get_n_cases(N = 10000, u5 = 0.17, p = 2))
 })
 
+
+test_that("get_n_clusters works as expected", {
+  n_clusters <- get_n_clusters(n = 40, n_cluster = 600, u5 = 0.17, p = 0.02)
+
+  expect_type(n_clusters, "double")
+  expect_true(is.numeric(n_clusters))
+
+  expect_error(get_n_clusters(n = 40, n_cluster = 600, u5 = -0.17, p = 0.02))
+  expect_error(get_n_clusters(n = 40, n_cluster = 600, u5 = 17, p = 0.02))
+  expect_error(get_n_clusters(n = 40, n_cluster = 600, u5 = 0.17, p = -0.02))
+  expect_error(get_n_clusters(n = 40, n_cluster = 600, u5 = 0.17, p = 2))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
new file mode 100644
index 0000000..cee816d
--- /dev/null
+++ b/tests/testthat/test-utils.R
@@ -0,0 +1,36 @@
+# Tests for utility functions --------------------------------------------------
+
+## Test that outputs are numeric
+
+test_that("output is numeric", {
+  expect_type(get_binom_hypergeom(n = 600, k = 40), "double")
+  expect_true(is.numeric(get_binom_hypergeom(n = 600, k = 40)))
+})
+
+test_that("output is numeric", {
+  expect_type(get_hypergeom(k = 5, m = 600, n = 25, N = 10000), "double")
+  expect_true(is.numeric(get_hypergeom(k = 5, m = 600, n = 25, N = 10000)))
+})
+
+test_that("output is numeric", {
+  expect_type(
+    get_hypergeom_cumulative(k = 5, m = 600, n = 25, N = 10000), "double"
+  )
+  expect_true(
+    is.numeric(get_hypergeom_cumulative(k = 5, m = 600, n = 25, N = 10000))
+  )
+  expect_type(
+    get_hypergeom_cumulative(
+      k = 5, m = 600, n = 25, N = 10000, tail = "upper"
+    ), 
+    "double"
+  )
+  expect_true(
+    is.numeric(
+      get_hypergeom_cumulative(
+        k = 5, m = 600, n = 25, N = 10000, tail = "upper"
+      )
+    )
+  )
+})
+