atorus-research · mstackhouse · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,6 @@ scratch/
 PR.md
 .DS_Store
 TPLYR2_REQUIREMENTS.MD
+
+/.quarto/
+**/*.quarto_ipynb
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Tplyr
 Title: A Traceability Focused Grammar of Clinical Data Summary
-Version: 1.3.0
+Version: 1.3.0.9000
 Authors@R: 
     c(
     person(given = "Eli",
@@ -46,7 +46,7 @@ License: MIT + file LICENSE
 URL: https://atorus-research.github.io/Tplyr/, https://github.com/atorus-research/Tplyr
 BugReports: https://github.com/atorus-research/Tplyr/issues
 Encoding: UTF-8
-Depends: R (>= 3.5.0)
+Depends: R (>= 4.1.0)
 Imports:
     rlang (>= 0.4.6),
     assertthat (>= 0.2.1),

diff --git a/R/sort.R b/R/sort.R
@@ -2,6 +2,36 @@
 
 #### Helpers ####
 
+#' Find row indices for special rows (total, missing, missing subjects)
+#'
+#' Locates the positions of total, missing, and missing subjects rows within
+#' formatted data by matching labels in the last row_label column.
+#'
+#' @param formatted_data The formatted data frame
+#' @param missing_string String identifying missing rows
+#' @param total_row_label Label for the total row
+#' @param missing_subjects_row_label Label for missing subjects row
+#'
+#' @return A named list with elements `missing`, `total`, and
+#'   `missing_subjects`, each NULL or an integer vector of row indices
+#' @noRd
+find_special_row_indices <- function(formatted_data,
+                                     missing_string = NULL,
+                                     total_row_label = NULL,
+                                     missing_subjects_row_label = NULL) {
+  label_row_ind <- which(names(formatted_data) %in%
+                           tail(vars_select(names(formatted_data),
+                                            starts_with("row_label")), 1))
+  label_values <- unlist(formatted_data[, label_row_ind])
+
+  list(
+    missing = if (!is.null(missing_string)) which(label_values %in% missing_string),
+    total = if (!is.null(total_row_label)) which(label_values %in% total_row_label),
+    missing_subjects = if (!is.null(missing_subjects_row_label))
+      which(label_values %in% missing_subjects_row_label)
+  )
+}
+
 #' Check for VARN variable
 #'
 #' For needed by variables, looks for a <by>N. e.g. VISITN
@@ -360,10 +390,20 @@ add_order_columns.shift_layer <- function(x) {
     )
   }
 
+  # Find explicit indices of special rows
+  total_row_label <- env_get(x, "total_row_label", default = NULL)
+  missing_subjects_row_label <- env_get(x, "missing_subjects_row_label", default = NULL)
+  row_inds <- find_special_row_indices(formatted_data,
+                                       total_row_label = total_row_label,
+                                       missing_subjects_row_label = missing_subjects_row_label)
+
   # The logic is the same now for a byvarn so reuse that function
   formatted_data[, paste0("ord_layer_", formatted_col_index)] <-
     get_data_order_byvarn(formatted_data, fact_df, as_name(target_var$row),
-                          formatted_col_index, total_row_sort_value = total_row_sort_value,
+                          formatted_col_index,
+                          total_index = row_inds$total,
+                          total_row_sort_value = total_row_sort_value,
+                          missing_subjects_index = row_inds$missing_subjects,
                           missing_subjects_sort_value = missing_subjects_sort_value)
 
   # BIND: Bind necessary variables back into layer
@@ -460,31 +500,18 @@ get_data_order_count <- function(formatted_data, formatted_col_index,
   # Switch for the sorting method
   if (order_count_method == "bycount") {
 
-    # Get the index of the row with the missing and total names
-    label_row_ind <- which(names(formatted_data) %in%
-                             tail(vars_select(names(formatted_data),
-                                              starts_with("row_label")), 1))
-
-    missing_index <- NULL
-    total_index <- NULL
-    missing_subjects_index <- NULL
-
-    if (!is.null(missing_string)) {
-      missing_index <- which(unlist(formatted_data[, label_row_ind]) %in% missing_string)
-    }
-    if (!is.null(total_row_label)) {
-      total_index <- which(unlist(formatted_data[, label_row_ind]) %in% total_row_label)
-    }
-    if (!is.null(missing_subjects_row_label)) {
-      missing_subjects_index <- which(unlist(formatted_data[, label_row_ind]) %in% missing_subjects_row_label)
-    }
+    # Get the indices of special rows (missing, total, missing subjects)
+    row_inds <- find_special_row_indices(formatted_data,
+                                         missing_string = missing_string,
+                                         total_row_label = total_row_label,
+                                         missing_subjects_row_label = missing_subjects_row_label)
 
     # No processing is needed here just pass in the needed info
     get_data_order_bycount(numeric_data, ordering_cols,
                            treat_var, by, cols, result_order_var, target_var,
-                           missing_index, missing_sort_value,
-                           total_index, total_row_sort_value,
-                           missing_subjects_index, missing_subjects_sort_value,
+                           row_inds$missing, missing_sort_value,
+                           row_inds$total, total_row_sort_value,
+                           row_inds$missing_subjects, missing_subjects_sort_value,
                            break_ties = break_ties,
                            numeric_cutoff = numeric_cutoff,
                            numeric_cutoff_stat = numeric_cutoff_stat,
@@ -498,7 +525,7 @@ get_data_order_count <- function(formatted_data, formatted_col_index,
 
     if (!is.null(missing_count_list)) {
       varN_name <- names(varn_df)[2]
-      varn_df[, 1] <- as.character(varn_df[, 1])
+      varn_df[[1]] <- as.character(varn_df[[1]])
       if (is.null(missing_sort_value)) {
         varn_df <- varn_df %>%
           bind_rows(tibble(
@@ -514,8 +541,16 @@ get_data_order_count <- function(formatted_data, formatted_col_index,
       }
     }
 
+    # Find the explicit indices of special rows
+    row_inds <- find_special_row_indices(formatted_data,
+                                         total_row_label = total_row_label,
+                                         missing_subjects_row_label = missing_subjects_row_label)
+
     get_data_order_byvarn(formatted_data, varn_df, as_name(target_var[[1]]),
-                          formatted_col_index, total_row_sort_value = total_row_sort_value,
+                          formatted_col_index,
+                          total_index = row_inds$total,
+                          total_row_sort_value = total_row_sort_value,
+                          missing_subjects_index = row_inds$missing_subjects,
                           missing_subjects_sort_value = missing_subjects_sort_value)
 
   # Here it is 'byfactor'
@@ -568,9 +603,17 @@ get_data_order_count <- function(formatted_data, formatted_col_index,
       }
     }
 
+    # Find the explicit indices of special rows
+    row_inds <- find_special_row_indices(formatted_data,
+                                         total_row_label = total_row_label,
+                                         missing_subjects_row_label = missing_subjects_row_label)
+
     # The logic is the same now for a byvarn so reuse that function
     get_data_order_byvarn(formatted_data, fact_df, as_name(target_var[[1]]),
-                          formatted_col_index, total_row_sort_value = total_row_sort_value,
+                          formatted_col_index,
+                          total_index = row_inds$total,
+                          total_row_sort_value = total_row_sort_value,
+                          missing_subjects_index = row_inds$missing_subjects,
                           missing_subjects_sort_value = missing_subjects_sort_value)
   }
 }
@@ -667,37 +710,40 @@ get_data_order_bycount <- function(numeric_data, ordering_cols,
 }
 
 get_data_order_byvarn <- function(formatted_data, by_varn_df, by_var, by_column_index,
-                                  indentation = "", total_row_sort_value = NULL,
+                                  indentation = "",
+                                  total_index = NULL, total_row_sort_value = NULL,
+                                  missing_subjects_index = NULL,
                                   missing_subjects_sort_value = NULL) {
 
   # Pull out the by values in the formatted data.
   by_values <- unlist(formatted_data[, by_column_index])
 
+  # Look up the VARN value for each row. Unmatched rows get a default
+  # value that places them at the end.
+  default_sort <- max(unlist(by_varn_df[, 2])) + 1
   varns <- map_dbl(by_values, function(a_by) {
 
     # Row containing the index and the value
     ind_row <- by_varn_df %>%
-      # Converting by_var to a symbol didn't work here for some reason but this
-      # works just as well.
       filter(.data[[as_name(by_var)]] == a_by)
 
-    # If the row is length zero it is a total row. Just add one so it appears on the bottom or use the sort_value
     if (nrow(ind_row) == 0) {
-      # Flag to determine where total row is positioned
-      if(!is.null(total_row_sort_value)) {
-        total_row_sort_value
-      } else if (!is.null(missing_subjects_sort_value)){
-        missing_subjects_sort_value
-      } else {
-        max(by_varn_df[,2]) + 1
-      }
+      default_sort
     } else {
-      # Index is always in the second row
       as.double(unlist(ind_row[, 2]))
     }
   })
 
-  # Remove the names so its just an unnamed numeric vecetor
+  # Apply sort values to specific total and missing subjects rows by index
+  if (!is.null(total_index) && !is.null(total_row_sort_value)) {
+    varns[total_index] <- total_row_sort_value
+  }
+
+  if (!is.null(missing_subjects_index) && !is.null(missing_subjects_sort_value)) {
+    varns[missing_subjects_index] <- missing_subjects_sort_value
+  }
+
+  # Remove the names so its just an unnamed numeric vector
   unname(varns)
 
 }
@@ -775,11 +821,12 @@ add_data_order_nested_vectorized <- function(formatted_data, final_col, numeric_
     #   - by[[1]] becomes row_label1 (contains NA for outer rows)
     #   - summary_var becomes row_label{length(by)+1} (contains actual outer values)
     # So we need to look at column final_col + 1, not final_col
-    all_outer$..outer_order <- all_outer %>%
-      replace_by_string_names(c(by, quo(summary_var))) %>%
-      get_data_order_byvarn(varn_df, by[[1]], final_col + 1,
-                            total_row_sort_value = total_row_sort_value,
-                            missing_subjects_sort_value = missing_subjects_sort_value)
+    renamed_outer <- all_outer %>%
+      replace_by_string_names(c(by, quo(summary_var)))
+    # No total/missing subjects rows expected at the outer nesting level,
+    # so no indices are passed
+    all_outer$..outer_order <-
+      get_data_order_byvarn(renamed_outer, varn_df, by[[1]], final_col + 1)
 
   } else if (order_count_method[1] == "bycount") {
     all_outer$..outer_order <- get_data_order_bycount(

diff --git a/R/utils.R b/R/utils.R
@@ -256,11 +256,13 @@ extract_character_from_quo <- function(var_list) {
 #' @param dat Dataframe to strip of variable attributes
 #'
 #' @return Dataframe with variable attributes removed, except for factor levels
+#'   and other structurally necessary attributes (e.g. units for difftime,
+#'   tzone for POSIXct)
 #' @noRd
 clean_attr <- function(dat) {
   for (n in names(dat)) {
     for (a in names(attributes(dat[[n]]))) {
-      if (!a  %in% c('levels', 'class', 'names', 'row.names', 'groups')) {
+      if (!a  %in% c('levels', 'class', 'names', 'row.names', 'groups', 'units', 'tzone')) {
         attr(dat[[n]], a) <- NULL
       }
     }

diff --git a/tests/testthat/test-sort.R b/tests/testthat/test-sort.R
@@ -300,6 +300,47 @@ test_that("Nested counts with by variables process properly", {
 
 })
 
+# Added to address #202
+test_that("byvarn sorting with a total row only applies sort_value to the total row", {
+  t <- tplyr_table(tplyr_adsl, TRT01A) %>%
+    add_layer(
+      group_count(AGEGR1) %>%
+        set_order_count_method("byvarn") %>%
+        add_total_row(fmt = f_str("xx", n), count_missings = FALSE, sort_value = -Inf) %>%
+        set_total_row_label("n")
+    )
+  b_t <- build(t)
+
+  # The total row should have -Inf, other rows should have their AGEGR1N values
+  total_mask <- b_t$row_label1 == "n"
+  expect_true(all(b_t$ord_layer_1[total_mask] == -Inf))
+  expect_true(all(b_t$ord_layer_1[!total_mask] != -Inf))
+
+  # Non-total rows should have distinct sort values matching AGEGR1N
+  non_total_sorts <- b_t$ord_layer_1[!total_mask]
+  agegr1n_values <- sort(unique(tplyr_adsl$AGEGR1N))
+  expect_equal(sort(non_total_sorts), agegr1n_values)
+
+  # Same test with a by variable and set_missing_count, which exercises
+  # the as.character() coercion path for varn_df
+  t2 <- tplyr_table(tplyr_adsl, TRT01A) %>%
+    add_layer(
+      group_count(AGEGR1, by = "Age Categories n (%)") %>%
+        set_order_count_method("byvarn") %>%
+        add_total_row(fmt = f_str("xx", n), count_missings = FALSE, sort_value = -Inf) %>%
+        set_total_row_label("n") %>%
+        set_missing_count(fmt = f_str("xx", n), denom_ignore = TRUE, "Missing" = NA)
+    )
+  b_t2 <- build(t2)
+
+  total_mask2 <- b_t2$row_label2 == "n"
+  missing_mask2 <- b_t2$row_label2 == "Missing"
+  data_mask2 <- !total_mask2 & !missing_mask2
+
+  expect_true(all(b_t2$ord_layer_2[total_mask2] == -Inf))
+  expect_equal(sort(b_t2$ord_layer_2[data_mask2]), sort(unique(tplyr_adsl$AGEGR1N)))
+})
+
 test_that("Sorting functions work correctly after refactoring", {
   # Test all three sorting methods to ensure functionality is preserved
 

diff --git a/vignettes/desc.Rmd b/vignettes/desc.Rmd
@@ -186,8 +186,12 @@ tplyr_table(tplyr_adsl, TRT01P) %>%
     group_desc(AGE) %>% 
       set_format_strings("Mean" = f_str('xx.xx', mean))
   ) %>% 
-  build() %>% 
+  build() %>%
   kable()
 ```
 
+```{r echo=FALSE}
+options(tplyr.custom_summaries = NULL)
+```
+
 Note that the table code used to produce the output is the same. Now **Tplyr** used the custom summary function for `mean` as specified in the `tplyr.custom_summaries` option. Also note the use of `rlang::quos()`. We've done our best to mask this from the user everywhere possible and make the interfaces clean and intuitive, but a great deal of **Tplyr** is built using 'rlang' and non-standard evaluation. Within this option is one of the very few instances where a user needs to concern themselves with the use of quosures. If you'd like to learn more about non-standard evaluation and quosures, we recommend [Section IV](https://adv-r.hadley.nz/metaprogramming.html) in Advanced R. 
diff --git a/vignettes/options.Rmd b/vignettes/options.Rmd
@@ -258,7 +258,11 @@ tplyr_table(tplyr_adsl, TRT01P) %>%
   kable()
 ```
 
-## IBM Rounding 
+```{r include=FALSE}
+options(op)
+```
+
+## IBM Rounding
 
 In certain cases users may want to match tables produced by other languages that IBM rounding.**Tplyr** offers the option 'tplyr.IBMRounding' to change the default rounding behavior of **Tplyr** tables. Review var1_4 in the tables below.