Make household travel survey summaries
Usage
hts_summary(
prepped_dt,
summarize_var,
summarize_by = NULL,
summarize_vartype = "categorical",
id_cols = c("hh_id", "person_id", "day_id", "trip_id", "vehicle_id"),
weighted = TRUE,
se = FALSE,
wtname = NULL,
strataname = NULL,
checkbox_valname = "value",
checkbox_yesval = 1
)
Arguments
- prepped_dt
A prepared dataset in data.table format with the variable to summarize, the variable to summarize by, and the weights, if used.
- summarize_var
Name of the variable to summarize. Required; this argument has no default.
- summarize_by
Name of the variable to summarize the summarize_var by. Default is NULL.
- summarize_vartype
String; one of 'categorical' (when the variable being summarized is categorical), 'checkbox' (when the variable being summarized is derived from a multiple-response, aka select-all-that-apply, question), or 'numeric' (when the variable being summarized is numeric). Default is 'categorical'.
- id_cols
Names of the possible ID columns in prepped_dt for which unique counts are returned. Default is c("hh_id", "person_id", "day_id", "trip_id", "vehicle_id").
- weighted
Whether the data is weighted. Default is TRUE.
- se
Whether to calculate standard error. Default is FALSE. Will be set to FALSE if weighted is FALSE.
- wtname
Name of the weight column to use. Default is NULL. Must be specified when weighted = TRUE.
- strataname
Name of the strata variable to bring in. Default is NULL.
- checkbox_valname
Name of the column with the checkbox value. Default is 'value'. Must be provided if summarize_var is a checkbox variable.
- checkbox_yesval
Value of checkbox_valname that indicates it was selected. Default is 1. Must be provided if summarize_var is a checkbox variable.
Value
A list containing (if applicable) categorical and numeric summaries of the specified variable(s), as well as sample sizes and whether or not the summarized variable is a shared checkbox variable. To access the categorical/numeric summaries use output$summary: output$summary$wtd is the weighted df, output$summary$unwtd is the unweighted df, and output$summary$weight_name is the weight name. To access sample sizes use output$n_ls; the weighted and unweighted sample sizes are output$n_ls$wtd and output$n_ls$unwtd, respectively.
Examples
require(data.table)
require(stringr)
require(dplyr)
#> Loading required package: dplyr
#> Warning: package 'dplyr' was built under R version 4.3.2
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:data.table':
#>
#> between, first, last
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
require(srvyr)
#> Loading required package: srvyr
#> Warning: package 'srvyr' was built under R version 4.3.2
#>
#> Attaching package: 'srvyr'
#> The following object is masked from 'package:stats':
#>
#> filter
DT = hts_prep_variable(
summarize_var = "age",
summarize_by = "employment",
variables_dt = variable_list,
data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle
)
)$cat
output = hts_summary(
prepped_dt = DT,
summarize_var = "age",
summarize_by = "employment",
summarize_vartype = "categorical",
wtname = "person_weight"
)
DT = hts_prep_variable(
summarize_var = "speed_mph",
summarize_by = "age",
variables_dt = variable_list,
data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle
)
)$num
#> Warning: 378 outliers were removed based on the threshold of 0.975.
output = hts_summary(
prepped_dt = DT,
summarize_var = "speed_mph",
summarize_by = "age",
summarize_vartype = "numeric",
wtname = "trip_weight"
)
DT = hts_prep_variable(
summarize_var = "race",
summarize_by = "age",
variables_dt = variable_list,
data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle
)
)$cat
output = hts_summary(
prepped_dt = DT,
summarize_var = "race",
summarize_by = "age",
summarize_vartype = "checkbox",
wtname = "person_weight"
)