Prepare datasets to make summaries
Usage
hts_prep_variable(
summarize_var = NULL,
summarize_by = NULL,
variables_dt = variable_list,
data = list(hh = hh, person = person, day = day, trip = trip, vehicle = vehicle),
id_cols = c("hh_id", "person_id", "day_id", "trip_id", "vehicle_id"),
weighted = TRUE,
wt_cols = c("hh_weight", "person_weight", "day_weight", "trip_weight", "hh_weight"),
remove_outliers = TRUE,
threshold = 0.975,
remove_missing = TRUE,
missing_values = c("Missing Response", "995"),
not_imputable = -1,
strataname = NULL
)
Arguments
- summarize_var
Name of the variable to summarize. Default is NULL.
- summarize_by
Name of the variable to summarize the summarize_var by. Default is NULL.
- variables_dt
List of variable locations and descriptions in data.table format.
- data
List of household, person, vehicle, day, and trip tables in data.table format.
- id_cols
Name of the unique identifier column for each table in data.
- weighted
Whether the data is weighted. Default is TRUE.
- wt_cols
Name of the weight column for each table in data.
- remove_outliers
Whether to remove outliers for numeric variables. Default is TRUE.
- threshold
Threshold to define outliers. Default is 0.975.
- remove_missing
Whether to remove missing values from the summary. Default is TRUE.
- missing_values
Missing values to remove. Default is c("Missing Response", "995").
- not_imputable
Value representing 'Not imputable' to remove. Default is -1.
- strataname
Name of the strata variable to bring in. Default is NULL.
Value
List containing the categorical and numeric datasets of the summary variables and key columns, plus either an indicator of whether the summarize variable is shared or a breakdown of outliers, depending on whether the summarize variable is categorical or numeric.
Examples
require(data.table)
require(stringr)
hts_prep_variable(
summarize_var = "age",
variables_dt = variable_list,
data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle
)
)
#> $cat
#> hh_id person_id age person_weight
#> 1: 356 1 10 229
#> 2: 724 2 11 128
#> 3: 681 3 7 888
#> 4: 114 4 2 350
#> 5: 165 5 10 825
#> ---
#> 2043: 931 2043 7 116
#> 2044: 667 2044 1 122
#> 2045: 543 2045 3 494
#> 2046: 749 2046 11 874
#> 2047: 364 2047 11 393
#>
#> $num
#> NULL
#>
hts_prep_variable(
summarize_var = "speed_mph",
summarize_by = "age",
variables_dt = variable_list,
data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle
)
)
#> Warning: 378 outliers were removed based on the threshold of 0.975.
#> $cat
#> hh_id person_id day_id trip_id person_weight trip_weight speed_mph age
#> 1: 2 425 388 11340 130 258 1-10 12
#> 2: 2 425 388 9915 130 57 19-28 12
#> 3: 2 425 1320 4947 130 233 10-19 12
#> 4: 2 892 1559 4450 715 845 1 or less 3
#> 5: 2 892 1559 7943 715 685 10-19 3
#> ---
#> 14718: 1000 352 1663 2279 883 177 19-28 10
#> 14719: 1000 352 3389 7926 883 819 10-19 10
#> 14720: 1000 352 3389 1639 883 25 10-19 10
#> 14721: 1000 352 3389 13926 883 216 1-10 10
#> 14722: 1000 352 3389 11951 883 700 1-10 10
#>
#> $num
#> hh_id person_id day_id trip_id person_weight trip_weight speed_mph age
#> 1: 2 425 388 11340 130 258 2.981870 12
#> 2: 2 425 388 9915 130 57 20.761805 12
#> 3: 2 425 1320 4947 130 233 16.885215 12
#> 4: 2 892 1559 4450 715 845 0.610109 3
#> 5: 2 892 1559 7943 715 685 10.902785 3
#> ---
#> 14718: 1000 352 1663 2279 883 177 26.002610 10
#> 14719: 1000 352 3389 7926 883 819 10.201849 10
#> 14720: 1000 352 3389 1639 883 25 13.915420 10
#> 14721: 1000 352 3389 13926 883 216 3.337872 10
#> 14722: 1000 352 3389 11951 883 700 2.583252 10
#>
#> $outliers
#> threshold num_removed min_outlier max_outlier
#> 1: 0.975 378 112.9918 228233.1
#>
hts_prep_variable(
summarize_var = "employment",
summarize_by = c("age", "race"),
variables_dt = variable_list,
data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle
)
)
#> $cat
#> hh_id person_id person_weight employment age race
#> 1: 2 217 92 3 3 Two or more
#> 2: 2 425 130 5 12 Two or more
#> 3: 2 892 715 1 3 Prefer not to answer
#> 4: 3 417 987 8 10 Prefer not to answer
#> 5: 3 1671 644 5 9 Two or more
#> ---
#> 1749: 997 1724 452 2 11 Prefer not to answer
#> 1750: 998 409 406 5 7 Two or more
#> 1751: 998 817 664 6 4 Prefer not to answer
#> 1752: 999 1305 836 6 10 Two or more
#> 1753: 1000 352 883 1 10 Two or more
#>
#> $num
#> NULL
#>