Skip to contents

Prepare datasets to make summaries


  summarize_var = NULL,
  summarize_by = NULL,
  variables_dt = variable_list,
  data = hts_data,
  id_cols = c("hh_id", "person_id", "day_id", "trip_id", "vehicle_id"),
  weighted = TRUE,
  wt_cols = c("hh_weight", "person_weight", "day_weight", "trip_weight", "hh_weight"),
  remove_outliers = TRUE,
  threshold = 0.975,
  remove_missing = TRUE,
  missing_values = c("Missing Response", "995"),
  not_imputable = -1,
  strataname = NULL



Name of the variable to summarize. Default is NULL


Name of the variable to summarize the summarize_var by. Default is NULL.


List of variable locations and descriptions in data.table format.


List of household, person, vehicle, day, and trip tables in data.table format.


Names of the unique identifier columns for each table in hts_data.


Whether the data is weighted. Default is TRUE.


Names of the weight columns for each table in hts_data.


Whether to remove outliers for numeric variables. Default is TRUE.


Threshold to define outliers. Default is 0.975.


Whether to remove missing values from the summary. Default is TRUE.


Missing values to remove. Default is c("Missing Response", "995").


Value representing 'Not imputable' to remove. Default is -1.


Name of the strata to bring in. Default is NULL.


List containing the categorical and numeric datasets of the summary variables and key columns, plus either an indicator of whether the summarize variable is shared (when the summarize variable is categorical) or a breakdown of outliers (when it is numeric).


hts_prep_data(summarize_var = 'age',
              variables_dt = variable_list,
              data = list('hh' = hh,
                            'person' = person,
                            'day' = day,
                            'trip' = trip,
                            'vehicle' = vehicle))
#> Creating a summary of person age 
#> $cat
#>       hh_id person_id age person_weight
#>    1:   356         1  10           229
#>    2:   724         2  11           128
#>    3:   681         3   7           888
#>    4:   114         4   2           350
#>    5:   165         5  10           825
#>   ---                                  
#> 2043:   931      2043   7           116
#> 2044:   667      2044   1           122
#> 2045:   543      2045   3           494
#> 2046:   749      2046  11           874
#> 2047:   364      2047  11           393
#> $num
hts_prep_data(summarize_var = 'speed_mph',
              summarize_by = 'age',
              variables_dt = variable_list,
              data = list('hh' = hh,
                            'person' = person,
                            'day' = day,
                            'trip' = trip,
                            'vehicle' = vehicle))
#> Creating a summary of trip speed_mph broken down by person age
#> $cat
#>        hh_id person_id day_id trip_id person_weight trip_weight speed_mph age
#>     1:     2       425    388   11340           130         258      1-10  12
#>     2:     2       425    388    9915           130          57     19-28  12
#>     3:     2       425   1320    4947           130         233     10-19  12
#>     4:     2       892   1559    4450           715         845 1 or less   3
#>     5:     2       892   1559    7943           715         685     10-19   3
#>    ---                                                                       
#> 14718:  1000       352   1663    2279           883         177     19-28  10
#> 14719:  1000       352   3389    7926           883         819     10-19  10
#> 14720:  1000       352   3389    1639           883          25     10-19  10
#> 14721:  1000       352   3389   13926           883         216      1-10  10
#> 14722:  1000       352   3389   11951           883         700      1-10  10
#> $num
#>        hh_id person_id day_id trip_id person_weight trip_weight speed_mph age
#>     1:     2       425    388   11340           130         258  2.981870  12
#>     2:     2       425    388    9915           130          57 20.761805  12
#>     3:     2       425   1320    4947           130         233 16.885215  12
#>     4:     2       892   1559    4450           715         845  0.610109   3
#>     5:     2       892   1559    7943           715         685 10.902785   3
#>    ---                                                                       
#> 14718:  1000       352   1663    2279           883         177 26.002610  10
#> 14719:  1000       352   3389    7926           883         819 10.201849  10
#> 14720:  1000       352   3389    1639           883          25 13.915420  10
#> 14721:  1000       352   3389   13926           883         216  3.337872  10
#> 14722:  1000       352   3389   11951           883         700  2.583252  10
#> $outliers
#>    threshold num_removed min_outlier max_outlier
#> 1:     0.975         378    112.9918    228233.1
hts_prep_data(summarize_var = 'employment',
              summarize_by = c('age', 'race'),
              variables_dt = variable_list,
              data = list('hh' = hh,
                            'person' = person,
                            'day' = day,
                            'trip' = trip,
                            'vehicle' = vehicle))
#> Creating a summary of person employment broken down by person age and person race
#> $cat
#>       hh_id person_id person_weight employment age                 race
#>    1:     2       217            92          3   3          Two or more
#>    2:     2       425           130          5  12          Two or more
#>    3:     2       892           715          1   3 Prefer not to answer
#>    4:     3       417           987          8  10 Prefer not to answer
#>    5:     3      1671           644          5   9          Two or more
#>   ---                                                                  
#> 1749:   997      1724           452          2  11 Prefer not to answer
#> 1750:   998       409           406          5   7          Two or more
#> 1751:   998       817           664          6   4 Prefer not to answer
#> 1752:   999      1305           836          6  10          Two or more
#> 1753:  1000       352           883          1  10          Two or more
#> $num