# Load hts.weighting package
devtools::load_all()
# Load settings; pass python_env explicitly for Quarto
settings = get_settings(reload_settings = TRUE,
print = FALSE)
# Set pathways for reference
code_root = get("code_root", settings)
work_dir = get('working_dir', settings)
inputs_dir = get('inputs_dir', settings)
outputs_dir = get('outputs_dir', settings)
report_dir = get('report_dir', settings)
popsim_initial_label = get("popsim_initial_label", settings)
popsim_daypat_label = get("popsim_daypat_label", settings)10 Round 3 Weighting: Trip Weight Adjustments
11 Overview
In the previous chapter, Round 2 household-level weights were produced, adjusting for day-pattern response bias. This script will start by using those weights to produce person- and day-level weights. These weights are applied to the trip table to generate unlinked (and, if available, linked trip and tour) trip weights. However, analysis of other household travel surveys has shown that the number of trips reported varies systematically by diary platform. To correct for this bias, this script applies a final stage of adjustment at the trip level, referred to as Round 3 weighting.
12 Chapter Setup
12.1 Load Packages, Settings, and Directory Pathways
13 Calculate Round 2 Person-, Day-, and Trip-level Weights
After Round 2 household-level weights are generated from PopulationSim, person- and day-level weights are calculated, and from this point foreward receive no further adjustments. These weights are then used to calculate the Round 2 unlinked and, if available, linked trip- and tour-level weights.
- Person-level weights: Each person is assigned their corresponding household-level weight for each weighted day group (
weight_dow_groups) specified in the settings. If adjustment for unrelated household members is specified in the settings, their weights are set to 0 and the remaining household members have their weights adjusted up. - Day-level weights: The day weight is calculated as the person weight divided by the number of completed days in the household. Days that are not study days (i.e., not in specified day-of-week groups) and/or are incomplete are assigned a weight of 0.
- Unlinked trip-level weights: Each unlinked trip is assigned their corresponding day weight. Unlinked trips that occurred on non-study days are set to 0. Synthetically produced trips are assigned a weight of 0 if
weight_synthetic_tripsin the settings is set to false.
- Day-of-week weights were not calculated for PSRC, so the day-level weight represents one average weekday for each person in the study. Thus, the sum of the trip-level weight represents the total trips in the region on an average weekday (Tuesday, Wednesday, and Thursday)./
- Synthetic trips were not weighted.
Linked trip and tour weights represent the average weight of their component trips (linked trips) or legs (tours). Linked trip weights are the average of the weighted unlinked trips that make up that linked trip.
# Load imputed household and person data, along with survey tables.
households = readRDS(file.path(work_dir, 'households_survey_imputed.rds'))
persons = readRDS(file.path(work_dir, 'persons_survey_imputed.rds'))
# Get codebook and other survey tables for value labels, days, and trips
value_labels = fetch_hts_table('value_labels', settings)
days = fetch_hts_table("day", settings)
trips = fetch_hts_table("trip", settings)
# Remove any records associated with dropped households to ensure data consistency
persons = persons[hh_id %in% households$hh_id]
days = days[hh_id %in% households$hh_id]
trips = trips[hh_id %in% households$hh_id]
# Organize weighting rounds and prepare to collect output weights for diagnostics
popsim_rounds = list(
round_1 = get('popsim_initial_label', settings),
round_2 = get('popsim_daypat_label', settings)
)
weights_list = list()
# This prepares a set of weights for each weighting round, for diagnostic purposes.
for (round_type in names(popsim_rounds)) {
if (
round_type == "round_2" &&
! get('daypat_weighting', settings)
) {
message(paste0("Skipping ", round_type, " weights as 070_daypat_weighting.R was not run."))
next
}
hh_weights_path = file.path(
get('working_dir', settings),
popsim_rounds[[round_type]],
'output/final_summary_hh_weights.csv'
)
hh_weights_raw = fread(hh_weights_path, colClasses = c(hh_id_dow = 'character'))
setnames(hh_weights_raw, 'zone_group_balanced_weight', 'hh_weight')
# Add num_trips to days -------------------------------------------------------
if (!("num_trips" %in% names(days))) {
days[
trips[, .N, keyby = .(day_id)],
num_trips := i.N,
on = .(day_id)
]
days[is.na(num_trips), num_trips := 0]
}
# Check that hh_num_weekdays_complete is correct
# Otherwise, weight totals will not match
hh_complete_days = calc_complete_hhdays(households, days, settings, include_zeros = TRUE)
# If only one day_group, remove dummy nonstudy_day
if (length(get("weight_dow_groups", settings)) == 1) {
hh_complete_days = hh_complete_days[day_group != 'nonstudy_day', ]
}
hh_weights = merge(
hh_weights_raw,
hh_complete_days[, .(hh_id_dow, n_complete_hh_days, diary_platform, day_group, hh_id)],
by = "hh_id_dow",
all = TRUE
)
hh_weights[is.na(hh_weight), hh_weight := 0]
# Recalculate number of complete person days for each day_group
day_groups = get_day_groups(settings)
days[, person_id := as.character(person_id)]
days[day_groups, day_group := i.day_group, on = .(travel_dow)]
days[, n_complete_person_days := 0]
days[is_complete == 1, n_complete_person_days := .N, by = .(person_id, day_group)]
# All Online diaries should only have one complete day
stopifnot(hh_weights[diary_platform != "rmove" & n_complete_hh_days != 1, .N] == 0)
# Sum of weights should not change
stopifnot(hh_weights[hh_weight > 0, .N] == hh_weights_raw[, .N])
# Calculate person weights -----------------------------------------------------
hh_weights[households, `:=`
(
num_people = i.num_people,
num_adults = i.num_adults
),
on = .(hh_id)
]
person_weights = calc_person_weights(
hh_weights,
households,
persons,
value_labels,
settings
)
# Calculate day weights -------------------------------------------------------
# days[, person_id := as.character(person_id)] # Can we delete this line?
day_weights = calc_day_weights(
person_weights,
households,
persons,
days,
trips,
value_labels,
settings
)
# Trip weights =================================================================
trip_weights = calc_trip_weights(trips, day_weights, settings)
# Cross-check for missing relationships, set to 0 if orphaned ------------------
# Find common day_ids in days, trips
common_day_ids = intersect(days$day_id, trips$day_id)
common_person_ids = intersect(intersect(persons$person_id, days$person_id), trips$person_id)
# Check for orphaned trips that don't have a day or person or household
# This should not happen, but if it does, set the weight to 0
trip_weights[
!day_id %in% common_day_ids |
!hh_id %in% households$hh_id |
!person_id %in% common_person_ids,
trip_weight := 0
]
# If person level study, adjust the hh_weights by num_adults since the hh_weight is the person weight
if (get("study_unit", settings) == "person") {
hh_weights[, hh_weight := hh_weight / num_adults]
}
# Append to output weights -----------------------------------------------------
weights_round = list(
hh_weights = hh_weights[, .(hh_id, day_group, hh_weight)],
person_weights = person_weights[, .(hh_id, person_id, day_group, person_weight)],
day_weights = day_weights[, .(hh_id, person_id, day_id, day_group, day_weight)],
trip_weights = trip_weights[,
.(hh_id, person_id, day_id, trip_id, linked_trip_id, tour_id, day_group, trip_weight)
]
)
# Check that all nonstudy_day day_groups are 0 weight
for (wt in weights_round) {
wt_col = stringr::str_subset(names(wt), "weight$")
if (wt[day_group == "nonstudy_day", sum(get(wt_col))] != 0) {
stop(paste0("Nonstudy day group has non-zero weight in ", wt_col, " column."))
}
}
# Linked trip and tour weights =================================================
if ("linked_trip" %in% names(settings$hts_table_map)) {
# Load linked trips
linked_trips = fetch_hts_table("linked_trip", settings)
# Filter out records for dropped households
linked_trips = linked_trips[linked_trip_id %in% unique(trips$linked_trip_id), ]
# Assign day_group
linked_trips[day_weights, day_group := i.day_group, on = .(day_id)]
stopifnot(all(!is.na(linked_trips$day_group)))
# Calculate linked trip weights
linked_trip_weights = calc_linked_trip_weights(
linked_trips,
unlinked_trips = trips,
unlinked_trip_weights = trip_weights,
day_weights,
settings
)
# Append to output
weights_round[['linked_trip_weights']] = linked_trip_weights[,
.(hh_id, person_id, day_id, linked_trip_id, tour_id, day_group, linked_trip_weight)
]
}
if ("tour" %in% names(settings$hts_table_map)) {
# Load tour data
tours = fetch_hts_table("tour", settings)
# Assign day_group
linked_trips[day_weights, day_group := i.day_group, on = .(day_id)]
stopifnot(all(!is.na(linked_trips$day_group)))
# Filter out records for dropped households
tours = tours[tour_id %in% unique(linked_trips$tour_id), ]
# Calculate tour weights
tour_weights = calc_tour_weights(
tours,
unlinked_trips = trips,
unlinked_trip_weights = trip_weights,
day_weights,
settings
)
# Append to output
weights_round[['tour_weights']] = tour_weights[,
.(hh_id, person_id, day_id, tour_id, day_group, tour_weight)
]
}
weights_list[[round_type]] = weights_round
}13.0.1 Save Round 2 Person-, Day-, and Trip-Level Weights
# Append weights with suffix and write to CSV ==============================================
# Unsuffixed weight is the "current" weight with the adjustments from the latest round
appended_weights_list = list()
for (round_name in names(weights_list)) {
weights_round = weights_list[[round_name]]
message(paste0("Appending weights from round: ", round_name))
for (table_name in names(weights_round)) {
# First round, just copy
if (is.null(appended_weights_list[[table_name]])) {
weight_comb = copy(weights_round[[table_name]])
# Copy current weight to a suffixed column
wt_col = stringr::str_subset(names(weight_comb), '_weight$')
wt_col_adj = paste0(wt_col, '_', round_name)
weight_comb[, (wt_col_adj) := get(wt_col)]
} else {
weight_prev = appended_weights_list[[table_name]]
id_cols = c(
stringr::str_subset(names(weight_prev), '_id$'),
"day_group"
)
weight_comb = merge(
weight_prev,
weights_round[[table_name]],
by = id_cols,
suffixes = c('', paste0('_', round_name)),
all = TRUE
)
# Updated canonical weight to the latest round
wt_col = stringr::str_subset(names(weight_prev), '_weight$')
wt_col_adj = paste0(wt_col, '_', round_name)
weight_comb[, (wt_col) := get(wt_col_adj)]
}
# Check that the canonical weight matches the latest suffixed weight
stopifnot(all(weight_comb[[wt_col]] == weight_comb[[wt_col_adj]], na.rm = TRUE))
# Update the combined weight table
appended_weights_list[[table_name]] = weight_comb
}
}
# Write out combined weights for reference
for (weight_table in names(appended_weights_list)) {
path = file.path(get('outputs_dir', settings), paste0(weight_table, '.csv'))
message(paste0('Writing combined ', weight_table, ' to ', path))
fwrite(appended_weights_list[[weight_table]], file = path)
}
# Save a dummy file to indicate 080 has run
write.table(
"This is placeholder text. This file is used track the pipeline outputs.",
file.path(get('working_dir', settings), ".unadjusted_weights"),
row.names = FALSE, col.names = FALSE, quote = FALSE
)14 Round 3 Weighting
14.1 Trip weight adjustment: Overview
Analyses of other RSG household travel surveys show that the number of trips reported varies systematically by diary platform. In particular, Online and Call Center respondents tend to under-report certain types of trips compared to respondents using the rMove smartphone app, which automatically records travel in the background. To correct for this bias, RSG adjusts trip-level weights using calculated correction factors, referred to as Round 3 weighting.
The adjustment procedure operates at the linked trip level, unless linked trips are unavailable in which case unlinked trips are used. Each linked trip is classified into a mutually exclusive trip type, and trip rate factors are then estimated and applied to linked trips. Unlinked trips and tours receive their weights from linked trips. To ensure that the adjustment models have sufficient sample size, produce stable factors across person types and diary platforms, and yield interpretable results, this process estimates trip adjustment models for broader classes of trip purposes rather than for each survey-reported purpose category. Using origin and destination purposes, linked trips were classified into 4 categories (work, school, other, and loop). Only trips eligible for that trip type (i.e., work trips among employed persons) are considered for trip bias adjustment.
- Filter out unweighted records from each dataset.
- Categorize each linked trip (or if unavailable, unlinked trip) into mutually exclusive groups using the survey-reported trip purpose.
- Calculate the number of trips (overall and by trip type) for each person-day.
- Prepare the person-day dataset for modelling by assigning household- and person-level characteristics to each observation, which will serve as model covariates.
- Estimate Poisson regression models for each trip type, predicting the number of trips per person-day.
- Calculate correction factors by trip type by comparing predictions with and without diary platform coefficients.
- Adjust eligible trips using correction factors.
- If linked trip data are used in the steps above, calculate unlinked trip and tour weights using the new Round 3 adjusted weight.
- Review model fit/coefficients and trip rates by diary platform across each round of weighting for quality assurance.
Trip weight adjustment is actively under development. Planned updates are improvement in model fit by testing different model types and applying adjustments in PopulationSim. The goal of this work is to update models for accuracy and to not separate weight adjustments from the PopulationSim adjustment procedures.
14.2 Load Data
Load the household, person, day, and trip survey datasets and weights.
# Load value labels and imputed HTS data for households, persons, days, and trips
value_labels = fetch_hts_table('value_labels', settings)
# Load HTS data with imputations
households_raw = readRDS(file.path(work_dir, 'households_survey_imputed.rds'))
persons_raw = readRDS(file.path(work_dir, 'persons_survey_imputed.rds'))
days_raw = fetch_hts_table("day", settings)
trips_raw = fetch_hts_table("trip", settings)
# Set up column classes for joining and reading weights
colclasses = c(
hh_id = 'character',
person_id = 'character',
day_id = 'character',
trip_id = 'character',
linked_trip_id = 'character',
tour_id = 'character'
)
# Load the Round 2 weights ------
hh_weights = fread(
file.path(get('outputs_dir', settings), 'hh_weights.csv'),
colClasses = colclasses[c('hh_id')]
)
person_weights = fread(
file.path(get('outputs_dir', settings), 'person_weights.csv'),
colClasses = colclasses[c('hh_id', 'person_id')]
)
day_weights = fread(
file.path(get('outputs_dir', settings), 'day_weights.csv'),
colClasses = colclasses[c('hh_id', 'person_id', 'day_id')]
)
header = names(fread(file.path(get('outputs_dir', settings), 'trip_weights.csv'), nrows = 0))
trip_weights = fread(
file.path(get('outputs_dir', settings), 'trip_weights.csv'),
colClasses = colclasses[intersect(names(colclasses), header)]
)14.3 Data Cleaning
This section prepares the survey data by filtering out records without valid weights. Then, the number of unweighted trips per day using only weighted records, and weight variables are reset to the most recent round of weighting. For day-of-week weights, an average point estimate is generated for the household and person across survey days. The reset weights are then joined to the survey data, and diary platform information is appended to the unlinked trip weights to ensure consistency and readiness for subsequent analyses.
14.3.1 Filter Out Unweighted Records
Remove households and associated data with zero weights from all relevant datasets, ensuring only records with valid weights are retained.
# Find hh's with all zero weight days
zero_wt_hhs = hh_weights[, sum(hh_weight), by = hh_id][V1 == 0, hh_id]
# Drop all unweighted data
households = households_raw[!hh_id %in% zero_wt_hhs]
persons = persons_raw[person_id %in% person_weights[person_weight > 0, person_id]]
days = days_raw[day_id %in% day_weights[day_weight > 0, day_id]]
trips = trips_raw[trip_id %in% trip_weights[trip_weight > 0, trip_id]]14.3.2 Recalculate Number of Trips in Day Table from Trip Table
Ensure that the number of trips in the day table reflects only weighted trips for consistency.
# Recalculate num_trips in day table from trip table to ensure consistency ------
days[
trips[, .(num_trips = .N), by = .(day_id)],
num_trips := i.num_trips,
on = .(day_id)
]
day_weights[days, num_trips := i.num_trips, on = "day_id"]
day_weights[is.na(num_trips), num_trips := 0]14.3.3 Reset Trip Weight to Previous Weighting Round and Drop All Dummy Weights
Reset trip weights to their most recent round of weighting, removing outdated or dummy weight columns, and verify weight sums for data integrity.
# Reset canonical trip weight to latest non-adjusted trip weight (e.g., round 1 or 2) ----
if ("trip_weight_round_3" %in% names(trip_weights)) {
trip_weights[, trip_weight_round_3 := NULL]
}
if ('trip_weight_round_2' %in% names(trip_weights)) {
trip_weights[, trip_weight := trip_weight_round_2]
} else {
trip_weights[, trip_weight := trip_weight_round_1]
}
# Check that we didn't drop something we should have
stopifnot(
all.equal(
day_weights[, sum(day_weight * num_trips)],
trip_weights[, sum(trip_weight)]
)
)
# Drop dummy integer weights from survey tables -------------------------------
for (tbl_name in list("households", "persons", "days", "trips")) {
dt = get(tbl_name)
wt_col = grep('_weight$', names(dt), value = TRUE)
if (length(wt_col) > 1) {
stop("Multiple weight columns found in table ", tbl_name)
} else if (length(wt_col) == 1) {
dt[, (wt_col) := NULL]
}
}14.3.4 Calculate a Point Estimate Weight for Households/Persons Across Days
Compute average weights for households and persons across all days to provide point estimates for further analysis. If only one average weight across all days was calculated, the calculated weight will be the same.
# Calculate a point estimate weight for households and persons across days
hh_weights_avg = hh_weights[, .(hh_weight = mean(hh_weight)), by = .(hh_id)]
person_weights_avg = person_weights[, .(person_weight = mean(person_weight)), by = .(person_id)]14.3.5 Join Weights to Survey Data
Attach the weights to their respective household, person, day, and trip tables based on unique identifiers.
# Join the weights to their respective tables using appropriate keys
households[hh_weights_avg, hh_weight := i.hh_weight, on = "hh_id"]
persons[person_weights_avg, person_weight := i.person_weight, on = "person_id"]
days[day_weights, day_weight := i.day_weight, on = "day_id"]
trips[trip_weights, trip_weight := i.trip_weight, on = "trip_id"]14.3.6 Append Diary Platform Information to Trip Weights
Adds diary platform details to trip weights to support adjustment modeling.
# Add diary platform information to trip weights for adjustment modeling
trip_weights[households, diary_platform := i.diary_platform, on = "hh_id"]14.4 Trip Type Assignment
Trips are classified using the reported origin and destination purposes in the survey data into four mutually exclusive categories:
Work: trips with one end at work or a work-related location.
School: trips with one end at school or a school-related location.
Other: all other trips excluding loop trips.
Loop: trips that start and end in the same location.
Loop trips are identified in the linked trip table but excluded from the adjustment procedures. Loop trips are typically made for walking the dog or exercising and are not typically modeled. They also tend to be represented more in rMove data than in other survey modes and therefore would obfuscate the trip adjustment procedure if included.
14.4.1 Prepare Trip Types Table
This section creates a table for trip type assignment, using linked trip data if available. The table is limited to just the variables needed for trip type assignment.
# Prepare trip types table, using linked trips if available, otherwise use unlinked trips
if ("linked_trip" %in% names(settings$hts_table_map)) {
linked_trips = fetch_hts_table("linked_trip", settings)
idcols = stringr::str_subset(
names(fread(file.path(get('outputs_dir', settings), 'linked_trip_weights.csv'), nrows = 1)),
"(.+)_id$"
)
# Same logic, if this is a re-run read in all rounds of weights.
linked_trip_weights = fread(
file.path(get('outputs_dir', settings), 'linked_trip_weights.csv'),
colClasses = setNames(rep('character', length(idcols)), idcols)
)
# Limit to weighted households only
linked_trips = linked_trips[hh_id %in% households$hh_id]
# Create table for trip type assignment
trips_types = copy(
linked_trips[, .(
hh_id,
linked_trip_id,
day_id,
person_id,
opcat = o_purpose_category,
dpcat = d_purpose_category
)
]
)
join_col = "linked_trip_id"
# Join linked trip ID to trip weights
trip_weights[trips_raw, linked_trip_id := i.linked_trip_id, on = "trip_id"]
# Join linked trip weight and day group to trip types table
trips_types[linked_trip_weights, `:=`(
trip_weight = i.linked_trip_weight,
day_group = i.day_group
), on = "linked_trip_id"]
# Reset canonical linked trip weight to latest non-adjusted trip weight (e.g., daypat or initial) ----
if ("linked_trip_weight_round_3" %in% names(linked_trip_weights)) {
linked_trip_weights[, linked_trip_weight_round_3 := NULL]
}
if ('linked_trip_weight_round_2' %in% names(linked_trip_weights)) {
linked_trip_weights[, linked_trip_weight := linked_trip_weight_round_2]
} else {
linked_trip_weights[, linked_trip_weight := linked_trip_weight_round_1]
}
# Limit to weighted linked trips only
linked_trips = linked_trips[linked_trip_id %in% linked_trip_weights[linked_trip_weight > 0, linked_trip_id]]
} else {
# Make a copy of trips for the trip types table
trips_types = copy(
trips_raw[, .(
hh_id, trip_id, day_id, person_id,
opcat = o_purpose_category,
dpcat = d_purpose_category
)
]
)
join_col = "trip_id"
trips_types[trip_weights, `:=`(
trip_weight = i.trip_weight,
day_group = i.day_group
), on = "trip_id"]
}14.4.2 Categorize and Assign Trip Types
This step categorizes each trip into an aggregated trip type category based on its origin and destination purposes. This process ensures all trips receive an appropriate type, aggregates results as defined in settings, and merges trip type assignments back to the main trips table.
# Assign trip types to trip table
label_dt = value_labels[variable == "d_purpose_category", .(value, label = str_to_lower(label))]
pcat_work = label_dt[str_detect(label, "work"), value]
pcat_school = label_dt[str_detect(label, "school"), value]
pcat_home = label_dt[str_detect(label, "home"), value]
pcat_loop = 14L
# Identify loop trip IDs in trips table
measure_id = 'trip_id'
if ("linked_trip" %in% names(settings$hts_table_map)) measure_id = 'linked_trip_id'
loop_trip_ids = trips_raw[split_loop == 1 | split_loop_leg == 1, get(measure_id)]
# Assign trip type to each trip
trips_types[, trip_type_detailed := fcase(
get(measure_id) %in% loop_trip_ids, "num_loop",
(dpcat %in% pcat_loop), "num_loop",
(opcat %in% pcat_home) & (dpcat %in% pcat_work), "num_work",
(dpcat %in% pcat_home) & (opcat %in% pcat_work), "num_work",
(opcat %in% pcat_home) & (dpcat %in% pcat_school), "num_school",
(dpcat %in% pcat_home) & (opcat %in% pcat_school), "num_school",
(opcat %in% pcat_home) & !(dpcat %in% c(pcat_work, pcat_school)), "num_other",
(dpcat %in% pcat_home) & !(opcat %in% c(pcat_work, pcat_school)), "num_other",
!(opcat %in% pcat_home) & (dpcat %in% pcat_work), "num_work",
!(dpcat %in% pcat_home) & (opcat %in% pcat_work), "num_work",
!(opcat %in% pcat_home) & (dpcat %in% pcat_school), "num_school",
!(dpcat %in% pcat_home) & (opcat %in% pcat_school), "num_school",
!(opcat %in% pcat_home) & !(dpcat %in% c(pcat_work, pcat_school)), "num_other",
!(dpcat %in% pcat_home) & !(opcat %in% c(pcat_work, pcat_school)), "num_other"
)]
# Assign trip types to the trips_types table, aggregate as specified in settings
model_lhs = get("trip_rate_model_lhs", settings)
for (var in names(model_lhs)) {
subtypes = if (is.null(model_lhs[[var]])) var else model_lhs[[var]]
trips_types[trip_type_detailed %in% subtypes, trip_type := var]
}
# Check for NA
stopifnot(trips_types[is.na(trip_type_detailed), .N] == 0)
# Merge trips_types back onto linked_trips table and weights
linked_trips[trips_types, trip_type := i.trip_type, on = "linked_trip_id"]
# Add in diary platform to trips_types table
trips_types[households, diary_platform := diary_platform, on = "hh_id"]
# For consistency, limit trip types data to complete weighted observations
if ("linked_trip" %in% names(settings$hts_table_map)) {
trips_types = trips_types[linked_trip_id %in% linked_trip_weights[linked_trip_weight > 0, linked_trip_id] &
hh_id %in% households$hh_id]
} else {
trips_types = trips_types[trip_id %in% trip_weights[trip_weight > 0, trip_id] &
hh_id %in% households$hh_id]
}14.5 Calculate Number of Trips by Person-Day
This step calculates the number of trips for each person on weighted complete travel days. The day-level data is limited to completed weighted day observations and diary platform is appended to the dataset. The number of trips per day is then recalculated using the categorized trips table. Since the number of trips per day is calculated from the categorized trips table, missing observations for num_trips occur when there are no trips and thus are relabelled to 0s. The datasets are then saved for future reference in QAQC outputs.
# Get complete days and day of week
complete_days = days[
hh_day_complete == 1 & travel_dow %in% unlist(get("weight_dow_groups", settings)),
.(hh_id, person_id, day_id, travel_dow, num_trips, day_weight)
]
complete_days[get_day_groups(settings), day_group := i.day_group, on = "travel_dow"]
# Add in diary platform to complete_days table
complete_days[households, diary_platform := diary_platform, on = "hh_id"]
# Filter complete weekdays
trips_types_complete = trips_types[day_id %in% complete_days[, day_id], ]
n_trips = nrow(trips_types_complete)
# Recalculate the effective number of trips per day from trips table
# This means using linked trips when available, so it might be different from num_trips in days table
complete_days[
trips_types[, .N, by = .(day_id)],
num_trips := i.N,
on = .(day_id)
]
# Fill NAs with zeros for no trips
setnafill(complete_days, cols = c("num_trips"), fill = 0)
# Write datasets to working directory for future reference
fwrite(trips_types_complete,
file.path(get("working_dir", settings), "trip_types_complete.csv"))
fwrite(complete_days,
file.path(get("working_dir", settings), "day_complete_days.csv"))14.6 Prepare Data for Modelling
This step constructs a modeling dataset for each trip type containing the number of trips per person-day, filtered to those person-day records from eligible person-types. Eligibility criteria are determined using household- and person-level characteristics and appended to the person-day dataset here.
The eligibility criteria for trip weight adjustment is defined by trip type below:
Work trips: Adults who are employed full- or part-time.
School trips: Children who are proxy reported.
Other trips: All adults.
These eligibility rules define the subset of person-days that enter the trip rate models. Only the trip records associated with eligible person-type × trip-type combinations are used in estimation; and only eligible person-type × trip-type combinations receive adjustments. The majority of school trips are proxy-reported trips for children, so adults who report school trips are excluded. This is because that model would make different assumptions and the sample size of adults making school trips tends to be too low to produce stable estimates. In some cases, respondents may record a trip to work or school even when they do not report themselves as a student or employee. In these cases, the person’s signup survey information (employment status, student status) is prioritized over the person’s travel diary information, and trip adjustment procedures are restricted to the logical categories of people who should receive them. Trips made by those under 18 were not eligible for work and other trip adjustment.
This step also creates and appends the independent variables incorporated into the trip adjustment models. These models include independent variables that might affect how the type of diary people use relates to the number of trips they report. Daily trip rates serve as the dependent variables in the adjustment models.
Due to small sample size of Call Center participants, Call Center and Online diary platform responses were combined into one category for modelling and trip weight adjustment.
Independent variables, their categorization, and reference categories for each model can be found in the table below.
| Variable | Categories | Work model | School model | Other model |
|---|---|---|---|---|
| Diary platform |
|
☓ | ☓ | ☓ |
| Household size |
|
☓ | ☓ | ☓ |
| Number of kids in HH |
|
☓ | ☓ | |
| Household income |
|
☓ | ☓ | ☓ |
| Employment status |
|
☓ | ☓ | |
| Works from home |
|
☓ | ☓ | |
| Age |
|
☓ | ☓ | |
| Student status |
|
☓ | ☓ | |
| Work location regularly varies |
|
☓ | ||
| Educational attainment |
|
☓ | ☓ | |
| Proxy reporter’s employment status |
|
☓ | ||
| Proxy reporter works from home |
|
☓ | ||
| Proxy reporter’s gender |
|
☓ | ||
| Current school level |
|
☓ |
## Pivot trip dataset to be day-level for merging
trip_type_counts = dcast(
trips_types_complete[, .N, by = .(day_id, person_id, trip_type)],
day_id ~ trip_type,
value.var = "N",
fill = 0
)
## For merge make sure id is character
trip_type_counts[, day_id := as.character(day_id)]
complete_days[, day_id := as.character(day_id)]
## Add "num_" prefix to column names of trip_type_counts
renamees = names(trip_type_counts)["day_id" != names(trip_type_counts)]
setnames(trip_type_counts, old = renamees, new = paste0("num_", renamees))
## Now merge that onto complete weekdays, filling days with no observations as 0 trips
trip_counts = merge(
complete_days,
trip_type_counts,
by = "day_id", all.x = TRUE
)
setnafill(trip_counts, cols = str_subset(names(trip_counts), "num_"), fill = 0)
# some qaqc
trip_type_cols = paste0("num_", trips_types_complete[, unique(trip_type)])
trip_counts[, rowsums := rowSums(.SD), .SDcols = trip_type_cols]
stopifnot(trip_counts[num_trips != rowsums, .N] == 0)
# Write dataset to folder
fwrite(trip_counts,
file.path(get("working_dir", settings), "trip_counts.csv"))14.6.0.1 Create Household-Level Model Variables
## Household-level variables for models-----------------------------------------
households <- as.data.table(households)
# Household size
households[, hh_size := fcase(
num_people == 1, 1,
num_people == 2, 2,
num_people >= 3, 3,
default = NA
)]
# Number of kids in HH
households[, n_kids := fcase(
num_kids == 0, 0, # 0
num_kids >= 1, 1, # 1+
default = NA
)]
# Income categorized further
households[, income_cat := fcase(
income_imputed %in% c(1,2), 1, # Under $49,999k
income_imputed %in% c(3,4), 2, # $50k - $99,999k
income_imputed %in% c(5,6), 3, # Above $100k
default = NA
)]
# Merge household variables into model data ------------------------------------
fit_dt = merge(
trip_counts,
households[, .(
hh_id, hh_size, n_kids,
income_imputed, income_imputed_label, income_cat
)],
by = "hh_id",
all.x = TRUE # Drop households with no trips. They probably didn't have complete days
)14.6.0.2 Create Person-Level Model Variables
# Person-level variables for models --------------------------------------------
persons <- as.data.table(persons)
# Adults
persons[, is_adult := fcase(
age > 3, 1,
age <= 3, 0,
default = NA
)]
# Employment status
persons[, employment_status := fcase(
paid_work == 0 | employment == 5, 0, # Unemployed
paid_work %in% c(1,2) & employment %in% c(1, 3) , 1, # Full-time
paid_work %in% c(1,2) & employment %in% c(2, 8), 2, # Part-time
paid_work == 995, NA,
default = 0
)]
## If under 16, count as unemployed
persons[age < 3, employment_status := 0]
# Work from home
persons[employment_status %in% c(1,2), wfh := fcase(
work_from_home == 0, 3, # Never
work_from_home == 3, 1, # Always
work_from_home %in% c(1,2), 2, # Hybrid
default = NA
)]
# If unemployed, count as never works from home
persons[employment_status == 0, wfh := 3]
# Age categorized
persons[, age_cat_adults := fcase(
age < 4, 0, # Under 18
age == 4, 1, # 18-24
age %in% c(5, 6, 7, 8), 2, # 25-64
age > 8, 3, # 65+
default = NA
)]
# Educational attainment
persons[, education_cat := fcase(
education %in% c(1,2,4), 1, # Less than/Completed high school/Vocational training
education == 3, 2, # Some college
education %in% c(5,6), 3, # Bachelor's/Associate's degree
education == 7, 4, # Graduate degree
default = NA
)]
# Student status
persons[, is_student := fcase(
student %in% c(0, 1, 3, 4), 1, # Current student (adult)
student == 2, 0, # Not a student (adult)
default = NA
)]
# Work location varies
persons[employment_status %in% c(1, 2), work_loc_varies := fcase(
wfh == 1, 0, # works from home, does not vary
wfh != 1 & job_type %in% c(2,3), 1, # Work location varies/delivery driver
wfh != 1 & job_type == 1, 0, # Work location does not vary
default = NA
)]
persons[employment_status == 0, work_loc_varies := 0]
# School type
persons[, school_cat := fcase(
age < 4 & school_type %in% c(1, 4), 1, # Home schooled/cared for at home
age < 4 & school_type == 2, 2, # Pre-k/daycare,
age < 4 & school_type %in% c(5, 6), 3, # Elementary/Middle school
age < 4 & school_type == 7, 4, # High-school/Vocational training
age >= 3 & school_type %in% c(7, 10), 4, # High-school/Vocational training
age >= 3 & school_type %in% c(11,13), 5, # Bachelor's/Associate's/Graduate
age >= 3 & is_student == 0, 0, # Not in school
default = NA
)]
# Add in student status for those under 18
persons[age < 4, is_student := fcase(
school_cat %in% c(2, 3, 4, 5), 1,
school_cat == 1, 0,
default = NA
)]
# Separate proxy reporters and create proxy reporter variables for school models
kp <- persons[is_proxy == 1]
kp[, `:=`(
p_employment = employment_status,
p_wfh = wfh,
p_gender = gender_imputed
)]
# Use household ID to link proxy reporter information to persons who were proxy-reported
# in that household
persons[kp, `:=`(
p_employment = i.p_employment,
p_wfh = i.p_wfh,
p_gender = i.p_gender
), on = "hh_id"]
# Merge person variables into model data ---------------------------------------
fit_dt = merge(
fit_dt,
persons[,
.(
person_id, is_adult, employment_status, wfh,
age_cat_adults, education_cat, is_student, work_loc_varies,
school_cat, p_employment, p_wfh, p_gender, has_proxy
)],
by = "person_id",
all.x = TRUE
)14.6.0.3 Create Trip Model Estimation Weights
This step calculates estimation weights for model fitting by scaling each record’s day weight relative to the average day weight, ensuring the total weights match the number of observations.
Estimation weights are used in Poisson regression to adjust for differing probabilities of selection or representation in the sample, making the model’s parameter estimates more reflective of the target population. Scaling ensures comparability and proper influence of each record in the model fitting process.
# Estimation weights -----------------------------------------------------------
# Scale the day weight to the dataset for models
fit_dt[, estimation_weight := day_weight / mean(day_weight)]
stopifnot(all.equal(fit_dt[, sum(estimation_weight)], nrow(fit_dt)))14.6.0.4 Configure Variables for Models
To ensure the models are estimated correctly with easily understood outputs, this step converts each variable to a factor variable and explicitly defines the categorical levels.
# Factor vars for model --------------------------------------------------------
# Diary platform as a binary variable for model
fit_dt[, diary_binary := fcase(
diary_platform %in% c("browser", "call"), "online/call",
default = diary_platform
)]
# For the models, make variables factors and add labels; some extras in here for
# testing with expanded categories
fit_dt[, `:=`(
diary_platform = factor(diary_platform,
levels = c("rmove", "browser", "call"),
ordered = FALSE),
diary_binary = factor(diary_binary,
levels = c("rmove", "online/call"),
ordered = FALSE),
hh_size = factor(hh_size,
levels = c(1, 2, 3),
labels = c("1", "2", "3+"),
ordered = FALSE),
n_kids = factor(n_kids,
levels = c(0, 1),
labels = c("0", "1+"),
ordered = FALSE),
income_imputed = factor(income_imputed,
levels = c(1,2,3,4,5,6),
labels = c("$0-$24,999", "$25,000-$49,999", "$50,000-$74,999",
"$75,000-$99,999", "$100,000-$199,999", "$200,000+"),
ordered = FALSE),
employment_status = factor(employment_status,
levels = c(0,1,2),
labels = c("Unemployed", "Full-time", "Part-time"),
ordered = FALSE),
wfh = factor(wfh,
levels = c(1,2,3),
labels = c("Always", "Hybrid", "Never"),
ordered = FALSE),
age_cat_adults = factor(age_cat_adults,
levels = c(0,1,2,3),
labels = c("Under 18","18-24", "25-64", "65+"),
ordered = FALSE),
education_cat = factor(education_cat,
levels = c(1,2,3,4),
labels = c("Less than/Completed high school/Vocational training",
"Some college", "Bachelor's/Associate's degree", "Graduate degree"),
ordered = FALSE),
is_student = factor(is_student,
levels = c(0, 1),
labels = c("Not a student", "Student"),
ordered = FALSE),
work_loc_varies = factor(work_loc_varies,
levels = c(0, 1),
labels = c("No", "Yes"),
ordered = FALSE),
income_cat = factor(income_cat,
levels = c(1, 2, 3),
labels = c("Under $50k", "$50-$100k", "Above $100k"),
ordered = FALSE),
school_cat = factor(school_cat,
levels = c(0, 1, 2, 3, 4, 5),
labels = c("Not in school", "Home schooled/cared for at home", "Pre-k/Daycare",
"Elementary/Middle school", "High-school/Vocational training", "Bachelor's/Associate's/Graduate"),
ordered = FALSE),
p_employment = factor(p_employment,
levels = c(0,1,2),
labels = c("Unemployed", "Full-time", "Part-time"),
ordered = FALSE),
p_wfh = factor(p_wfh,
levels = c(1,2,3),
labels = c("Always", "Hybrid", "Never"),
ordered = FALSE),
p_gender = factor(p_gender,
levels = c("male", "female"),
labels = c("Male", "Female"),
ordered = FALSE)
)]14.6.1 Review Sample Size for Models
To ensure that model sample size is sufficient, review the number of person-days, adjustment-eligible trips, and total trips for each trip type (Work, School, Other, Total) across different diary platforms. This breakdown helps ensure sufficient data across platforms and trip categories, as well as the correct assignment of trip adjustment eligibility and trip categories before fitting models.
14.6.1.1 Table: Total number of person-days, adjustment-eligible trips, and total trips by trip type and diary platform
The table below outlines the number of person-days, adjustment eligible trips, and total trips by trip type and diary platform. This information outlines the number of trips eligible for adjustment for bias with respect to the total number of trips.
Trip type | Diary platform | Person-days | Adjustment-eligible trips | Total trips |
|---|---|---|---|---|
Other | Online | 4,255 | 8,048 | 8,826 |
Call Center | 155 | 361 | 373 | |
rMove | 2,297 | 5,215 | 5,474 | |
All platforms | 6,707 | 13,624 | 14,673 | |
School | Online | 683 | 1,040 | 1,215 |
Call Center | 9 | 14 | 16 | |
rMove | 179 | 251 | 368 | |
All platforms | 871 | 1,305 | 1,599 | |
Work | Online | 2,343 | 3,003 | 3,185 |
Call Center | 48 | 59 | 63 | |
rMove | 1,549 | 2,689 | 2,816 | |
All platforms | 3,940 | 5,751 | 6,064 | |
Total | Online | 4,938 | 12,091 | 13,578 |
Call Center | 164 | 434 | 462 | |
rMove | 2,476 | 8,155 | 9,337 | |
All platforms | 7,578 | 20,680 | 23,377 |
14.6.2 Prepare Model Formulas
Lastly, the variables for each model formula are read in from the settings. This enables code to be efficient and dynamic in producing models.
# Right-hand side of formula
model_rhs_vars = get("trip_rate_model_vars", settings)
model_lhs = get("trip_rate_model_lhs", settings)
# Left-hand side of formula
model_lhs_num = paste(paste0("num_", names(model_lhs)))
cli::cli_inform(c("Model formula: ", model_formula))Error: object 'model_formula' not found
14.7 Estimate Models
This step estimates the trip adjustment Poisson regression models that predict the number of trips of each type per person-day. The dependent variables are trip counts by type (e.g., number of work trips). Model fit statistics are also calculated for QAQC.
model_ls = list()
trip_rate_factors_all = fit_dt[, .(hh_id, person_id, day_id, diary_binary)]
for (lhs in model_lhs_num) {
if (lhs == "num_loop") next
model_rhs_num = model_rhs_vars[[lhs]]
model_rhs = paste("~", paste(model_rhs_num, collapse = " + "))
# Build formula from the remaining terms
fmla = as.formula(paste(lhs, model_rhs))
cli::cli_inform(c("model formula: ", fmla))
# TODO: parameterize
# Restrict to appropriate sub-populations
if (lhs %like% 'work') {fit_dt_est = fit_dt[is_adult == 1 & employment_status %in% c("Full-time", "Part-time")]
} else if (lhs %like% 'school') {fit_dt_est = fit_dt[is_adult == 0 & has_proxy == 1]
} else if (lhs %like% 'other') {fit_dt_est = fit_dt[is_adult == 1]
}
# Estimate model
model = glm(
fmla,
data = fit_dt_est,
weights = fit_dt_est[, estimation_weight],
family = "poisson"
)
model_ls[[lhs]] = model
model_desc = broom::tidy(model)
# Prepare model results
model_perf = as.data.table(performance::model_performance(model))
r2 = t(model_perf)[, 1][grepl("R2", names(model_perf))]
range = paste(paste(c('low:', 'high:'), round(range(fitted(model)), 4)), collapse = ", ")
report_dir = get('report_dir', settings)
model_name = gsub("num_", "model_", lhs)
# Save model
fwrite(model_desc, file.path(report_dir, "models", paste0(model_name, "_coef.csv")))
fwrite(model_perf, file.path(report_dir, "models", paste0(model_name, "_fit.csv")))
}14.7.1 Review Trip Weight Adjustment Models
The results of the trip rate adjustment models are shown in the tables below. P-values for individual variables may not always be significant, but the variables are retained in the model for estimation purposes. McFadden’s R2 is also presented below each table to represent the model fit.
14.7.1.1 Table: Other trip model estimates
Variable | Estimate | Std Error | T-Statistic | p.value |
|---|---|---|---|---|
Model intercept | 1.237 | 0.066 | 18.75 | <0.001 |
-0.310 | 0.023 | -13.73 | <0.001 | |
HH size: 2 (Ref: 1) | -0.029 | 0.029 | -1.02 | 0.308 |
HH size: 3+ (Ref: 1) | 0.015 | 0.034 | 0.43 | 0.664 |
HH income: $50-$100k (Ref: Under $50k) | 0.058 | 0.028 | 2.09 | 0.036 |
HH income: Above $100k (Ref: Under $50k) | -0.092 | 0.027 | -3.41 | <0.001 |
1+ kids in HH (Ref: no kids) | 0.331 | 0.026 | 12.71 | <0.001 |
Employment status: Full-time (Ref: Unemployed or Part-time) | -0.698 | 0.025 | -27.56 | <0.001 |
Employment status: Part-time (Ref: Unemployed) | -0.353 | 0.028 | -12.43 | <0.001 |
Works from home: Hybrid (Ref: Always) | -0.264 | 0.035 | -7.45 | <0.001 |
Works from home: Never (Ref: Always) | -0.354 | 0.034 | -10.31 | <0.001 |
Age: 25-64 (Ref: 18-24) | 0.319 | 0.043 | 7.39 | <0.001 |
Age: 65+ (Ref: 18-24) | 0.302 | 0.048 | 6.28 | <0.001 |
Student (Ref: Not student) | -0.174 | 0.044 | -3.96 | <0.001 |
Education: Some college (Ref: Less than/complete high school/Vocational training) | 0.226 | 0.032 | 7.15 | <0.001 |
Education: Bachelor's/Associate's (Ref: Less than/complete high school/Vocational training) | 0.291 | 0.026 | 11.32 | <0.001 |
Education: Graduate degree (Ref: Less than/complete high school/Vocational training) | 0.320 | 0.029 | 11.18 | <0.001 |
McFadden's rho-squared: 0.273 | ||||
14.7.1.2 Table: School trip model estimates
Variable | Estimate | Std Error | T-Statistic | p.value |
|---|---|---|---|---|
Model intercept | -3.195 | 0.447 | -7.15 | <0.001 |
0.115 | 0.061 | 1.89 | 0.058 | |
HH size: 3+ (Ref: 1) | -0.453 | 0.133 | -3.39 | <0.001 |
HH income: $50-$100k (Ref: Under $50k) | 0.145 | 0.107 | 1.36 | 0.175 |
HH income: Above $100k (Ref: Under $50k) | 0.457 | 0.099 | 4.64 | <0.001 |
School level: Pre-k/Daycare (Ref: Home-schooled/cared for at home) | 4.059 | 0.417 | 9.74 | <0.001 |
School level: Elementary/Middle school (Ref: Home-schooled/cared for at home) | 4.192 | 0.414 | 10.12 | <0.001 |
School level: High-school/Vocational training (Ref: Home-school/cared for at home | 4.261 | 0.416 | 10.25 | <0.001 |
School level: Bachelor's/Associate's/Graduate degree (Ref: Home-school/cared for at home | 2.717 | 0.814 | 3.34 | <0.001 |
Proxy employment status: Full-time (Ref: Unemployed) | -0.340 | 0.058 | -5.87 | <0.001 |
Employment status: Part-time (Ref: Unemployed) | -0.252 | 0.074 | -3.41 | <0.001 |
Proxy works from home: Hybrid (Ref: Always) | -0.117 | 0.091 | -1.28 | 0.2 |
Proxy works from home: Never (Ref: Always) | -0.295 | 0.092 | -3.20 | 0.001 |
Proxy gender: Female (Ref: Male) | 0.013 | 0.048 | 0.27 | 0.784 |
McFadden's rho-squared: 0.649 | ||||
14.7.1.3 Table: Work trip model estimates
Variable | Estimate | Std Error | T-Statistic | p.value |
|---|---|---|---|---|
Model intercept | -1.994 | 0.182 | -10.93 | <0.001 |
-0.486 | 0.032 | -15.40 | <0.001 | |
HH size: 2 (Ref: 1) | -0.025 | 0.043 | -0.57 | 0.57 |
HH size: 3+ (Ref: 1) | -0.010 | 0.053 | -0.18 | 0.857 |
HH income: $50-$100k (Ref: Under $50k) | 0.210 | 0.047 | 4.49 | <0.001 |
HH income: Above $100k (Ref: Under $50k) | 0.049 | 0.045 | 1.11 | 0.269 |
1+ kids in HH (Ref: no kids) | -0.010 | 0.041 | -0.24 | 0.811 |
Employment status: Part-time (Ref: Unemployed) | -0.339 | 0.038 | -9.03 | <0.001 |
Works from home: Hybrid (Ref: Always) | 2.791 | 0.168 | 16.59 | <0.001 |
Works from home: Never (Ref: Always) | 3.021 | 0.168 | 18.02 | <0.001 |
Age: 25-64 (Ref: 18-24) | 0.075 | 0.057 | 1.32 | 0.187 |
Age: 65+ (Ref: 18-24) | 0.009 | 0.085 | 0.10 | 0.919 |
Student (Ref: Not student) | 0.118 | 0.067 | 1.76 | 0.078 |
Work location varies regularly (Ref: Does not vary regurlarly) | 0.406 | 0.029 | 13.81 | <0.001 |
Education: Some college (Ref: Less than/complete high school/Vocational training) | -0.668 | 0.058 | -11.48 | <0.001 |
Education: Bachelor's/Associate's (Ref: Less than/complete high school/Vocational training) | -0.200 | 0.035 | -5.76 | <0.001 |
Education: Graduate degree (Ref: Less than/complete high school/Vocational training) | -0.273 | 0.040 | -6.85 | <0.001 |
McFadden's rho-squared: 0.449 | ||||
14.8 Calculate Trip Weight Adjustment Factors
To isolate the effect of trip diary bias, and calculate bias adjustment factors, two sets of predictions are produced for each model:
With diary platform coefficients included, reflecting observed differences between platforms.
With diary platform coefficients set to zero, removing platform effects.
The ratio of these two predictions represent the modeled trip rate adjustment factor (see “Modeled” column). For example, if the model predicted 1.10 trips with diary platform effects and 1.32 without them, the resulting adjustment factor was 1.20 (1.32 ÷ 1.10).
Because rMove is specified as the reference platform, its modeled factor is always 1.0.
14.8.1 Calculate Initial Adjustment Factors from Models
# Calculate trip weight adjustment factors =====================================
trip_rate_factors_all = fit_dt[, .(hh_id, person_id, day_id, diary_binary)]
for (lhs in model_lhs_num) {
if (lhs == "num_loop") next
model = model_ls[[lhs]]
# Restrict to appropriate sub-populations
if (lhs %like% 'work') {fit_dt_est = fit_dt[is_adult == 1 & employment_status %in% c("Full-time", "Part-time")]
} else if (lhs %like% 'school') {fit_dt_est = fit_dt[is_adult == 0 & has_proxy == 1]
}else if (lhs %like% 'other') fit_dt_est = fit_dt[is_adult == 1]
bias_vars = stringr::str_subset(all.vars(formula(model)), "^diary_*")
bias_vars = paste(bias_vars[bias_vars != "diary_rmove"], collapse = "|")
model_coefs = coef(model)
bias_idx = which(str_detect(names(model_coefs), bias_vars))
# Make copy and set bias coefficients to 0
model_corrected = model
model_corrected$coefficients[bias_idx] = 0
# Make trip count predictions
fit_dt_est[, pred := predict(model, newdata = fit_dt_est, type = "response")]
fit_dt_est[, pred_corrected := predict(model_corrected, newdata = fit_dt_est, type = "response")]
# Calculate trip rate factor
fit_dt_est[, trip_rate_factor := pred_corrected / pred]
fit_dt_est[, .N, keyby = .(diary_binary, round(trip_rate_factor, 2))]
# Clean up
response_var = all.vars(formula(model))[1]
newname = str_replace(response_var, "num", "trip_rate_factor")
setnames(fit_dt_est, "trip_rate_factor", newname)
# Save individual factors
rate_factor_name_i = names(fit_dt_est)[names(fit_dt_est) %like% 'trip_rate_factor']
factor_cols = c(
"hh_id", "person_id", "day_id", "diary_binary",
# names(trip_rate_factors_all),
rate_factor_name_i
)
factors_i = fit_dt_est[, .SD, .SDcols = factor_cols]
# Join, fill missing
trip_rate_factors_all = merge(
trip_rate_factors_all,
factors_i,
by = c("hh_id", "person_id", "day_id", "diary_binary"),
all.x = TRUE
)
setnafill(trip_rate_factors_all, "const", fill = 1.0, cols = rate_factor_name_i)
}
#fit_dt_est = merge(
fit_dt_all = merge(
fit_dt,
trip_rate_factors_all,
by = c("hh_id", "person_id", "day_id", "diary_binary"), #"day_group"),
all.x = TRUE
)14.8.2 Rescale and Cap Adjustment Factors
After the first calculation, the modeled factors are rescaled so that the minimum factor for each trip type is set to 1.0. This step enforces the working assumption that platform differences reflect under-reporting rather than over-reporting: if a platform’s raw factor fell below 1.0, that value is reset to 1.0 and all other platform factors are scaled upward relative to it. As a result, rMove’s factor could remain at 1.0 or increase above 1.0, depending on the relative positioning of other platforms. To avoid extreme corrections, factors are then capped at 2.0.
trip_rate_cols = grep("trip_rate_factor_", names(fit_dt_all), value = TRUE)
# Make a dataset with just the ids and factors
estimate_dt_all = copy(
fit_dt_all[, mget(c("hh_id", "person_id", "day_id", "diary_binary", trip_rate_cols))]
)
# Melt dataset for reporting trip adjustment factors by diary platform and trip type
rate_factor_dt = data.table::melt(
estimate_dt_all[, c("diary_binary", trip_rate_cols), with = FALSE],
measure.vars = trip_rate_cols,
id.vars = 'diary_binary',
variable.name = 'trip_type',
value.name = 'factor'
)
# Make factor column names just the trip type
rate_factor_dt[, trip_type := stringr::str_replace(trip_type, 'trip_rate_factor_', '')]
# Remove duplicate factors
raw_factors = dcast(rate_factor_dt[(diary_binary == "online/call" & factor != 1) | diary_binary == "rmove"],
diary_binary ~ trip_type,
value.var = "factor",
fun.aggregate = mean, na.rm = TRUE)
modelled_factors = dcast(rate_factor_dt[(diary_binary == "online/call" & factor != 1) | diary_binary == "rmove"],
trip_type ~ diary_binary,
value.var = "factor",
fun.aggregate = mean, na.rm = TRUE)
# Rescale the factors if rmove if not the minimum factor
weight_adj_dt = rate_factor_dt
weight_adj_dt[, ref_factor := min(factor, na.rm=TRUE), by = .(trip_type)]
weight_adj_dt[, rescaled_factor := factor / ref_factor]
# Reshape back to original format
rescaled_factors = dcast(weight_adj_dt[(diary_binary == "online/call" & factor != 1) | diary_binary == "rmove"],
trip_type ~ diary_binary,
value.var = "rescaled_factor",
fun.aggregate = mean, na.rm = TRUE )
hard_cap = 2.0 # Default hard cap for trip rate factors
if ("trip_rate_factor_cap" %in% names(settings)) {
hard_cap = as.numeric(settings$trip_rate_factor_cap)
}
# Cap any factors over 2
capped_factors = copy(rescaled_factors)
capped_factors[`online/call` > 2, `online/call` := 2]14.8.3 Export Adjustment Factors
# Combine into one dataset
all_factors <- rbindlist(list("modeled" = modelled_factors,
"rescaled" = rescaled_factors,
"capped" = capped_factors),
idcol = "processing_step")
all_factors <- melt(all_factors, id.vars = c("processing_step", "trip_type"),
value.name = "factor", variable.name = "platform")
all_factors <- dcast(all_factors, trip_type + platform ~ processing_step)
setcolorder(all_factors, c("trip_type", "platform", "modeled", "rescaled", "capped"))
all_factors[, platform := factor(platform, levels = c("online/call", "rmove"))]
all_factors[, trip_type := factor(trip_type, levels = c("other", "school", "work"))]
setorderv(all_factors, c("trip_type", "platform"))
# Write dataset for reporting
fwrite(all_factors, file.path(settings$report_dir, "models", "trip_adjustment_factors.csv"))14.8.4 Review Adjustment Factors
14.8.4.1 Table: Trip weight adjustment factors by diary platform and trip type
Trip type | Diary platform | Modeled | Rescaled | Capped |
|---|---|---|---|---|
Other | Online/Call Center | 1.36 | 1.36 | 1.36 |
rMove | 1.00 | 1.00 | 1.00 | |
School | Online/Call Center | 0.89 | 1.00 | 1.00 |
rMove | 1.00 | 1.12 | 1.12 | |
Work | Online/Call Center | 1.63 | 1.63 | 1.63 |
rMove | 1.00 | 1.00 | 1.00 |
14.9 Adjust Trip Weights by Trip Type and Person Eligibility
In this step, factors are applied to trip weights produced in Round 2 based on adjustment eligibility and trip type.
# Change name in trip rate factor dataset to match
setnames(all_factors, "platform", "diary_binary", skip_absent = TRUE)
all_factors[, `:=`(
trip_type = as.character(trip_type),
diary_binary = as.character(diary_binary)
)]
# Create eligibility variables for reference and join to linked_trip_weights
fit_dt_all[, work_eligible := fcase(
employment_status %in% c("Full-time", "Part-time") & is_adult == 1, 1,
default = 0
)]
fit_dt_all[, school_eligible := fcase(
is_adult == 0 & has_proxy == 1, 1,
default = 0
)]
fit_dt_all[, other_eligible := fcase(
is_adult == 1, 1,
default = 0
)]
linked_trip_weights[fit_dt_all, `:=`(
work_eligible = i.work_eligible,
school_eligible = i.school_eligible,
other_eligible = i.other_eligible,
diary_binary = i.diary_binary
), on = c("day_id", "person_id", "hh_id")]
linked_trip_weights[trips_types, trip_type := i.trip_type, on = "linked_trip_id"]
# Join the trip rate factors to the trip table for broad models
linked_trip_weights[all_factors,
factor := i.capped,
on = c("diary_binary", "trip_type")]
# Apply factors
linked_trip_weights[trip_type == "work" & work_eligible == 1,
linked_trip_weight_round_3 := linked_trip_weight_round_2 * factor]
linked_trip_weights[trip_type == "work" & is.na(linked_trip_weight_round_3),
linked_trip_weight_round_3 := linked_trip_weight_round_2]
linked_trip_weights[trip_type == "other" & other_eligible == 1,
linked_trip_weight_round_3 := linked_trip_weight_round_2 * factor]
linked_trip_weights[trip_type == "other" & is.na(linked_trip_weight_round_3),
linked_trip_weight_round_3 := linked_trip_weight_round_2]
linked_trip_weights[trip_type == "school" & school_eligible == 1,
linked_trip_weight_round_3 := linked_trip_weight_round_2 * factor]
linked_trip_weights[trip_type == "school" & is.na(linked_trip_weight_round_3),
linked_trip_weight_round_3 := linked_trip_weight_round_2]
linked_trip_weights[trip_type == "loop", linked_trip_weight_round_3 := linked_trip_weight_round_2]
linked_trip_weights[, linked_trip_weight := linked_trip_weight_round_3]
linked_trip_weights[is.na(linked_trip_weight_round_3), linked_trip_weight_round_3 := 0]
fwrite(linked_trip_weights, file.path(settings$outputs_dir, "linked_trip_weights.csv"))
# Export person-day dataset
fwrite(fit_dt_all, file.path(report_dir, "models", paste0("fit_dt_all.csv")))14.10 Assign Round 3 Adjusted Linked Trip Weights to Corresponding Unlinked Trips
If linked trips are used to calculate adjustment factors, the adjusted linked trip weights are then mapped back to unlinked trips.
if ("linked_trip" %in% names(settings$weight_output_map)) {
# Unlinked trip weights will be the same as the linked trip weights
# Store as a new suffixed column, and as the canonical trip_weight column
trip_weights[linked_trip_weights, trip_weight_round_3 := i.linked_trip_weight_round_3, on = 'linked_trip_id']
trip_weights[, trip_weight := trip_weight_round_3]
# Assert no NA trip weights
stopifnot(trip_weights[is.na(trip_weight), .N] == 0)
} else {
cat("Skipping this step - linked trip data were not used.")
}14.11 Calculate New Tour Weights From Round 3 Adjusted Linked Trip Weights
If linked trips are used to calculate adjustment factors, this step re-calculates tour weights as the mean of the adjusted weights for the linked trips comprising each tour.
# Tour weights will be the mean of the linked trip weights that make up the tour
if ("tour" %in% names(settings$weight_output_map)) {
tour_weights = fread(
file.path(get('outputs_dir', settings), 'tour_weights.csv'),
colClasses = colclasses[c("hh_id", "person_id", "day_id", "tour_id")]
)
# Ensure id variables are all character - this sometimes gets reset throughout the process
linked_trip_weights[, `:=`(
hh_id = as.character(hh_id),
person_id = as.character(person_id),
day_id = as.character(day_id),
tour_id = as.character(tour_id)
)]
# Create round 3 variable or set to 0 if a re-run
tour_weights[, tour_weight_round_3 := NA_integer_]
# Calculate new tour weights based on linked trip weights
tour_weights[linked_trip_weights, tour_weight_round_3 :=
mean(linked_trip_weight_round_3[linked_trip_weight_round_3 != 0], na.rm = TRUE),
on = .(hh_id, person_id, day_id, tour_id)]
tour_weights[is.na(tour_weight_round_3), tour_weight_round_3 := 0]
tour_weights[, tour_weight := tour_weight_round_3]
} else {
cat("Skipping this step - either linked trip data were not used and/or tour data are not available.")
}14.12 Save Round 3 Weights
# Save weights =================================================================
weights_output = list(
hh_weights = hh_weights, # Updating to exclude rounds 1-3 columns
person_weights = person_weights, # Updating to exclude rounds 1-3 columns
day_weights = day_weights, # Updating to exclude rounds 1-3 columns
trip_weights = trip_weights
)
# Append linked trip weights if there are linked trips
if ("linked_trip" %in% names(settings$weight_output_map)) {
weights_output[['linked_trip']] = linked_trip_weights
}
# Append tour weights if there are tours
if ("tour" %in% names(settings$weight_output_map)) {
weights_output[['tour']] = tour_weights
}
# Save weights
for (wt_table in names(weights_output)) {
path = file.path(get('outputs_dir', settings), paste0(wt_table, '.csv'))
message(paste0('Writing ', wt_table, ' to ', path))
# Drop any column that isn't an ID or a weight column
id_cols = stringr::str_subset(names(weights_output[[wt_table]]), "(.+)_id$")
wt_cols = stringr::str_subset(names(weights_output[[wt_table]]), "(.+)_weight")
factor_cols = stringr::str_subset(names(weights_output[[wt_table]]), "(.+)_factor$")
keep_cols = c("day_group", id_cols, wt_cols, factor_cols)
weights_output[[wt_table]] = weights_output[[wt_table]][, ..keep_cols]
# Order column so the unsuffixed weight column is last
final_col = stringr::str_subset(wt_cols, '_weight$')
wt_order = c(id_cols, setdiff(wt_cols, final_col), final_col)
setcolorder(weights_output[[wt_table]], wt_order)
fwrite(weights_output[[wt_table]], file = path)
}14.13 Review Impact of Round 3 Weight Adjustments on Trip Rates
The below figures summarize how reported trip rates evolve across weighting steps by diary platform. Average daily trip rates are calculated among only those persons eligible for that type of adjustment in Round 3 (i.e., work trip rates are calculated amongst those who are employed). This is done to assess the impact of Round 3 adjustment on trip rate results. Trip rates with large confidence intervals may represent combinations with small sample sizes and should be interpreted with caution.
14.13.0.1 Figure: Average daily trip weight by weighting round and diary platform - Other trips

14.13.0.2 Figure: Average daily trip weight by weighting round and diary platform - School trips

14.13.0.3 Figure: Average daily trip weight by weighting round and diary platform - Work trips
