10  Round 3 Weighting: Trip Weight Adjustments

11 Overview

In the previous chapter, Round 2 household-level weights were produced, adjusting for day-pattern response bias. This script will start by using those weights to produce person- and day-level weights. These weights are applied to the trip table to generate unlinked (and, if available, linked trip and tour) trip weights. However, analysis of other household travel surveys has shown that the number of trips reported varies systematically by diary platform. To correct for this bias, this script applies a final stage of adjustment at the trip level, referred to as Round 3 weighting.

12 Chapter Setup

12.1 Load Packages, Settings, and Directory Pathways

# Load hts.weighting package
devtools::load_all()

# Load settings; pass python_env explicitly for Quarto
settings = get_settings(reload_settings = TRUE, 
                        print = FALSE)

# Set pathways for reference
code_root = get("code_root", settings)
work_dir = get('working_dir', settings)
inputs_dir = get('inputs_dir', settings)
outputs_dir = get('outputs_dir', settings)
report_dir = get('report_dir', settings)
popsim_initial_label = get("popsim_initial_label", settings)
popsim_daypat_label = get("popsim_daypat_label", settings)

13 Calculate Round 2 Person-, Day-, and Trip-level Weights

After Round 2 household-level weights are generated from PopulationSim, person- and day-level weights are calculated, and from this point foreward receive no further adjustments. These weights are then used to calculate the Round 2 unlinked and, if available, linked trip- and tour-level weights.

NoteKey Concept: How Are Person-, Day-, and Unlinked Trip-Level Weights Calculated?
  • Person-level weights: Each person is assigned their corresponding household-level weight for each weighted day group (weight_dow_groups) specified in the settings. If adjustment for unrelated household members is specified in the settings, their weights are set to 0 and the remaining household members have their weights adjusted up.
  • Day-level weights: The day weight is calculated as the person weight divided by the number of completed days in the household. Days that are not study days (i.e., not in specified day-of-week groups) and/or are incomplete are assigned a weight of 0.
  • Unlinked trip-level weights: Each unlinked trip is assigned their corresponding day weight. Unlinked trips that occurred on non-study days are set to 0. Synthetically produced trips are assigned a weight of 0 if weight_synthetic_trips in the settings is set to false.
NotePSRC Specifics
  • Day-of-week weights were not calculated for PSRC, so the day-level weight represents one average weekday for each person in the study. Thus, the sum of the trip-level weight represents the total trips in the region on an average weekday (Tuesday, Wednesday, and Thursday)./
  • Synthetic trips were not weighted.
NoteKey Concept: Linked Trip and Tour Weights

Linked trip and tour weights represent the average weight of their component trips (linked trips) or legs (tours). Linked trip weights are the average of the weighted unlinked trips that make up that linked trip.

# Load imputed household and person data, along with survey tables.
households = readRDS(file.path(work_dir, 'households_survey_imputed.rds'))
persons = readRDS(file.path(work_dir, 'persons_survey_imputed.rds'))

# Get codebook and other survey tables for value labels, days, and trips
value_labels = fetch_hts_table('value_labels', settings)
days = fetch_hts_table("day", settings)
trips = fetch_hts_table("trip", settings)

# Remove any records associated with dropped households to ensure data consistency
persons = persons[hh_id %in% households$hh_id]
days = days[hh_id %in% households$hh_id]
trips = trips[hh_id %in% households$hh_id]

# Organize weighting rounds and prepare to collect output weights for diagnostics
popsim_rounds = list(
  round_1 = get('popsim_initial_label', settings),
  round_2 = get('popsim_daypat_label', settings)
)

weights_list = list()

# This prepares a set of weights for each weighting round, for diagnostic purposes.
for (round_type in names(popsim_rounds)) {
  
    if (
        round_type == "round_2" &&
        ! get('daypat_weighting', settings)
        ) {
        message(paste0("Skipping ", round_type, " weights as 070_daypat_weighting.R was not run."))
        next
    }
    
    hh_weights_path = file.path(
        get('working_dir', settings),
        popsim_rounds[[round_type]],
        'output/final_summary_hh_weights.csv'
    )
    
    hh_weights_raw = fread(hh_weights_path, colClasses = c(hh_id_dow = 'character'))
    setnames(hh_weights_raw, 'zone_group_balanced_weight', 'hh_weight')

    # Add num_trips to days -------------------------------------------------------
    if (!("num_trips" %in% names(days))) {
        days[
        trips[, .N, keyby = .(day_id)],
        num_trips := i.N,
        on = .(day_id)
        ]
        days[is.na(num_trips), num_trips := 0]
    }
    
    # Check that hh_num_weekdays_complete is correct
    # Otherwise, weight totals will not match
    hh_complete_days = calc_complete_hhdays(households, days, settings, include_zeros = TRUE)

    # If only one day_group, remove dummy nonstudy_day
    if (length(get("weight_dow_groups", settings)) == 1) {
        hh_complete_days = hh_complete_days[day_group != 'nonstudy_day', ]
    }

    hh_weights = merge(
        hh_weights_raw,
        hh_complete_days[, .(hh_id_dow, n_complete_hh_days, diary_platform, day_group, hh_id)],
        by = "hh_id_dow",
        all = TRUE
    )
    hh_weights[is.na(hh_weight), hh_weight := 0]

    # Recalculate number of complete person days for each day_group
    day_groups = get_day_groups(settings)
    days[, person_id := as.character(person_id)]
    days[day_groups, day_group := i.day_group, on = .(travel_dow)]

    days[, n_complete_person_days := 0]
    days[is_complete == 1, n_complete_person_days := .N, by = .(person_id, day_group)]
    
    # All Online diaries should only have one complete day
    stopifnot(hh_weights[diary_platform != "rmove" & n_complete_hh_days != 1, .N] == 0)
    
    # Sum of weights should not change
    stopifnot(hh_weights[hh_weight > 0, .N] == hh_weights_raw[, .N])    
    
    # Calculate person weights -----------------------------------------------------
    hh_weights[households, `:=`
        (
            num_people = i.num_people,
            num_adults = i.num_adults
        ),
        on = .(hh_id)
    ]
    
    person_weights = calc_person_weights(
        hh_weights,    
        households,
        persons,    
        value_labels,
        settings
    )
    
    # Calculate day weights -------------------------------------------------------
    # days[, person_id := as.character(person_id)] # Can we delete this line?
    day_weights = calc_day_weights(
        person_weights,
        households,
        persons,
        days,
        trips,
        value_labels,
        settings
    )
    
    # Trip weights =================================================================
    trip_weights = calc_trip_weights(trips, day_weights, settings)

    # Cross-check for missing relationships, set to 0 if orphaned ------------------
    # Find common day_ids in days, trips
    common_day_ids = intersect(days$day_id, trips$day_id)
    common_person_ids = intersect(intersect(persons$person_id, days$person_id), trips$person_id)
    
    # Check for orphaned trips that don't have a day or person or household
    # This should not happen, but if it does, set the weight to 0
    trip_weights[
        !day_id %in% common_day_ids |
        !hh_id %in% households$hh_id |
        !person_id %in% common_person_ids,
        trip_weight := 0
    ]
    
    # If person level study, adjust the hh_weights by num_adults since the hh_weight is the person weight
    if (get("study_unit", settings) == "person") {
        hh_weights[, hh_weight := hh_weight / num_adults]
    }
    
    # Append to output weights -----------------------------------------------------
    weights_round = list(
        hh_weights = hh_weights[, .(hh_id, day_group, hh_weight)],
        person_weights = person_weights[, .(hh_id, person_id, day_group, person_weight)],
        day_weights = day_weights[, .(hh_id, person_id, day_id, day_group, day_weight)],
        trip_weights = trip_weights[,
            .(hh_id, person_id, day_id, trip_id, linked_trip_id, tour_id, day_group, trip_weight)
        ]
    )

    # Check that all nonstudy_day day_groups are 0 weight
    for (wt in weights_round) {
        wt_col = stringr::str_subset(names(wt), "weight$")
        if (wt[day_group == "nonstudy_day", sum(get(wt_col))] != 0) {
            stop(paste0("Nonstudy day group has non-zero weight in ", wt_col, " column."))
        }
    }

    # Linked trip and tour weights =================================================
    if ("linked_trip" %in% names(settings$hts_table_map)) {
        # Load linked trips
        linked_trips = fetch_hts_table("linked_trip", settings)

        # Filter out records for dropped households
        linked_trips = linked_trips[linked_trip_id %in% unique(trips$linked_trip_id), ]

        # Assign day_group
        linked_trips[day_weights, day_group := i.day_group, on = .(day_id)]
        stopifnot(all(!is.na(linked_trips$day_group)))

        # Calculate linked trip weights
        linked_trip_weights = calc_linked_trip_weights(
            linked_trips,
            unlinked_trips = trips,
            unlinked_trip_weights = trip_weights,
            day_weights,
            settings
        )

        # Append to output
        weights_round[['linked_trip_weights']] = linked_trip_weights[,
            .(hh_id, person_id, day_id, linked_trip_id, tour_id, day_group, linked_trip_weight)
        ]
    }
    
    
    if ("tour" %in% names(settings$hts_table_map)) {
        # Load tour data
        tours = fetch_hts_table("tour", settings)

        # Assign day_group
        linked_trips[day_weights, day_group := i.day_group, on = .(day_id)]
        stopifnot(all(!is.na(linked_trips$day_group)))

        # Filter out records for dropped households
        tours = tours[tour_id %in% unique(linked_trips$tour_id), ]
        
        # Calculate tour weights
        tour_weights = calc_tour_weights(
            tours,
            unlinked_trips = trips,
            unlinked_trip_weights = trip_weights,
            day_weights,
            settings
        )
        
        # Append to output
        weights_round[['tour_weights']] = tour_weights[,
            .(hh_id, person_id, day_id, tour_id, day_group, tour_weight)
        ]
        
    }

    weights_list[[round_type]] = weights_round
}

13.0.1 Save Round 2 Person-, Day-, and Trip-Level Weights

# Append weights with suffix and write to CSV ==============================================
# Unsuffixed weight is the "current" weight with the adjustments from the latest round
appended_weights_list = list()
for (round_name in names(weights_list)) {

    weights_round = weights_list[[round_name]]
    message(paste0("Appending weights from round: ", round_name))
    
    for (table_name in names(weights_round)) {
        # First round, just copy
        if (is.null(appended_weights_list[[table_name]])) {
            weight_comb = copy(weights_round[[table_name]])

            # Copy current weight to a suffixed column
            wt_col = stringr::str_subset(names(weight_comb), '_weight$')
            wt_col_adj = paste0(wt_col, '_', round_name)
            weight_comb[, (wt_col_adj) := get(wt_col)]

        } else {
            weight_prev = appended_weights_list[[table_name]]
            id_cols = c(
                stringr::str_subset(names(weight_prev), '_id$'),
                "day_group"
            )
            weight_comb = merge(
                weight_prev,
                weights_round[[table_name]],
                by = id_cols,
                suffixes = c('', paste0('_', round_name)),
                all = TRUE
            )

            # Updated canonical weight to the latest round
            wt_col = stringr::str_subset(names(weight_prev), '_weight$')
            wt_col_adj = paste0(wt_col, '_', round_name)
            weight_comb[, (wt_col) := get(wt_col_adj)]
        }

        # Check that the canonical weight matches the latest suffixed weight
        stopifnot(all(weight_comb[[wt_col]] == weight_comb[[wt_col_adj]], na.rm = TRUE))

        # Update the combined weight table
        appended_weights_list[[table_name]] = weight_comb

    }
}

# Write out combined weights for reference
for (weight_table in names(appended_weights_list)) {
    path = file.path(get('outputs_dir', settings), paste0(weight_table, '.csv'))
    message(paste0('Writing combined ', weight_table, ' to ', path))
    fwrite(appended_weights_list[[weight_table]], file = path)
}

# Save a dummy file to indicate 080 has run
write.table(
    "This is placeholder text. This file is used track the pipeline outputs.",
    file.path(get('working_dir', settings), ".unadjusted_weights"),
    row.names = FALSE, col.names = FALSE, quote = FALSE
)

14 Round 3 Weighting

14.1 Trip weight adjustment: Overview

Analyses of other RSG household travel surveys show that the number of trips reported varies systematically by diary platform. In particular, Online and Call Center respondents tend to under-report certain types of trips compared to respondents using the rMove smartphone app, which automatically records travel in the background. To correct for this bias, RSG adjusts trip-level weights using calculated correction factors, referred to as Round 3 weighting.

The adjustment procedure operates at the linked trip level, unless linked trips are unavailable in which case unlinked trips are used. Each linked trip is classified into a mutually exclusive trip type, and trip rate factors are then estimated and applied to linked trips. Unlinked trips and tours receive their weights from linked trips. To ensure that the adjustment models have sufficient sample size, produce stable factors across person types and diary platforms, and yield interpretable results, this process estimates trip adjustment models for broader classes of trip purposes rather than for each survey-reported purpose category. Using origin and destination purposes, linked trips were classified into 4 categories (work, school, other, and loop). Only trips eligible for that trip type (i.e., work trips among employed persons) are considered for trip bias adjustment.

NoteRound 3 Weighting: What You’ll Do
  1. Filter out unweighted records from each dataset.
  2. Categorize each linked trip (or if unavailable, unlinked trip) into mutually exclusive groups using the survey-reported trip purpose.
  3. Calculate the number of trips (overall and by trip type) for each person-day.
  4. Prepare the person-day dataset for modelling by assigning household- and person-level characteristics to each observation, which will serve as model covariates.
  5. Estimate Poisson regression models for each trip type, predicting the number of trips per person-day.
  6. Calculate correction factors by trip type by comparing predictions with and without diary platform coefficients.
  7. Adjust eligible trips using correction factors.
  8. If linked trip data are used in the steps above, calculate unlinked trip and tour weights using the new Round 3 adjusted weight.
  9. Review model fit/coefficients and trip rates by diary platform across each round of weighting for quality assurance.
TipIn development

Trip weight adjustment is actively under development. Planned updates are improvement in model fit by testing different model types and applying adjustments in PopulationSim. The goal of this work is to update models for accuracy and to not separate weight adjustments from the PopulationSim adjustment procedures.

14.2 Load Data

Load the household, person, day, and trip survey datasets and weights.

# Load value labels and imputed HTS data for households, persons, days, and trips
value_labels = fetch_hts_table('value_labels', settings)

# Load HTS data with imputations
households_raw = readRDS(file.path(work_dir, 'households_survey_imputed.rds'))
persons_raw = readRDS(file.path(work_dir, 'persons_survey_imputed.rds'))
days_raw = fetch_hts_table("day", settings)
trips_raw = fetch_hts_table("trip", settings)


# Set up column classes for joining and reading weights
colclasses = c(
    hh_id = 'character',
    person_id = 'character',
    day_id = 'character',
    trip_id = 'character',
    linked_trip_id = 'character',
    tour_id = 'character'
)

# Load the Round 2 weights ------
  hh_weights = fread(
    file.path(get('outputs_dir', settings), 'hh_weights.csv'),
    colClasses = colclasses[c('hh_id')]
  )
  person_weights = fread(
    file.path(get('outputs_dir', settings), 'person_weights.csv'),
    colClasses = colclasses[c('hh_id', 'person_id')]
  )
  day_weights = fread(
    file.path(get('outputs_dir', settings), 'day_weights.csv'),
    colClasses = colclasses[c('hh_id', 'person_id', 'day_id')]
  )
  
  header = names(fread(file.path(get('outputs_dir', settings), 'trip_weights.csv'), nrows = 0))
  trip_weights = fread(
    file.path(get('outputs_dir', settings), 'trip_weights.csv'),
    colClasses = colclasses[intersect(names(colclasses), header)]
  )

14.3 Data Cleaning

This section prepares the survey data by filtering out records without valid weights. Then, the number of unweighted trips per day using only weighted records, and weight variables are reset to the most recent round of weighting. For day-of-week weights, an average point estimate is generated for the household and person across survey days. The reset weights are then joined to the survey data, and diary platform information is appended to the unlinked trip weights to ensure consistency and readiness for subsequent analyses.

14.3.1 Filter Out Unweighted Records

Remove households and associated data with zero weights from all relevant datasets, ensuring only records with valid weights are retained.

# Find hh's with all zero weight days
zero_wt_hhs = hh_weights[, sum(hh_weight), by = hh_id][V1 == 0, hh_id]

# Drop all unweighted data
households = households_raw[!hh_id %in% zero_wt_hhs]
persons = persons_raw[person_id %in% person_weights[person_weight > 0, person_id]]
days = days_raw[day_id %in% day_weights[day_weight > 0, day_id]]
trips = trips_raw[trip_id %in% trip_weights[trip_weight > 0, trip_id]]

14.3.2 Recalculate Number of Trips in Day Table from Trip Table

Ensure that the number of trips in the day table reflects only weighted trips for consistency.

# Recalculate num_trips in day table from trip table to ensure consistency ------
days[
    trips[, .(num_trips = .N), by = .(day_id)],
    num_trips := i.num_trips,
    on = .(day_id)
]
day_weights[days, num_trips := i.num_trips, on = "day_id"]
day_weights[is.na(num_trips), num_trips := 0]

14.3.3 Reset Trip Weight to Previous Weighting Round and Drop All Dummy Weights

Reset trip weights to their most recent round of weighting, removing outdated or dummy weight columns, and verify weight sums for data integrity.

# Reset canonical trip weight to latest non-adjusted trip weight (e.g., round 1 or 2) ----
if ("trip_weight_round_3" %in% names(trip_weights)) {
    trip_weights[, trip_weight_round_3 := NULL]
}
if ('trip_weight_round_2' %in% names(trip_weights)) {
    trip_weights[, trip_weight := trip_weight_round_2]
} else {
    trip_weights[, trip_weight := trip_weight_round_1]
}
# Check that we didn't drop something we should have
stopifnot(
    all.equal(
        day_weights[, sum(day_weight * num_trips)],
        trip_weights[, sum(trip_weight)]
    )
)

# Drop dummy integer weights from survey tables -------------------------------
for (tbl_name in list("households", "persons", "days", "trips")) {
    dt = get(tbl_name)
    wt_col = grep('_weight$', names(dt), value = TRUE)
    if (length(wt_col) > 1) {
        stop("Multiple weight columns found in table ", tbl_name)
    } else if (length(wt_col) == 1) {
        dt[, (wt_col) := NULL]
    }
}

14.3.4 Calculate a Point Estimate Weight for Households/Persons Across Days

Compute average weights for households and persons across all days to provide point estimates for further analysis. If only one average weight across all days was calculated, the calculated weight will be the same.

# Calculate a point estimate weight for households and persons across days
hh_weights_avg = hh_weights[, .(hh_weight = mean(hh_weight)), by = .(hh_id)]
person_weights_avg = person_weights[, .(person_weight = mean(person_weight)), by = .(person_id)]

14.3.5 Join Weights to Survey Data

Attach the weights to their respective household, person, day, and trip tables based on unique identifiers.

# Join the weights to their respective tables using appropriate keys
households[hh_weights_avg, hh_weight := i.hh_weight, on = "hh_id"]
persons[person_weights_avg, person_weight := i.person_weight, on = "person_id"]
days[day_weights, day_weight := i.day_weight, on = "day_id"]
trips[trip_weights, trip_weight := i.trip_weight, on = "trip_id"]

14.3.6 Append Diary Platform Information to Trip Weights

Adds diary platform details to trip weights to support adjustment modeling.

# Add diary platform information to trip weights for adjustment modeling
trip_weights[households, diary_platform := i.diary_platform, on = "hh_id"]

14.4 Trip Type Assignment

Trips are classified using the reported origin and destination purposes in the survey data into four mutually exclusive categories:

  • Work: trips with one end at work or a work-related location.

  • School: trips with one end at school or a school-related location.

  • Other: all other trips excluding loop trips.

  • Loop: trips that start and end in the same location.

Loop trips are identified in the linked trip table but excluded from the adjustment procedures. Loop trips are typically made for walking the dog or exercising and are not typically modeled. They also tend to be represented more in rMove data than in other survey modes and therefore would obfuscate the trip adjustment procedure if included.

14.4.1 Prepare Trip Types Table

This section creates a table for trip type assignment, using linked trip data if available. The table is limited to just the variables needed for trip type assignment.

# Prepare trip types table, using linked trips if available, otherwise use unlinked trips
if ("linked_trip" %in% names(settings$hts_table_map)) {
    linked_trips = fetch_hts_table("linked_trip", settings)

    idcols = stringr::str_subset(
        names(fread(file.path(get('outputs_dir', settings), 'linked_trip_weights.csv'), nrows = 1)),
        "(.+)_id$"
    )

    # Same logic, if this is a re-run read in all rounds of weights.
    linked_trip_weights = fread(
        file.path(get('outputs_dir', settings), 'linked_trip_weights.csv'),
        colClasses = setNames(rep('character', length(idcols)), idcols)
    )
    
    # Limit to weighted households only
    linked_trips = linked_trips[hh_id %in% households$hh_id]
  
    # Create table for trip type assignment
    trips_types = copy(
        linked_trips[, .(
            hh_id,
            linked_trip_id,
            day_id,
            person_id,
            opcat = o_purpose_category,
            dpcat = d_purpose_category
            )
        ]
    )
    join_col = "linked_trip_id"
    
    # Join linked trip ID to trip weights
    trip_weights[trips_raw, linked_trip_id := i.linked_trip_id, on = "trip_id"]
    
    # Join linked trip weight and day group to trip types table
    trips_types[linked_trip_weights, `:=`(
                trip_weight = i.linked_trip_weight,
                day_group = i.day_group
                ), on = "linked_trip_id"]
    
    # Reset canonical linked trip weight to latest non-adjusted trip weight (e.g., daypat or initial) ----
    if ("linked_trip_weight_round_3" %in% names(linked_trip_weights)) {
      linked_trip_weights[, linked_trip_weight_round_3 := NULL]
    }
    
    if ('linked_trip_weight_round_2' %in% names(linked_trip_weights)) {
      linked_trip_weights[, linked_trip_weight := linked_trip_weight_round_2]
    } else {
      linked_trip_weights[, linked_trip_weight := linked_trip_weight_round_1]
    } 
  
    # Limit to weighted linked trips only
    linked_trips = linked_trips[linked_trip_id %in% linked_trip_weights[linked_trip_weight > 0, linked_trip_id]]
    
    
} else {

    # Make a copy of trips for the trip types table
    trips_types = copy(
        trips_raw[, .(
            hh_id, trip_id, day_id, person_id,
            opcat = o_purpose_category,
            dpcat = d_purpose_category
            )
        ]
    )
    
    join_col = "trip_id"
    trips_types[trip_weights, `:=`(
      trip_weight = i.trip_weight,
      day_group = i.day_group
      ), on = "trip_id"]
    
}

14.4.2 Categorize and Assign Trip Types

This step categorizes each trip into an aggregated trip type category based on its origin and destination purposes. This process ensures all trips receive an appropriate type, aggregates results as defined in settings, and merges trip type assignments back to the main trips table.

# Assign trip types to trip table
label_dt = value_labels[variable == "d_purpose_category", .(value, label = str_to_lower(label))]
pcat_work = label_dt[str_detect(label, "work"), value]
pcat_school = label_dt[str_detect(label, "school"), value]
pcat_home = label_dt[str_detect(label, "home"), value]
pcat_loop = 14L

# Identify loop trip IDs in trips table
measure_id = 'trip_id'
if ("linked_trip" %in% names(settings$hts_table_map)) measure_id = 'linked_trip_id'
loop_trip_ids = trips_raw[split_loop == 1 | split_loop_leg == 1, get(measure_id)]

# Assign trip type to each trip
trips_types[, trip_type_detailed := fcase(
  get(measure_id) %in% loop_trip_ids, "num_loop",
  (dpcat %in% pcat_loop), "num_loop",
  (opcat %in% pcat_home) & (dpcat %in% pcat_work), "num_work",
  (dpcat %in% pcat_home) & (opcat %in% pcat_work), "num_work",
  (opcat %in% pcat_home) & (dpcat %in% pcat_school), "num_school",
  (dpcat %in% pcat_home) & (opcat %in% pcat_school), "num_school",
  (opcat %in% pcat_home) & !(dpcat %in% c(pcat_work, pcat_school)), "num_other",
  (dpcat %in% pcat_home) & !(opcat %in% c(pcat_work, pcat_school)), "num_other",
  !(opcat %in% pcat_home) & (dpcat %in% pcat_work), "num_work",
  !(dpcat %in% pcat_home) & (opcat %in% pcat_work), "num_work",
  !(opcat %in% pcat_home) & (dpcat %in% pcat_school), "num_school",
  !(dpcat %in% pcat_home) & (opcat %in% pcat_school), "num_school",
  !(opcat %in% pcat_home) & !(dpcat %in% c(pcat_work, pcat_school)), "num_other",
  !(dpcat %in% pcat_home) & !(opcat %in% c(pcat_work, pcat_school)), "num_other"
)]

# Assign trip types to the trips_types table, aggregate as specified in settings
model_lhs = get("trip_rate_model_lhs", settings)
for (var in names(model_lhs)) {
    subtypes = if (is.null(model_lhs[[var]])) var else model_lhs[[var]]
    trips_types[trip_type_detailed %in% subtypes, trip_type := var]
}

# Check for NA
stopifnot(trips_types[is.na(trip_type_detailed), .N] == 0)

# Merge trips_types back onto linked_trips table and weights
linked_trips[trips_types, trip_type := i.trip_type, on = "linked_trip_id"]

# Add in diary platform to trips_types table
trips_types[households, diary_platform := diary_platform, on = "hh_id"]

# For consistency, limit trip types data to complete weighted observations
if ("linked_trip" %in% names(settings$hts_table_map)) {
trips_types = trips_types[linked_trip_id %in% linked_trip_weights[linked_trip_weight > 0, linked_trip_id] & 
                            hh_id %in% households$hh_id]
} else {
  trips_types = trips_types[trip_id %in% trip_weights[trip_weight > 0, trip_id] & 
                            hh_id %in% households$hh_id]
}

14.5 Calculate Number of Trips by Person-Day

This step calculates the number of trips for each person on weighted complete travel days. The day-level data is limited to completed weighted day observations and diary platform is appended to the dataset. The number of trips per day is then recalculated using the categorized trips table. Since the number of trips per day is calculated from the categorized trips table, missing observations for num_trips occur when there are no trips and thus are relabelled to 0s. The datasets are then saved for future reference in QAQC outputs.

# Get complete days and day of week
complete_days = days[
  hh_day_complete == 1 & travel_dow %in% unlist(get("weight_dow_groups", settings)),
  .(hh_id, person_id, day_id, travel_dow, num_trips, day_weight)
]
complete_days[get_day_groups(settings), day_group := i.day_group, on = "travel_dow"]

# Add in diary platform to complete_days table
complete_days[households, diary_platform := diary_platform, on = "hh_id"]

# Filter complete weekdays
trips_types_complete = trips_types[day_id %in% complete_days[, day_id], ]
n_trips = nrow(trips_types_complete)

# Recalculate the effective number of trips per day from trips table
# This means using linked trips when available, so it might be different from num_trips in days table
complete_days[
    trips_types[, .N, by = .(day_id)],
    num_trips := i.N,
    on = .(day_id)
]

# Fill NAs with zeros for no trips
setnafill(complete_days, cols = c("num_trips"), fill = 0)

# Write datasets to working directory for future reference
fwrite(trips_types_complete, 
       file.path(get("working_dir", settings), "trip_types_complete.csv"))

fwrite(complete_days, 
       file.path(get("working_dir", settings), "day_complete_days.csv"))

14.6 Prepare Data for Modelling

This step constructs a modeling dataset for each trip type containing the number of trips per person-day, filtered to those person-day records from eligible person-types. Eligibility criteria are determined using household- and person-level characteristics and appended to the person-day dataset here.

The eligibility criteria for trip weight adjustment is defined by trip type below:

  • Work trips: Adults who are employed full- or part-time.

  • School trips: Children who are proxy reported.

  • Other trips: All adults.

These eligibility rules define the subset of person-days that enter the trip rate models. Only the trip records associated with eligible person-type × trip-type combinations are used in estimation; and only eligible person-type × trip-type combinations receive adjustments. The majority of school trips are proxy-reported trips for children, so adults who report school trips are excluded. This is because that model would make different assumptions and the sample size of adults making school trips tends to be too low to produce stable estimates. In some cases, respondents may record a trip to work or school even when they do not report themselves as a student or employee. In these cases, the person’s signup survey information (employment status, student status) is prioritized over the person’s travel diary information, and trip adjustment procedures are restricted to the logical categories of people who should receive them. Trips made by those under 18 were not eligible for work and other trip adjustment.

This step also creates and appends the independent variables incorporated into the trip adjustment models. These models include independent variables that might affect how the type of diary people use relates to the number of trips they report. Daily trip rates serve as the dependent variables in the adjustment models.

NotePSRC Specifics

Due to small sample size of Call Center participants, Call Center and Online diary platform responses were combined into one category for modelling and trip weight adjustment.

Independent variables, their categorization, and reference categories for each model can be found in the table below.

Variable Categories Work model School model Other model
Diary platform
  • Online/Call center

  • rMove (Reference)

Household size
  • 1 (Reference)

  • 2

  • 3+

Number of kids in HH
  • 0 (Reference)

  • 1+

Household income
  • Under $50,000 (Reference)

  • $50,000 - $100,000

  • Above $100,000

Employment status
  • Unemployed (Reference: Other model)

  • Part-time (Reference: Work model)

  • Full-time

Works from home
  • Always (Reference)

  • Hybrid

  • Never

Age
  • 18-24 (Reference)

  • 25-64

  • 65+

Student status
  • Not a student (Reference)

  • Student

Work location regularly varies
  • Does not vary regularly (Reference)

  • Varies regularly

Educational attainment
  • Less than/completed high school/Vocational training (Reference)

  • Some college

  • Bachelor’s/Associate’s degree

  • Graduate degree

Proxy reporter’s employment status
  • Unemployed (Reference)

  • Part-time

  • Full-time

Proxy reporter works from home
  • Always (Reference)

  • Hybrid

  • Never

Proxy reporter’s gender
  • Male (Reference)

  • Female

Current school level
  • Cared for at home/Home-school (Reference)

  • Pre-k/Daycare

  • Elementary/Middle school

  • High-school/Vocational training

  • Bachelor’s/Associate’s/Graduate degree

## Pivot trip dataset to be day-level for merging
trip_type_counts = dcast(
  trips_types_complete[, .N, by = .(day_id, person_id, trip_type)],
  day_id ~ trip_type,
  value.var = "N",
  fill = 0
)

## For merge make sure id is character
trip_type_counts[, day_id := as.character(day_id)]
complete_days[, day_id := as.character(day_id)]

## Add "num_" prefix to column names of trip_type_counts
renamees = names(trip_type_counts)["day_id" != names(trip_type_counts)]
setnames(trip_type_counts, old = renamees, new = paste0("num_", renamees))

## Now merge that onto complete weekdays, filling days with no observations as 0 trips
trip_counts = merge(
  complete_days,
  trip_type_counts,
  by = "day_id", all.x = TRUE
)
setnafill(trip_counts, cols = str_subset(names(trip_counts), "num_"), fill = 0)

# some qaqc
trip_type_cols = paste0("num_", trips_types_complete[, unique(trip_type)])
trip_counts[, rowsums := rowSums(.SD), .SDcols = trip_type_cols]
stopifnot(trip_counts[num_trips != rowsums, .N] == 0)

# Write dataset to folder
fwrite(trip_counts, 
       file.path(get("working_dir", settings), "trip_counts.csv"))

14.6.0.1 Create Household-Level Model Variables

## Household-level variables for models-----------------------------------------
households <- as.data.table(households)

# Household size
households[, hh_size := fcase(
  num_people == 1, 1,
  num_people == 2, 2,
  num_people >= 3, 3,
  default = NA
)]

# Number of kids in HH
households[, n_kids := fcase(
  num_kids == 0, 0, # 0
  num_kids >= 1, 1, # 1+
  default = NA
)]

# Income categorized further
households[, income_cat := fcase(
  income_imputed %in% c(1,2), 1, # Under $49,999k
  income_imputed %in% c(3,4), 2, # $50k - $99,999k
  income_imputed %in% c(5,6), 3, # Above $100k
  default = NA
)]

# Merge household variables into model data ------------------------------------
fit_dt = merge(
  trip_counts, 
  households[, .(
    hh_id, hh_size, n_kids,
    income_imputed, income_imputed_label, income_cat
  )],
  by = "hh_id",
  all.x = TRUE  # Drop households with no trips.  They probably didn't have complete days 
)

14.6.0.2 Create Person-Level Model Variables

# Person-level variables for models --------------------------------------------
persons <- as.data.table(persons)

# Adults
persons[, is_adult := fcase(
  age > 3, 1,
  age <= 3, 0,
  default = NA
)]

# Employment status
persons[, employment_status := fcase(
  paid_work == 0  | employment == 5, 0, # Unemployed
  paid_work %in% c(1,2) & employment %in% c(1, 3) , 1, # Full-time
  paid_work %in% c(1,2) & employment %in% c(2, 8), 2, # Part-time
  paid_work == 995, NA,
  default = 0
)]
## If under 16, count as unemployed
persons[age < 3, employment_status := 0]

# Work from home
persons[employment_status %in% c(1,2), wfh := fcase(
  work_from_home == 0, 3, # Never
  work_from_home == 3, 1, # Always
  work_from_home %in% c(1,2), 2, # Hybrid
  default = NA
)]
# If unemployed, count as never works from home
persons[employment_status == 0, wfh := 3]

# Age categorized
persons[, age_cat_adults := fcase(
  age < 4, 0, # Under 18
  age == 4, 1, # 18-24
  age %in% c(5, 6, 7, 8), 2, # 25-64
  age > 8, 3, # 65+
  default = NA
)]

# Educational attainment
persons[, education_cat := fcase(
  education %in% c(1,2,4), 1, # Less than/Completed high school/Vocational training
  education == 3, 2, # Some college
  education %in% c(5,6), 3, # Bachelor's/Associate's degree
  education == 7, 4, # Graduate degree
  default = NA
)]

# Student status
persons[, is_student := fcase(
  student %in% c(0, 1, 3, 4), 1, # Current student (adult)
  student == 2, 0, # Not a student (adult)
  default = NA
)]

# Work location varies
persons[employment_status %in% c(1, 2), work_loc_varies := fcase(
  wfh == 1, 0, # works from home, does not vary
  wfh != 1 & job_type %in% c(2,3), 1, # Work location varies/delivery driver
  wfh != 1 & job_type == 1, 0, # Work location does not vary
  default = NA
)]
persons[employment_status == 0, work_loc_varies := 0]

# School type
persons[, school_cat := fcase(
  age < 4 & school_type %in% c(1, 4), 1, # Home schooled/cared for at home
  age < 4 & school_type == 2, 2, # Pre-k/daycare,
  age < 4 & school_type %in% c(5, 6), 3, # Elementary/Middle school
  age < 4 & school_type == 7, 4, # High-school/Vocational training
  age >= 3 & school_type %in% c(7, 10), 4, # High-school/Vocational training
  age >= 3 & school_type %in% c(11,13), 5, # Bachelor's/Associate's/Graduate
  age >= 3 & is_student == 0, 0, # Not in school
  default = NA  
)]

# Add in student status for those under 18
persons[age < 4, is_student := fcase(
  school_cat %in% c(2, 3, 4, 5), 1,
  school_cat == 1, 0,
  default = NA
)]

# Separate proxy reporters and create proxy reporter variables for school models
kp <- persons[is_proxy == 1]


kp[, `:=`(
  p_employment = employment_status,
  p_wfh = wfh,
  p_gender = gender_imputed
)]

# Use household ID to link proxy reporter information to persons who were proxy-reported
# in that household
persons[kp, `:=`(
  p_employment = i.p_employment,
  p_wfh = i.p_wfh,
  p_gender = i.p_gender
), on = "hh_id"]


# Merge person variables into model data ---------------------------------------
fit_dt = merge(
  fit_dt,
  persons[,
          .(
            person_id, is_adult, employment_status, wfh,
            age_cat_adults, education_cat, is_student, work_loc_varies,
            school_cat, p_employment, p_wfh, p_gender, has_proxy
          )],
  by = "person_id",
  all.x = TRUE
)

14.6.0.3 Create Trip Model Estimation Weights

This step calculates estimation weights for model fitting by scaling each record’s day weight relative to the average day weight, ensuring the total weights match the number of observations.

NotePurpose of the Estimation Weight

Estimation weights are used in Poisson regression to adjust for differing probabilities of selection or representation in the sample, making the model’s parameter estimates more reflective of the target population. Scaling ensures comparability and proper influence of each record in the model fitting process.

# Estimation weights -----------------------------------------------------------
# Scale the day weight to the dataset for models
fit_dt[, estimation_weight := day_weight / mean(day_weight)]

stopifnot(all.equal(fit_dt[, sum(estimation_weight)], nrow(fit_dt)))

14.6.0.4 Configure Variables for Models

To ensure the models are estimated correctly with easily understood outputs, this step converts each variable to a factor variable and explicitly defines the categorical levels.

# Factor vars for model --------------------------------------------------------

# Diary platform as a binary variable for model
fit_dt[, diary_binary := fcase(
  diary_platform %in% c("browser", "call"), "online/call",
  default = diary_platform
)]

# For the models, make variables factors and add labels; some extras in here for 
# testing with expanded categories
fit_dt[, `:=`(
  diary_platform = factor(diary_platform, 
                          levels = c("rmove", "browser", "call"),
                          ordered = FALSE),
  diary_binary = factor(diary_binary, 
                        levels = c("rmove", "online/call"),
                        ordered = FALSE),
  hh_size = factor(hh_size, 
                   levels = c(1, 2, 3), 
                   labels = c("1", "2", "3+"), 
                   ordered = FALSE),
  n_kids = factor(n_kids, 
                  levels = c(0, 1), 
                  labels = c("0", "1+"), 
                  ordered = FALSE),
  income_imputed = factor(income_imputed, 
                          levels = c(1,2,3,4,5,6), 
                          labels = c("$0-$24,999", "$25,000-$49,999", "$50,000-$74,999", 
                                     "$75,000-$99,999", "$100,000-$199,999", "$200,000+"), 
                          ordered = FALSE),
  employment_status = factor(employment_status, 
                             levels = c(0,1,2),
                             labels = c("Unemployed", "Full-time", "Part-time"),
                             ordered = FALSE),
  wfh = factor(wfh,
               levels = c(1,2,3),
               labels = c("Always", "Hybrid", "Never"),
               ordered = FALSE),
  age_cat_adults = factor(age_cat_adults,
                          levels = c(0,1,2,3),
                          labels = c("Under 18","18-24", "25-64", "65+"),
                          ordered = FALSE),
  education_cat = factor(education_cat,
                         levels = c(1,2,3,4),
                         labels = c("Less than/Completed high school/Vocational training",
                                    "Some college", "Bachelor's/Associate's degree", "Graduate degree"),
                         ordered = FALSE),
  is_student = factor(is_student,
                      levels = c(0, 1),
                      labels = c("Not a student", "Student"),
                      ordered = FALSE),
  work_loc_varies = factor(work_loc_varies,
                           levels = c(0, 1),
                           labels = c("No", "Yes"),
                           ordered = FALSE),
  income_cat = factor(income_cat,
                      levels = c(1, 2, 3),
                      labels = c("Under $50k", "$50-$100k", "Above $100k"),
                      ordered = FALSE),
  school_cat = factor(school_cat,
                      levels = c(0, 1, 2, 3, 4, 5),
                      labels = c("Not in school", "Home schooled/cared for at home", "Pre-k/Daycare",
                                 "Elementary/Middle school", "High-school/Vocational training", "Bachelor's/Associate's/Graduate"),
                      ordered = FALSE),
  p_employment = factor(p_employment, 
                        levels = c(0,1,2),
                        labels = c("Unemployed", "Full-time", "Part-time"),
                        ordered = FALSE),
  p_wfh = factor(p_wfh,
                 levels = c(1,2,3),
                 labels = c("Always", "Hybrid", "Never"),
                 ordered = FALSE),
  p_gender = factor(p_gender,
                    levels = c("male", "female"),
                    labels = c("Male", "Female"),
                    ordered = FALSE)
)]

14.6.1 Review Sample Size for Models

To ensure that model sample size is sufficient, review the number of person-days, adjustment-eligible trips, and total trips for each trip type (Work, School, Other, Total) across different diary platforms. This breakdown helps ensure sufficient data across platforms and trip categories, as well as the correct assignment of trip adjustment eligibility and trip categories before fitting models.

14.6.1.1 Table: Total number of person-days, adjustment-eligible trips, and total trips by trip type and diary platform

The table below outlines the number of person-days, adjustment eligible trips, and total trips by trip type and diary platform. This information outlines the number of trips eligible for adjustment for bias with respect to the total number of trips.

Trip type

Diary platform

Person-days

Adjustment-eligible trips

Total trips

Other

Online

4,255

8,048

8,826

Call Center

155

361

373

rMove

2,297

5,215

5,474

All platforms

6,707

13,624

14,673

School

Online

683

1,040

1,215

Call Center

9

14

16

rMove

179

251

368

All platforms

871

1,305

1,599

Work

Online

2,343

3,003

3,185

Call Center

48

59

63

rMove

1,549

2,689

2,816

All platforms

3,940

5,751

6,064

Total

Online

4,938

12,091

13,578

Call Center

164

434

462

rMove

2,476

8,155

9,337

All platforms

7,578

20,680

23,377

14.6.2 Prepare Model Formulas

Lastly, the variables for each model formula are read in from the settings. This enables code to be efficient and dynamic in producing models.

# Right-hand side of formula
model_rhs_vars = get("trip_rate_model_vars", settings)

model_lhs = get("trip_rate_model_lhs", settings)

# Left-hand side of formula
model_lhs_num = paste(paste0("num_", names(model_lhs)))


cli::cli_inform(c("Model formula: ", model_formula))
Error: object 'model_formula' not found

14.7 Estimate Models

This step estimates the trip adjustment Poisson regression models that predict the number of trips of each type per person-day. The dependent variables are trip counts by type (e.g., number of work trips). Model fit statistics are also calculated for QAQC.

model_ls = list()
trip_rate_factors_all = fit_dt[, .(hh_id, person_id, day_id, diary_binary)]
for (lhs in model_lhs_num) {
    if (lhs == "num_loop") next
    
    model_rhs_num = model_rhs_vars[[lhs]]
    model_rhs = paste("~", paste(model_rhs_num, collapse = " + "))
    
    # Build formula from the remaining terms
    fmla = as.formula(paste(lhs, model_rhs))

    cli::cli_inform(c("model formula: ", fmla))
    # TODO: parameterize
    # Restrict to appropriate sub-populations
    if (lhs %like% 'work') {fit_dt_est = fit_dt[is_adult == 1 & employment_status %in% c("Full-time", "Part-time")]
      } else if (lhs %like% 'school') {fit_dt_est = fit_dt[is_adult == 0 & has_proxy == 1]
      } else if (lhs %like% 'other') {fit_dt_est = fit_dt[is_adult == 1]
      }
    
    # Estimate model
    model = glm(
        fmla,
        data = fit_dt_est,
        weights = fit_dt_est[, estimation_weight],
        family = "poisson"
    )
    model_ls[[lhs]] = model
    model_desc = broom::tidy(model)
    
    # Prepare model results
    model_perf = as.data.table(performance::model_performance(model))
    r2 = t(model_perf)[, 1][grepl("R2", names(model_perf))]
    range = paste(paste(c('low:', 'high:'), round(range(fitted(model)), 4)), collapse = ", ")
    
    report_dir = get('report_dir', settings)
    model_name = gsub("num_", "model_", lhs)

    # Save model
    fwrite(model_desc, file.path(report_dir, "models", paste0(model_name, "_coef.csv")))
    fwrite(model_perf, file.path(report_dir, "models", paste0(model_name, "_fit.csv")))
    
}

14.7.1 Review Trip Weight Adjustment Models

The results of the trip rate adjustment models are shown in the tables below. P-values for individual variables may not always be significant, but the variables are retained in the model for estimation purposes. McFadden’s R2 is also presented below each table to represent the model fit.

14.7.1.1 Table: Other trip model estimates

Variable

Estimate

Std Error

T-Statistic

p.value

Model intercept

1.237

0.066

18.75

<0.001

-0.310

0.023

-13.73

<0.001

HH size: 2 (Ref: 1)

-0.029

0.029

-1.02

0.308

HH size: 3+ (Ref: 1)

0.015

0.034

0.43

0.664

HH income: $50-$100k (Ref: Under $50k)

0.058

0.028

2.09

0.036

HH income: Above $100k (Ref: Under $50k)

-0.092

0.027

-3.41

<0.001

1+ kids in HH (Ref: no kids)

0.331

0.026

12.71

<0.001

Employment status: Full-time (Ref: Unemployed or Part-time)

-0.698

0.025

-27.56

<0.001

Employment status: Part-time (Ref: Unemployed)

-0.353

0.028

-12.43

<0.001

Works from home: Hybrid (Ref: Always)

-0.264

0.035

-7.45

<0.001

Works from home: Never (Ref: Always)

-0.354

0.034

-10.31

<0.001

Age: 25-64 (Ref: 18-24)

0.319

0.043

7.39

<0.001

Age: 65+ (Ref: 18-24)

0.302

0.048

6.28

<0.001

Student (Ref: Not student)

-0.174

0.044

-3.96

<0.001

Education: Some college (Ref: Less than/complete high school/Vocational training)

0.226

0.032

7.15

<0.001

Education: Bachelor's/Associate's (Ref: Less than/complete high school/Vocational training)

0.291

0.026

11.32

<0.001

Education: Graduate degree (Ref: Less than/complete high school/Vocational training)

0.320

0.029

11.18

<0.001

McFadden's rho-squared: 0.273

14.7.1.2 Table: School trip model estimates

Variable

Estimate

Std Error

T-Statistic

p.value

Model intercept

-3.195

0.447

-7.15

<0.001

0.115

0.061

1.89

0.058

HH size: 3+ (Ref: 1)

-0.453

0.133

-3.39

<0.001

HH income: $50-$100k (Ref: Under $50k)

0.145

0.107

1.36

0.175

HH income: Above $100k (Ref: Under $50k)

0.457

0.099

4.64

<0.001

School level: Pre-k/Daycare (Ref: Home-schooled/cared for at home)

4.059

0.417

9.74

<0.001

School level: Elementary/Middle school (Ref: Home-schooled/cared for at home)

4.192

0.414

10.12

<0.001

School level: High-school/Vocational training (Ref: Home-school/cared for at home

4.261

0.416

10.25

<0.001

School level: Bachelor's/Associate's/Graduate degree (Ref: Home-school/cared for at home

2.717

0.814

3.34

<0.001

Proxy employment status: Full-time (Ref: Unemployed)

-0.340

0.058

-5.87

<0.001

Employment status: Part-time (Ref: Unemployed)

-0.252

0.074

-3.41

<0.001

Proxy works from home: Hybrid (Ref: Always)

-0.117

0.091

-1.28

0.2

Proxy works from home: Never (Ref: Always)

-0.295

0.092

-3.20

0.001

Proxy gender: Female (Ref: Male)

0.013

0.048

0.27

0.784

McFadden's rho-squared: 0.649

14.7.1.3 Table: Work trip model estimates

Variable

Estimate

Std Error

T-Statistic

p.value

Model intercept

-1.994

0.182

-10.93

<0.001

-0.486

0.032

-15.40

<0.001

HH size: 2 (Ref: 1)

-0.025

0.043

-0.57

0.57

HH size: 3+ (Ref: 1)

-0.010

0.053

-0.18

0.857

HH income: $50-$100k (Ref: Under $50k)

0.210

0.047

4.49

<0.001

HH income: Above $100k (Ref: Under $50k)

0.049

0.045

1.11

0.269

1+ kids in HH (Ref: no kids)

-0.010

0.041

-0.24

0.811

Employment status: Part-time (Ref: Unemployed)

-0.339

0.038

-9.03

<0.001

Works from home: Hybrid (Ref: Always)

2.791

0.168

16.59

<0.001

Works from home: Never (Ref: Always)

3.021

0.168

18.02

<0.001

Age: 25-64 (Ref: 18-24)

0.075

0.057

1.32

0.187

Age: 65+ (Ref: 18-24)

0.009

0.085

0.10

0.919

Student (Ref: Not student)

0.118

0.067

1.76

0.078

Work location varies regularly (Ref: Does not vary regurlarly)

0.406

0.029

13.81

<0.001

Education: Some college (Ref: Less than/complete high school/Vocational training)

-0.668

0.058

-11.48

<0.001

Education: Bachelor's/Associate's (Ref: Less than/complete high school/Vocational training)

-0.200

0.035

-5.76

<0.001

Education: Graduate degree (Ref: Less than/complete high school/Vocational training)

-0.273

0.040

-6.85

<0.001

McFadden's rho-squared: 0.449

14.8 Calculate Trip Weight Adjustment Factors

To isolate the effect of trip diary bias, and calculate bias adjustment factors, two sets of predictions are produced for each model:

  1. With diary platform coefficients included, reflecting observed differences between platforms.

  2. With diary platform coefficients set to zero, removing platform effects.

The ratio of these two predictions represent the modeled trip rate adjustment factor (see “Modeled” column). For example, if the model predicted 1.10 trips with diary platform effects and 1.32 without them, the resulting adjustment factor was 1.20 (1.32 ÷ 1.10).

Because rMove is specified as the reference platform, its modeled factor is always 1.0.

14.8.1 Calculate Initial Adjustment Factors from Models

# Calculate trip weight adjustment factors =====================================
trip_rate_factors_all = fit_dt[, .(hh_id, person_id, day_id, diary_binary)]

for (lhs in model_lhs_num) {
    if (lhs == "num_loop") next
  
    model = model_ls[[lhs]]
  
    # Restrict to appropriate sub-populations
    if (lhs %like% 'work')  {fit_dt_est = fit_dt[is_adult == 1 & employment_status %in% c("Full-time", "Part-time")]
      } else if (lhs %like% 'school') {fit_dt_est = fit_dt[is_adult == 0 & has_proxy == 1]
      }else if (lhs %like% 'other') fit_dt_est = fit_dt[is_adult == 1]
    
    bias_vars = stringr::str_subset(all.vars(formula(model)), "^diary_*")
    bias_vars = paste(bias_vars[bias_vars != "diary_rmove"], collapse = "|")

    model_coefs = coef(model)
    bias_idx = which(str_detect(names(model_coefs), bias_vars))

    # Make copy and set bias coefficients to 0
    model_corrected = model
    model_corrected$coefficients[bias_idx] = 0

    # Make trip count predictions
    fit_dt_est[, pred := predict(model, newdata = fit_dt_est, type = "response")]
    fit_dt_est[, pred_corrected := predict(model_corrected, newdata = fit_dt_est, type = "response")]

    # Calculate trip rate factor
    fit_dt_est[, trip_rate_factor := pred_corrected / pred]
    fit_dt_est[, .N, keyby = .(diary_binary, round(trip_rate_factor, 2))]

    # Clean up
    response_var = all.vars(formula(model))[1]
    newname = str_replace(response_var, "num", "trip_rate_factor")

    setnames(fit_dt_est, "trip_rate_factor", newname)
      
    # Save individual factors
    rate_factor_name_i = names(fit_dt_est)[names(fit_dt_est) %like% 'trip_rate_factor']
    factor_cols = c(
      "hh_id",  "person_id", "day_id", "diary_binary",
      # names(trip_rate_factors_all),
      rate_factor_name_i
    )
    factors_i = fit_dt_est[, .SD, .SDcols = factor_cols]
    
    # Join, fill missing
    trip_rate_factors_all = merge(
      trip_rate_factors_all,
      factors_i,
      by = c("hh_id", "person_id", "day_id", "diary_binary"),
      all.x = TRUE
    )
    setnafill(trip_rate_factors_all, "const", fill = 1.0, cols = rate_factor_name_i)
}

#fit_dt_est = merge(
fit_dt_all = merge(
  fit_dt,
  trip_rate_factors_all,
  by = c("hh_id", "person_id", "day_id", "diary_binary"), #"day_group"),
  all.x = TRUE
)

14.8.2 Rescale and Cap Adjustment Factors

After the first calculation, the modeled factors are rescaled so that the minimum factor for each trip type is set to 1.0. This step enforces the working assumption that platform differences reflect under-reporting rather than over-reporting: if a platform’s raw factor fell below 1.0, that value is reset to 1.0 and all other platform factors are scaled upward relative to it. As a result, rMove’s factor could remain at 1.0 or increase above 1.0, depending on the relative positioning of other platforms. To avoid extreme corrections, factors are then capped at 2.0.

trip_rate_cols = grep("trip_rate_factor_", names(fit_dt_all), value = TRUE)

# Make a dataset with just the ids and factors
estimate_dt_all = copy(
  fit_dt_all[, mget(c("hh_id", "person_id", "day_id", "diary_binary", trip_rate_cols))]
)

# Melt dataset for reporting trip adjustment factors by diary platform and trip type
rate_factor_dt = data.table::melt(
  estimate_dt_all[, c("diary_binary", trip_rate_cols), with = FALSE],
  measure.vars = trip_rate_cols,
  id.vars = 'diary_binary', 
  variable.name = 'trip_type', 
  value.name = 'factor'
)
# Make factor column names just the trip type
rate_factor_dt[, trip_type := stringr::str_replace(trip_type, 'trip_rate_factor_', '')]

# Remove duplicate factors
raw_factors = dcast(rate_factor_dt[(diary_binary == "online/call" & factor != 1) | diary_binary == "rmove"], 
                    diary_binary ~ trip_type, 
                    value.var = "factor", 
                    fun.aggregate = mean, na.rm = TRUE)

modelled_factors = dcast(rate_factor_dt[(diary_binary == "online/call" & factor != 1) | diary_binary == "rmove"], 
                         trip_type ~ diary_binary, 
                         value.var = "factor", 
                         fun.aggregate = mean, na.rm = TRUE)

# Rescale the factors if rmove if not the minimum factor
weight_adj_dt = rate_factor_dt
weight_adj_dt[, ref_factor := min(factor, na.rm=TRUE), by = .(trip_type)]
weight_adj_dt[, rescaled_factor := factor / ref_factor]

# Reshape back to original format
rescaled_factors = dcast(weight_adj_dt[(diary_binary == "online/call" & factor != 1) | diary_binary == "rmove"], 
                         trip_type ~ diary_binary, 
                         value.var = "rescaled_factor", 
                         fun.aggregate = mean, na.rm = TRUE )

hard_cap = 2.0 # Default hard cap for trip rate factors
if ("trip_rate_factor_cap" %in% names(settings)) {
    hard_cap = as.numeric(settings$trip_rate_factor_cap)    
}

# Cap any factors over 2
capped_factors = copy(rescaled_factors)
capped_factors[`online/call` > 2, `online/call` := 2]

14.8.3 Export Adjustment Factors

# Combine into one dataset
all_factors <- rbindlist(list("modeled" = modelled_factors, 
                              "rescaled" = rescaled_factors, 
                              "capped" = capped_factors),
                         idcol = "processing_step")

all_factors <- melt(all_factors, id.vars = c("processing_step", "trip_type"),
                    value.name = "factor", variable.name = "platform")

all_factors <- dcast(all_factors, trip_type + platform ~ processing_step)

setcolorder(all_factors, c("trip_type", "platform", "modeled", "rescaled", "capped"))
all_factors[, platform := factor(platform, levels = c("online/call", "rmove"))]
all_factors[, trip_type := factor(trip_type, levels = c("other", "school", "work"))]

setorderv(all_factors, c("trip_type", "platform"))

# Write dataset for reporting
fwrite(all_factors, file.path(settings$report_dir, "models", "trip_adjustment_factors.csv"))

14.8.4 Review Adjustment Factors

14.8.4.1 Table: Trip weight adjustment factors by diary platform and trip type

Trip type

Diary platform

Modeled

Rescaled

Capped

Other

Online/Call Center

1.36

1.36

1.36

rMove

1.00

1.00

1.00

School

Online/Call Center

0.89

1.00

1.00

rMove

1.00

1.12

1.12

Work

Online/Call Center

1.63

1.63

1.63

rMove

1.00

1.00

1.00

14.9 Adjust Trip Weights by Trip Type and Person Eligibility

In this step, factors are applied to trip weights produced in Round 2 based on adjustment eligibility and trip type.

# Change name in trip rate factor dataset to match
setnames(all_factors, "platform", "diary_binary", skip_absent = TRUE)
all_factors[, `:=`(
  trip_type = as.character(trip_type),
  diary_binary = as.character(diary_binary)
  )]

# Create eligibility variables for reference and join to linked_trip_weights
fit_dt_all[, work_eligible := fcase(
  employment_status %in% c("Full-time", "Part-time") & is_adult == 1, 1,
  default = 0
)]
fit_dt_all[, school_eligible := fcase(
  is_adult == 0 & has_proxy == 1, 1,
  default = 0
)]
fit_dt_all[, other_eligible := fcase(
  is_adult == 1, 1,
  default = 0
)]

linked_trip_weights[fit_dt_all, `:=`(
  work_eligible = i.work_eligible,
  school_eligible = i.school_eligible,
  other_eligible = i.other_eligible,
  diary_binary = i.diary_binary
), on = c("day_id", "person_id", "hh_id")]

linked_trip_weights[trips_types, trip_type := i.trip_type, on = "linked_trip_id"]

# Join the trip rate factors to the trip table for broad models
linked_trip_weights[all_factors,
                    factor := i.capped,
                    on = c("diary_binary", "trip_type")]

# Apply factors
linked_trip_weights[trip_type == "work" & work_eligible == 1,
                    linked_trip_weight_round_3 := linked_trip_weight_round_2 * factor]
linked_trip_weights[trip_type == "work" & is.na(linked_trip_weight_round_3),
                    linked_trip_weight_round_3 := linked_trip_weight_round_2]

linked_trip_weights[trip_type == "other" & other_eligible == 1,
                    linked_trip_weight_round_3 := linked_trip_weight_round_2 * factor]
linked_trip_weights[trip_type == "other" & is.na(linked_trip_weight_round_3),
                    linked_trip_weight_round_3 := linked_trip_weight_round_2]

linked_trip_weights[trip_type == "school" & school_eligible == 1,
                    linked_trip_weight_round_3 := linked_trip_weight_round_2 * factor]
linked_trip_weights[trip_type == "school" & is.na(linked_trip_weight_round_3),
                    linked_trip_weight_round_3 := linked_trip_weight_round_2]

linked_trip_weights[trip_type == "loop", linked_trip_weight_round_3 := linked_trip_weight_round_2]

linked_trip_weights[, linked_trip_weight := linked_trip_weight_round_3]
linked_trip_weights[is.na(linked_trip_weight_round_3), linked_trip_weight_round_3 := 0]

fwrite(linked_trip_weights, file.path(settings$outputs_dir, "linked_trip_weights.csv"))

# Export person-day dataset
fwrite(fit_dt_all, file.path(report_dir, "models", paste0("fit_dt_all.csv")))

14.10 Assign Round 3 Adjusted Linked Trip Weights to Corresponding Unlinked Trips

If linked trips are used to calculate adjustment factors, the adjusted linked trip weights are then mapped back to unlinked trips.

if ("linked_trip" %in% names(settings$weight_output_map)) {

# Unlinked trip weights will be the same as the linked trip weights
# Store as a new suffixed column, and as the canonical trip_weight column
trip_weights[linked_trip_weights, trip_weight_round_3 := i.linked_trip_weight_round_3, on = 'linked_trip_id']
trip_weights[, trip_weight := trip_weight_round_3]

# Assert no NA trip weights
stopifnot(trip_weights[is.na(trip_weight), .N] == 0)
} else {
  cat("Skipping this step - linked trip data were not used.")
}

14.11 Calculate New Tour Weights From Round 3 Adjusted Linked Trip Weights

If linked trips are used to calculate adjustment factors, this step re-calculates tour weights as the mean of the adjusted weights for the linked trips comprising each tour.

# Tour weights will be the mean of the linked trip weights that make up the tour
if ("tour" %in% names(settings$weight_output_map)) {
  
  tour_weights = fread(
    file.path(get('outputs_dir', settings), 'tour_weights.csv'),
    colClasses = colclasses[c("hh_id", "person_id", "day_id", "tour_id")]
  )
  
  # Ensure id variables are all character - this sometimes gets reset throughout the process
  linked_trip_weights[, `:=`(
    hh_id = as.character(hh_id),
    person_id = as.character(person_id),
    day_id = as.character(day_id),
    tour_id = as.character(tour_id)
  )]
  
  # Create round 3 variable or set to 0 if a re-run
  tour_weights[, tour_weight_round_3 := NA_integer_]
  
  # Calculate new tour weights based on linked trip weights
  tour_weights[linked_trip_weights, tour_weight_round_3 := 
                 mean(linked_trip_weight_round_3[linked_trip_weight_round_3 != 0], na.rm = TRUE), 
               on = .(hh_id, person_id, day_id, tour_id)]
  tour_weights[is.na(tour_weight_round_3), tour_weight_round_3 := 0]
  tour_weights[, tour_weight := tour_weight_round_3]
  
} else {
  cat("Skipping this step - either linked trip data were not used and/or tour data are not available.")
}

14.12 Save Round 3 Weights

# Save weights =================================================================
weights_output = list(
  hh_weights = hh_weights, # Updating to exclude rounds 1-3 columns
  person_weights = person_weights, # Updating to exclude rounds 1-3 columns
  day_weights = day_weights, # Updating to exclude rounds 1-3 columns
  trip_weights = trip_weights
)

# Append linked trip weights if there are linked trips
if ("linked_trip" %in% names(settings$weight_output_map)) {
  weights_output[['linked_trip']] = linked_trip_weights
}
# Append tour weights if there are tours
if ("tour" %in% names(settings$weight_output_map)) {
  weights_output[['tour']] = tour_weights
}

# Save weights
for (wt_table in names(weights_output)) {
  
  path = file.path(get('outputs_dir', settings), paste0(wt_table, '.csv'))
  
  message(paste0('Writing ', wt_table, ' to ', path))
  
  # Drop any column that isn't an ID or a weight column
  id_cols = stringr::str_subset(names(weights_output[[wt_table]]), "(.+)_id$")
  wt_cols = stringr::str_subset(names(weights_output[[wt_table]]), "(.+)_weight")
  factor_cols = stringr::str_subset(names(weights_output[[wt_table]]), "(.+)_factor$")
  keep_cols = c("day_group", id_cols, wt_cols, factor_cols)
  weights_output[[wt_table]] = weights_output[[wt_table]][, ..keep_cols]
  
  # Order column so the unsuffixed weight column is last
  final_col = stringr::str_subset(wt_cols, '_weight$')
  wt_order = c(id_cols, setdiff(wt_cols, final_col), final_col)
  setcolorder(weights_output[[wt_table]], wt_order)
  
  fwrite(weights_output[[wt_table]], file = path)
  
}

14.13 Review Impact of Round 3 Weight Adjustments on Trip Rates

The below figures summarize how reported trip rates evolve across weighting steps by diary platform. Average daily trip rates are calculated among only those persons eligible for that type of adjustment in Round 3 (i.e., work trip rates are calculated amongst those who are employed). This is done to assess the impact of Round 3 adjustment on trip rate results. Trip rates with large confidence intervals may represent combinations with small sample sizes and should be interpreted with caution.

14.13.0.1 Figure: Average daily trip weight by weighting round and diary platform - Other trips

14.13.0.2 Figure: Average daily trip weight by weighting round and diary platform - School trips

14.13.0.3 Figure: Average daily trip weight by weighting round and diary platform - Work trips