library(dml.sensemakr)
#> See details in:
#> - Chernozhukov, V. Cinelli, C. Newey, W. Sharma, A. Syrgkanis, V. (2026). Long Story Short: Omitted Variable Bias in Causal Machine Learning. Review of Economics and Statistics.
#> - Available at: https://doi.org/10.1162/REST.a.1705
# Vignette under construction.
# Load the Penn data set and extract the three inputs passed to dml():
# the outcome vector, the treatment vector, and the covariate matrix.
data("Penn")
y <- Penn[["inuidur1"]]
d <- Penn[["tg"]]
# Covariate design matrix built without an intercept column (`0 +`).
x <- model.matrix(
  ~ 0 + female + black + othrace + dep + q2 + q3 + q4 + q5 + q6 +
    agelt35 + agegt54 + durable + lusd + husd,
  data = Penn
)
# ranger
# Fit DML under the partially linear model (model = "plm"). No learner is
# passed, so the package default is used; the printed output below reports
# ranger for both the outcome and the treatment regressions, with 5-fold
# cross-fitting and 1 repetition.
# NOTE(review): no seed is set before this call; if fold assignment or the
# learner is randomized, the printed estimates will not reproduce exactly.
dml.ranger <- dml(y, d, x, model = "plm")
#> Warning: The reference category for the variable 'dep' contains no
#> observations.
#> Debiased Machine Learning
#>
#> Model: Partially Linear
#> Target: ate
#> Cross-Fitting: 5 folds, 1 reps
#> ML Method: outcome (yreg0:ranger, yreg1:ranger), treatment (ranger)
#> Tuning: dirty
#>
#>
#> ====================================
#> Tuning parameters using all the data
#> ====================================
#>
#> - Tuning Model for D.
#> -- Best Tune:
#> mtry min.node.size splitrule
#> 1 5 5 variance
#>
#> - Tuning Model for Y (partially linear).
#> -- Best Tune:
#> mtry min.node.size splitrule
#> 1 5 5 variance
#>
#>
#> ======================================
#> Repeating 5-fold cross-fitting 1 times
#> ======================================
#>
#> -- Rep 1 -- Folds: 1 2 3 4 5
# Print the summary of the ranger-based fit: the estimate table for the
# average treatment effect and the verbal description of the procedure.
summary(dml.ranger)
#>
#> Debiased Machine Learning
#>
#> Model: Partially Linear
#> Cross-Fitting: 5 folds, 1 reps
#> ML Method: outcome (yreg0:ranger, yreg1:ranger, R2 = 0%), treatment (ranger, R2 = 0%)
#> Tuning: dirty
#>
#> Average Treatment Effect:
#>
#> Estimate Std. Error t value P(>|t|)
#> ate.all -0.07415 0.03567 -2.078 0.0377 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Note: DML estimates combined using the median method.
#>
#> Verbal interpretation of DML procedure:
#>
#> -- Average treatment effects were estimated using DML with 5-fold cross-fitting. In order to reduce the variance that stems from sample splitting, we repeated the procedure 1 times. Estimates are combined using the median as the final estimate, incorporating variation across experiments into the standard error as described in Chernozhukov et al. (2018). The outcome regression uses from the R package ; the treatment regression uses Random Forest from the R package ranger.
# Neural Net Args
# Learner specification handed to dml() through its `reg` argument:
#   method   - learner name ("nnet")
#   tuneGrid - single tuning point (2 hidden units, weight decay 0.02)
#   maxit    - iteration cap forwarded to the learner
#   MaxNWts  - cap on the number of weights; nnet::nnet() spells this
#              argument with a capital "M", and R matches argument names
#              case-sensitively, so a lower-case spelling would not reach it
nnet.args <- list(
  method = "nnet",
  tuneGrid = data.frame(size = 2, decay = 0.02),
  maxit = 1000,
  MaxNWts = 10000
)
# nnet
# Refit the partially linear DML model, this time passing `nnet.args` as the
# learner specification; the printed output below reports nnet for both the
# outcome and the treatment regressions. The fit is stored under its own
# name so it neither masquerades as, nor overwrites, the ranger-based fit.
dml.nnet <- dml(y, d, x, model = "plm", reg = nnet.args)
#> Warning: The reference category for the variable 'dep' contains no
#> observations.
#> Debiased Machine Learning
#>
#> Model: Partially Linear
#> Target: ate
#> Cross-Fitting: 5 folds, 1 reps
#> ML Method: outcome (yreg0:nnet, yreg1:nnet), treatment (nnet)
#> Tuning: dirty
#>
#>
#> ====================================
#> Tuning parameters using all the data
#> ====================================
#>
#> - Tuning Model for D.
#> -- Best Tune:
#> size decay
#> 1 2 0.02
#>
#> - Tuning Model for Y (partially linear).
#> -- Best Tune:
#> size decay
#> 1 2 0.02
#>
#>
#> ======================================
#> Repeating 5-fold cross-fitting 1 times
#> ======================================
#>
#> -- Rep 1 -- Folds: 1 2 3 4 5
# Print the estimate table and verbal description for the nnet-based fit.
summary(dml.nnet)
#>
#> Debiased Machine Learning
#>
#> Model: Partially Linear
#> Cross-Fitting: 5 folds, 1 reps
#> ML Method: outcome (yreg0:nnet, yreg1:nnet, R2 = 1.248%), treatment (nnet, R2 = 0%)
#> Tuning: dirty
#>
#> Average Treatment Effect:
#>
#> Estimate Std. Error t value P(>|t|)
#> ate.all -0.07067 0.03537 -1.998 0.0457 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Note: DML estimates combined using the median method.
#>
#> Verbal interpretation of DML procedure:
#>
#> -- Average treatment effects were estimated using DML with 5-fold cross-fitting. In order to reduce the variance that stems from sample splitting, we repeated the procedure 1 times. Estimates are combined using the median as the final estimate, incorporating variation across experiments into the standard error as described in Chernozhukov et al. (2018). The outcome regression uses from the R package ; the treatment regression uses Neural Network from the R package nnet.