The print and summary methods provide descriptions of the results obtained with the function dml.
The coef function extracts the coefficients.
The se function extracts the standard errors.
The confint function extracts confidence intervals.
# S3 method for class 'dml'
summary(object, combine.method = "median", ...)
# S3 method for class 'dml'
coef(object, combine.method = "median", ...)
se(object, ...)
# S3 method for class 'dml'
se(object, combine.method = "median", ...)
# S3 method for class 'dml'
confint(object, parm = NULL, level = 0.95, combine.method = "median", ...)
# S3 method for class 'summary_dml'
print(x, digits = max(3L, getOption("digits") - 3L), interpret = TRUE, ...)
# S3 method for class 'dml'
print(
x,
digits = max(3L, getOption("digits") - 3L),
combine.method = "median",
...
)

object: an object of class dml.
combine.method: method to combine the results of each repetition of the DML fit. Options are "mean" and "median". Default is "median".
...: arguments passed to other methods.
parm: a character vector with the names of parameters.
level: the confidence level. Default is 0.95.
x: an object of class dml.
digits: minimal number of significant digits.
interpret: logical. Should a verbal interpretation of the DML procedure be printed? Default is TRUE.
For summary: an object of class summary_dml. For coef: a named numeric vector of coefficients. For se: a named numeric vector of standard errors. For confint: a matrix with confidence intervals. For print: the input object, invisibly.
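The combine.method argument determines how the point estimates and standard errors from each repetition of the cross-fitting procedure are pooled into a single result. The following is a minimal sketch of the median/mean aggregation described in Chernozhukov et al. (2018), using hypothetical per-repetition values; the internal implementation in dml may differ.

# hypothetical point estimates and standard errors, one per cross-fitting rep
theta <- c(8100, 8350, 8220)
ses   <- c(1120, 1180, 1140)

# "median" combination: median estimate, with the dispersion across reps
# folded into the reported variance
theta.med <- median(theta)
se.med    <- sqrt(median(ses^2 + (theta - theta.med)^2))

# "mean" combination: analogous, using means
theta.mean <- mean(theta)
se.mean    <- sqrt(mean(ses^2 + (theta - theta.mean)^2))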
# loads package
library(dml.sensemakr)
## loads data
data("pension")
# set the outcome
y <- pension$net_tfa # net total financial assets
# set the treatment
d <- pension$e401 # 401K eligibility
# set the covariates (a matrix)
x <- model.matrix(~ -1 + age + inc + educ + fsize + marr + twoearn + pira + hown, data = pension)
## compute income quartiles for group ATE.
g1 <- cut(x[,"inc"], quantile(x[,"inc"], c(0, 0.25,.5,.75,1), na.rm = TRUE),
labels = c("q1", "q2", "q3", "q4"), include.lowest = TRUE)
# run DML (nonparametric model)
## 2 folds (change as needed)
## 1 repetition (change as needed)
dml.401k <- dml(y, d, x, model = "npm", groups = g1, cf.folds = 2, cf.reps = 1)
#> Debiased Machine Learning
#>
#> Model: Nonparametric
#> Target: ate
#> Cross-Fitting: 2 folds, 1 reps
#> ML Method: outcome (yreg0:ranger, yreg1:ranger), treatment (ranger)
#> Tuning: dirty
#>
#>
#> ====================================
#> Tuning parameters using all the data
#> ====================================
#>
#> - Tuning Model for D.
#> -- Best Tune:
#> mtry min.node.size splitrule
#> 1 2 5 variance
#>
#> - Tuning Model for Y (non-parametric).
#> -- Best Tune:
#> mtry min.node.size splitrule
#> 1 2 5 variance
#> mtry min.node.size splitrule
#> 1 2 5 variance
#>
#>
#> ======================================
#> Repeating 2-fold cross-fitting 1 times
#> ======================================
#>
#> -- Rep 1 -- Folds: 1 2
#>
summary(dml.401k)
#>
#> Debiased Machine Learning
#>
#> Model: Nonparametric
#> Cross-Fitting: 2 folds, 1 reps
#> ML Method: outcome (yreg0:ranger, yreg1:ranger, R2 = 26.155%), treatment (ranger, R2 = 11.83%)
#> Tuning: dirty
#>
#> Average Treatment Effect:
#>
#> Estimate Std. Error t value P(>|t|)
#> ate.all 8231 1152 7.143 9.12e-13 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Group Average Treatment Effect:
#>
#> Estimate Std. Error t value P(>|t|)
#> gate.q1 4454.5 887.5 5.019 5.19e-07 ***
#> gate.q2 2709.5 1315.0 2.060 0.0394 *
#> gate.q3 7161.3 1836.2 3.900 9.62e-05 ***
#> gate.q4 18596.7 3910.7 4.755 1.98e-06 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Note: DML estimates combined using the median method.
#>
#> Verbal interpretation of DML procedure:
#>
#> -- Average treatment effects were estimated using DML with 2-fold cross-fitting. In order to reduce the variance that stems from sample splitting, we repeated the procedure 1 times. Estimates are combined using the median as the final estimate, incorporating variation across experiments into the standard error as described in Chernozhukov et al. (2018). The outcome regression uses Random Forest from the R package ranger; the treatment regression uses Random Forest from the R package ranger.
summary(dml.401k, combine.method = "mean")
#>
#> Debiased Machine Learning
#>
#> Model: Nonparametric
#> Cross-Fitting: 2 folds, 1 reps
#> ML Method: outcome (yreg0:ranger, yreg1:ranger, R2 = 26.155%), treatment (ranger, R2 = 11.83%)
#> Tuning: dirty
#>
#> Average Treatment Effect:
#>
#> Estimate Std. Error t value P(>|t|)
#> ate.all 8231 1152 7.143 9.12e-13 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Group Average Treatment Effect:
#>
#> Estimate Std. Error t value P(>|t|)
#> gate.q1 4454.5 887.5 5.019 5.19e-07 ***
#> gate.q2 2709.5 1315.0 2.060 0.0394 *
#> gate.q3 7161.3 1836.2 3.900 9.62e-05 ***
#> gate.q4 18596.7 3910.7 4.755 1.98e-06 ***
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Note: DML estimates combined using the mean method.
#>
#> Verbal interpretation of DML procedure:
#>
#> -- Average treatment effects were estimated using DML with 2-fold cross-fitting. In order to reduce the variance that stems from sample splitting, we repeated the procedure 1 times. Estimates are combined using the mean as the final estimate, incorporating variation across experiments into the standard error as described in Chernozhukov et al. (2018). The outcome regression uses Random Forest from the R package ranger; the treatment regression uses Random Forest from the R package ranger.
coef(dml.401k)
#> ate.all gate.q1 gate.q2 gate.q3 gate.q4
#> 8230.988 4454.540 2709.535 7161.307 18596.731
coef(dml.401k, combine.method = "mean")
#> ate.all gate.q1 gate.q2 gate.q3 gate.q4
#> 8230.988 4454.540 2709.535 7161.307 18596.731
se(dml.401k)
#> ate.all gate.q1 gate.q2 gate.q3 gate.q4
#> 1152.2759 887.4944 1315.0380 1836.2201 3910.6600
confint(dml.401k, combine.method = "mean")
#> 2.5 % 97.5 %
#> ate.all 5972.5687 10489.407
#> gate.q1 2715.0826 6193.997
#> gate.q2 132.1082 5286.962
#> gate.q3 3562.3813 10760.232
#> gate.q4 10931.9777 26261.483
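As a usage note, the extractors can be combined to assemble a compact results table from a fitted dml object. A minimal sketch, continuing the example above (column names are illustrative):

# point estimates, standard errors, and confidence intervals in one data frame
res <- data.frame(
  estimate  = coef(dml.401k),
  std.error = se(dml.401k),
  confint(dml.401k),
  check.names = FALSE
)
res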