Source code for sensemakr.sensitivity_plots

"""
Provides sensitivity contour plots and extreme scenario sensitivity plots.

They can be used on an object of class `Sensemakr`, directly in an OLS `statsmodel,`
or by providing the required statistics manually.
"""
# Code for producing sensitivity contour plots and other plots
import matplotlib.pyplot as plt
from . import sensitivity_statistics
from . import sensitivity_bounds
from . import bias_functions
import sys
import numpy as np
from scipy.stats import t
import pandas as pd


plot_env = {'lim': 0.4, 'lim_y': 0.4, 'reduce': None, 'sensitivity_of': None, 'treatment': None}
# plot_env_ext = {'lim': 0.4, 'lim_y': 0.4, 'reduce': None, 'treatment': None}
plt.rcParams['figure.dpi'] = 96
plt.rcParams['savefig.dpi'] = 300

[docs] def ovb_contour_plot(sense_obj=None, sensitivity_of='estimate', model=None, treatment=None, estimate=None, se=None, dof=None, benchmark_covariates=None, kd=1, ky=None, r2dz_x=None, r2yz_dx=None, bound_label=None, reduce=True, estimate_threshold=0, t_threshold=2, lim=None, lim_y=None, col_contour="black", col_thr_line="red", label_text=True, label_bump_x=None, label_bump_y=None, xlab=None, ylab=None, plot_margin_fraction=0.05, round_dig=3, n_levels=None): r""" Contour plots of omitted variable bias for sensitivity analysis. The main inputs are a statsmodel object, the treatment variable and the covariates used for benchmarking the strength of unobserved confounding. The horizontal axis of the plot shows hypothetical values of the partial R2 of the unobserved confounder(s) with the treatment. The vertical axis shows hypothetical values of the partial R2 of the unobserved confounder(s) with the outcome. The contour levels represent the adjusted estimates (or t-values) of the treatment effect. The reference points are the bounds on the partial R2 of the unobserved confounder if it were k times "as strong" as the observed covariates used for benchmarking (see arguments kd and ky). The dotted red line show the chosen critical threshold (for instance, zero): confounders with such strength (or stronger) are sufficient to invalidate the research conclusions. All results are exact for single confounders and conservative for multiple/nonlinear confounders. See Cinelli and Hazlett (2020) for details. Parameters ---------- sense_obj : sensemakr object a sensemakr object to plot. sensitivity_of : string either "estimate" or "t-value". (Default value = 'estimate'). model : statsmodels OLSResults object a fitted statsmodels OLSResults object. treatment : string a string with the name of the "treatment" variable, e.g. the independent variable of interest. estimate : float a float with the estimate of the coefficient for the independent variable of interest. se : float a float with the standard error of the regression. dof : int an int with the degrees of freedom of the regression. benchmark_covariates : string or list of strings a string or list of strings with the names of the variables to use for benchmarking. kd : float or list of floats a float or list of floats. Parameterizes how many times stronger the confounder is related to the treatment in comparison to the observed benchmark covariate. Default value is 1 (confounder is as strong as benchmark covariate). ky : float or list of floats a float or list of floats. Parameterizes how many times stronger the confounder is related to the outcome in comparison to the observed benchmark covariate. Default value is the same as kd. r2dz_x : float or list of floats a float or list of floats. Hypothetical partial R2 of unobserved confounder Z with treatment D, given covariates X. r2yz_dx : float or list of floats a float or list of floats. Hypothetical partial R2 of unobserved confounder Z with outcome Y, given covariates X and treatment D. bound_label : string label of the bound variable. reduce: boolean whether to reduce (True, default) or increase (False) the estimate due to putative confounding, default is True. estimate_threshold : float threshold line to emphasize when contours correspond to estimate, default is 0. t_threshold : float threshold line to emphasize when contours correspond to t-value, default is 2. lim : float x axis maximum. lim_y : float y axis maximum. col_contour : string of color color of the contour line, default is "black". col_thr_line : string of color color of the threshold line, default is "red". label_text : boolean whether to include label text. label_bump_x : float x-axis position of label above 0. label_bump_y : float y-axis position of label above 0. xlab : string x-axis label text. ylab : string y-axis label text. plot_margin_fraction : float margin fraction added to the top of lim and lim_y. round_dig : int rounding digit of the display numbers, default is 3. n_levels : int maximum number of countours in the plot. Return ------- plot a contour plot of omitted variable bias for the corresponding model/sense_obj. Examples -------- >>> # Load example dataset: >>> import sensemakr as smkr >>> darfur = smkr.load_darfur() >>> # Fit a statsmodels OLSResults object ("fitted_model") >>> import statsmodels.formula.api as smf >>> model = smf.ols(formula='peacefactor ~ directlyharmed + age + farmer_dar + herder_dar + pastvoted + hhsize_darfur + female + village', data=darfur) >>> fitted_model = model.fit() >>> # Contours directly from OLS object >>> ## Plot contour of the fitted model with directlyharmed as treatment and "female" as benchmark_covariates. >>> smkr.ovb_contour_plot(model=fitted_model,treatment='directlyharmed',benchmark_covariates='female') >>> ## Plot contour of the fitted model with directlyharmed as treatment and "female" as benchmark_covariates kd=[1,2,3] >>> smkr.ovb_contour_plot(model=fitted_model,treatment='directlyharmed',benchmark_covariates='female',kd=[1,2,3]) >>> ## Plot contour of the fitted model with manual benchmark >>> smkr.ovb_contour_plot(model=fitted_model,treatment='directlyharmed',r2dz_x=0.1) >>> # Contours from Sensemakr object >>> sensitivity = smkr.Sensemakr(fitted_model, treatment = "directlyharmed", benchmark_covariates = "female", kd = [1, 2, 3]) >>> smkr.ovb_contour_plot(sense_obj=sensitivity, sensitivity_of='estimate') """ if sensitivity_of not in ["estimate", "t-value"]: sys.exit('Error: "sensitivity_of" argument is required and must be "estimate" or "t-value".') if sense_obj is not None: # treatment, estimate, se, dof, r2dz_x, r2yz_dx, bound_label, reduce, thr, t_thr treatment, estimate, se, dof, r2dz_x, r2yz_dx, bound_label, reduce, estimate_threshold, t_threshold,benchmark_covariates, kd,ky = \ extract_from_sense_obj(sense_obj) elif model is not None and treatment is not None: estimate, se, dof, r2dz_x, r2yz_dx = extract_from_model( model, treatment, benchmark_covariates, kd, ky, r2dz_x, r2yz_dx) elif estimate is None or se is None or dof is None: sys.exit('Error: must provide a Sensemakr object, a statsmodels OLSResults object and treatment, or' 'an estimate, standard error, and degrees of freedom.') estimate, r2dz_x, r2yz_dx, lim, lim_y, label_bump_x, label_bump_y= check_params( estimate, r2dz_x, r2yz_dx, lim, lim_y, label_bump_x, label_bump_y) plot_env['lim'] = lim plot_env['lim_y'] = lim_y plot_env['reduce'] = reduce plot_env['sensitivity_of'] = sensitivity_of plot_env['treatment'] = treatment grid_values_x = np.arange(0, lim, lim / 400) grid_values_y = np.arange(0, lim_y, lim_y / 400) bound_value = None if sensitivity_of == 'estimate': # call adjusted_estimate using grid values as r2dz_x and r2yz_dx as well as passed-in estimate, se, and dof z_axis = [[bias_functions.adjusted_estimate(grid_values_x[j], grid_values_y[i], estimate=estimate, se=se, dof=dof) for j in range(len(grid_values_x))] for i in range(len(grid_values_y))] threshold = estimate_threshold plot_estimate = estimate if r2dz_x is not None: bound_value = bias_functions.adjusted_estimate(r2dz_x, r2yz_dx, estimate=estimate, se=se, dof=dof, reduce=reduce) else: # sensitivity_of is 't-value' # call adjusted_estimate using grid values as r2dz_x and r2yz_dx as well as passed-in estimate, se, and dof z_axis = [[bias_functions.adjusted_t(grid_values_x[j], grid_values_y[i], estimate=estimate, se=se, dof=dof, reduce=reduce, h0=estimate_threshold) for j in range(len(grid_values_x))] for i in range(len(grid_values_y))] threshold = t_threshold plot_estimate = (estimate - estimate_threshold) / se if r2dz_x is not None: bound_value = bias_functions.adjusted_t(r2dz_x, r2yz_dx, estimate=estimate, se=se, dof=dof, reduce=reduce, h0=estimate_threshold) # TODO: see which of these params we want to include in function args list fig, ax = plt.subplots(1, 1, figsize=(6, 6)) # Make n_levels maximum number of contour if(n_levels): n_levels=n_levels-1 # draw all contours CS = ax.contour(grid_values_x, grid_values_y, z_axis, colors=col_contour, linewidths=1.0, linestyles="solid", levels=n_levels) # remove contour line at threshold level round_thr = round(threshold, 0) cs_levels = CS.levels.tolist() if round_thr in cs_levels: threshold_index = cs_levels.index(round_thr) CS.collections[threshold_index].remove() # There's a conflict in matplotlib 3.8.2 with this remove method and ax.clabel # I'm currently commenting the line ax.clabel out but maybe there's a different way to do remove #ax.clabel(CS, inline=1, fontsize=8, fmt="%1.3g", colors="gray", levels=np.delete(CS.levels, threshold_index)) else: ax.clabel(CS, inline=1, fontsize=8, fmt="%1.3g", colors="gray", levels=CS.levels) # draw red critical contour line CS = ax.contour(grid_values_x, grid_values_y, z_axis, colors=col_thr_line, linewidths=1.0, linestyles=[(0, (7, 3))], levels=[threshold]) ax.clabel(CS, inline=1, fontsize=8, fmt="%1.3g", colors="gray") # Plot point for unadjusted estimate / t_statistic ax.scatter([0], [0], c='k', marker='^') ax.annotate("Unadjusted\n({:1.3f})".format(plot_estimate), (0.0 + label_bump_x, 0.0 + label_bump_y)) # Plot labeling and limit-setting if xlab is None: xlab = r"Partial $R^2$ of confounder(s) with the treatment" if ylab is None: ylab = r"Partial $R^2$ of confounder(s) with the outcome" plt.xlabel(xlab) plt.ylabel(ylab) plt.xlim(-(lim / 15.0), lim) plt.ylim(-(lim_y / 15.0), lim_y) # add bounds if r2dz_x is not None: r2dz_x, r2yz_dx = sensitivity_statistics.check_r2(r2dz_x, r2yz_dx) if(np.isscalar(kd)): kd=[kd] if(ky is None): ky=kd if bound_label is None: bound_label=[] for i in range(len(kd)): bound_label.append(sensitivity_bounds.label_maker(benchmark_covariate=benchmark_covariates, kd=kd[i], ky=ky[i])) if(np.isscalar(r2dz_x)): bound_label.append(sensitivity_bounds.label_maker(benchmark_covariate=None, kd=1, ky=1)) elif(len(r2dz_x)>len(bound_label)): for i in range(len(r2dz_x)-len(kd)): bound_label.append(sensitivity_bounds.label_maker(benchmark_covariate=None, kd=1, ky=1)) add_bound_to_contour(r2dz_x=r2dz_x, r2yz_dx=r2yz_dx, bound_value=bound_value, bound_label=bound_label, sensitivity_of=sensitivity_of, label_text=label_text, label_bump_x=label_bump_x, label_bump_y=label_bump_y, round_dig=round_dig) # add margin to top and right side of plot x_plot_margin = plot_margin_fraction * lim y_plot_margin = plot_margin_fraction * lim_y x0, x1, y0, y1 = plt.axis() plt.axis((x0, x1 + x_plot_margin, y0, y1 + y_plot_margin)) plt.tight_layout()
[docs] def add_bound_to_contour(model=None, benchmark_covariates=None, kd=1, ky=None, reduce=None, treatment=None, bounds=None, r2dz_x=None, r2yz_dx=None, bound_value=None, bound_label=None, sensitivity_of=None, label_text=True, label_bump_x=None, label_bump_y=None, round_dig=3): r""" Add bound label to the contour plot of omitted variable bias for sensitivity analysis. The main inputs are a statsmodel object, the treatment variable and the covariates used for benchmarking the strength of unobserved confounding. The reference points are the bounds on the partial R2 of the unobserved confounder if it were k times ''as strong'' as the observed covariate used for benchmarking (see arguments kd and ky). Parameters ---------- model : statsmodels OLSResults object a fitted statsmodels OLSResults object for the restricted regression model you have provided. benchmark_covariates : string a string or list of strings with the names of the variables to use for benchmark bounding. kd : float or list of floats a float or list of floats with each being a multiple of the strength of association between a benchmark variable and the treatment variable to test with benchmark bounding (Default value = 1). ky : float or list of floats same as kd except measured in terms of strength of association with the outcome variable. reduce : boolean whether to reduce (True, default) or increase (False) the estimate due to putative confounding. treatment : string a string with the name of the "treatment" variable, e.g. the independent variable of interest. bounds : pandas dataframe A pandas dataframe with bounds on the strength of confounding according to some benchmark covariates, as computed by the function ovb_bounds. r2dz_x : float or list of floats a float or list of floats with the partial R^2 of a putative unobserved confounder "z" with the treatment variable "d", with observed covariates "x" partialed out, as implied by z being kd-times as strong as the benchmark_covariates. r2yz_dx : float or list of floats a float or list of floats with the partial R^2 of a putative unobserved confounder "z" with the outcome variable "y", with observed covariates "x" and the treatment variable "d" partialed out, as implied by z being ky-times as strong as the benchmark_covariates. bound_value : float the value of the reference point. bound_label : string a string that label the reference point. sensitivity_of : string either "estimate" or "t-value". label_text : boolean whether to include label text. label_bump_x : float x-axis position of label above 0. label_bump_y : float y-axis position of label above 0. round_dig : int rounding digit of the display numbers, default=3. Return ------- add on existing plot add a bound label to the existing contour plot. Examples -------- >>> # Load example dataset: >>> import sensemakr as smkr >>> darfur = smkr.load_darfur() >>> # Fit a statsmodels OLSResults object ("fitted_model"): >>> import statsmodels.formula.api as smf >>> model = smf.ols(formula='peacefactor ~ directlyharmed + age + farmer_dar + herder_dar + pastvoted + hhsize_darfur + female + village', data=darfur) >>> fitted_model = model.fit() >>> # Runs sensemakr for sensitivity analysis >>> sensitivity = smkr.Sensemakr(fitted_model, treatment = "directlyharmed", benchmark_covariates = "female", kd = [1, 2, 3]) >>> # Plot contour of the fitted model with directlyharmed as treatment and "female" as benchmark_covariates. >>> smkr.ovb_contour_plot(model=fitted_model,treatment='directlyharmed',benchmark_covariates='female') >>> # Add bound to contour. >>> smkr.add_bound_to_contour(model=fitted_model,treatment='directlyharmed',benchmark_covariates='female',kd=[2,3]) """ if ((model is None or benchmark_covariates is None) and bounds is None and (r2dz_x is None or r2yz_dx is None)): sys.exit('Error: add_bound_to_contour requires either a statsmodels OLSResults object and names of benchmark ' 'covariates, or a Pandas DataFrame with bounding information, ' 'or partial R^2 parameters r2dz_x and r2yz_dx.') if plot_env['reduce'] is None: sys.exit('Error: must have a current contour plot before adding bounds.') if treatment is None: treatment = plot_env['treatment'] if sensitivity_of is None: sensitivity_of = plot_env['sensitivity_of'] if label_bump_x is None: label_bump_x = plot_env['lim'] / 30.0 if label_bump_y is None: label_bump_y = plot_env['lim_y'] /30.0 if reduce is None: reduce = plot_env['reduce'] if(np.isscalar(kd)): kd=[kd] if(ky is None): ky=kd if(np.isscalar(ky)): ky=[ky] if model is not None: if treatment != plot_env['treatment']: print('Warning: treatment variable provided does not equal treatment of previous contour plot.') bounds = sensitivity_bounds.ovb_bounds(model=model, treatment=treatment, benchmark_covariates=benchmark_covariates, kd=kd, ky=ky, adjusted_estimates=True, reduce=reduce) if sensitivity_of == 'estimate': bound_value = bounds['adjusted_estimate'].copy() else: bound_value = bounds['adjusted_t'].copy() if bound_label is None: bound_label = bounds['bound_label'].copy() if model is None: if (bounds is not None) and (bound_label is None): bound_label=list(bounds['bound_label']) if bounds is not None: r2dz_x = bounds['r2dz_x'] r2yz_dx = bounds['r2yz_dx'] if np.isscalar(r2dz_x): r2dz_x = [r2dz_x] if np.isscalar(r2yz_dx): r2yz_dx = [r2yz_dx] if np.isscalar(bound_value): bound_value = [bound_value] for i in range(len(r2dz_x)): plt.scatter(r2dz_x[i], r2yz_dx[i], c='red', marker='D', edgecolors='black') if label_text: if(np.isscalar(bound_label)): bound_label=[bound_label] if (bound_value is not None) and (bound_label is not None): bound_value[i] = round(bound_value[i], round_dig) label = str(bound_label[i]) + '\n(' + str(bound_value[i]) + ')' else: label = bound_label[i] plt.annotate(label, (r2dz_x[i] + label_bump_x, r2yz_dx[i] + label_bump_y))
[docs] def ovb_extreme_plot(sense_obj=None, model=None, treatment=None, estimate=None, se=None, dof=None, benchmark_covariates=None, kd=1,ky=None, r2dz_x=None, r2yz_dx=[1, 0.75, 0.5], reduce=True, threshold=0, lim=None, lim_y=None, xlab=None, ylab=None): r""" Extreme scenario plots of omitted variable bias for sensitivity analysis. The main inputs are a statsmodel object, the treatment variable and the covariates used for benchmarking the strength of unobserved confounding. The horizontal axis shows the partial R2 of the unobserved confounder(s) with the treatment. The vertical axis shows the adjusted treatment effect estimate. The partial R2 of the confounder with the outcome is represented by different curves for each scenario, as given by the parameter r2yz_dx. The red marks on horizontal axis are bounds on the partial R2 of the unobserved confounder kd times as strong as the covariates used for benchmarking. The dotted red line represent the threshold for the effect estimate deemed to be problematic (for instance, zero). See Cinelli and Hazlett (2020) for details. Parameters ---------- sense_obj : sensemakr object a sensemakr object. model : statsmodels OLSResults object a fitted statsmodels OLSResults object for the restricted regression model you have provided. treatment : string a string with the name of the "treatment" variable, e.g. the independent variable of interest. estimate : float a float with the estimate of the coefficient for the independent variable of interest. se : float a float with the standard error of the regression. dof : float an int with the degrees of freedom of the regression. benchmark_covariates : string or list of strings a string or list of strings with the names of the variables to use for benchmark bounding. kd : float or list of floats a float or list of floats with each being a multiple of the strength of association between a benchmark variable and the treatment variable to test with benchmark bounding (Default value = 1). ky : float or list of floats same as kd except measured in terms of strength of association with the outcome variable. r2dz_x : float or list of floats a float or list of floats with the partial R^2 of a putative unobserved confounder "z" with the treatment variable "d", with observed covariates "x" partialed out, as implied by z being kd-times as strong as the benchmark_covariates. r2yz_dx : float or list of floats a float or list of floats with the partial R^2 of a putative unobserved confounder "z" with the outcome variable "y", with observed covariates "x" and the treatment variable "d" partialed out, as implied by z being ky-times as strong as the benchmark_covariates, default=[1,0.75,0.5]. reduce : boolean whether to reduce (True, default) or increase (False) the estimate due to putative confounding, default=True. threshold : float threshold line to emphasize when drawing estimate, default=0. lim : float range of x-axis. lim_y : float range of y-axis. xlab : string x-axis label text. ylab : string y-axis label text. Return -------- plot an extreme value plot of omitted variable bias for the corresponding model/sense_obj. Examples -------- >>> # Load example dataset: >>> import sensemakr as smkr >>> darfur = smkr.load_darfur() >>> # Fit a statsmodels OLSResults object ("fitted_model"): >>> import statsmodels.formula.api as smf >>> model = smf.ols(formula='peacefactor ~ directlyharmed + age + farmer_dar + herder_dar + pastvoted + hhsize_darfur + female + village', data=darfur) >>> fitted_model = model.fit() >>> # Runs sensemakr for sensitivity analysis >>> sensitivity = smkr.Sensemakr(fitted_model, treatment = "directlyharmed", benchmark_covariates = "female", kd = [1, 2, 3]) >>> # Plot extreme value of the fitted model with directlyharmed as treatment and "female" as benchmark_covariates. >>> smkr.ovb_extreme_plot(model=fitted_model,treatment='directlyharmed',benchmark_covariates='female') >>> # Plot extreme value of the fitted model with directlyharmed as treatment and "female" as benchmark_covariates kd=[1,2]. >>> smkr.ovb_extreme_plot(model=fitted_model,treatment='directlyharmed',benchmark_covariates='female',kd=[1,2]) >>> # Plot extreme value of the fitted model with manual benchmark >>> smkr.ovb_extreme_plot(model=fitted_model,treatment='directlyharmed',r2dz_x=0.1) >>> # Plot extreme value of the sensemakr object >>> smkr.ovb_extreme_plot(sense_obj=sensitivity) """ if sense_obj is not None: # treatment, estimate, se, dof, r2dz_x, r2yz_dx, bound_label, reduce, thr, t_thr treatment, estimate, se, dof, r2dz_x, dum, bound_label, reduce, estimate_threshold, t_threshold,benchmark_covariates,kd,ky = \ extract_from_sense_obj(sense_obj) elif model is not None and treatment is not None: estimate, se, dof, r2dz_x, dum = extract_from_model( model, treatment, benchmark_covariates, kd, None, r2dz_x, r2yz_dx) elif estimate is None or se is None or dof is None: sys.exit('Error: must provide a Sensemakr object, a statsmodels OLSResults object and treatment, or' 'an estimate, standard error, and degrees of freedom.') estimate, r2dz_x,r2yz_dx,lim = check_params_extreme( estimate, r2dz_x, r2yz_dx, lim) r2d_values = np.arange(0, lim, 0.001) fig, ax = plt.subplots(1, 1, figsize=(8, 4.8)) for i in range(len(r2yz_dx)): y=bias_functions.adjusted_estimate(r2d_values, r2yz_dx[i], estimate=estimate, se=se, dof=dof) # Initialize Plot if(i==0): ax.plot(r2d_values,y,label='%s%%' % int(round(r2yz_dx[i]*100)),linewidth=1.5, linestyle="solid",color='black') ax.axhline(y=threshold,color='r',linestyle='--') lim_y1=np.max(y)+np.abs(np.max(y))/15 lim_y2=np.min(y)-np.abs(np.min(y))/15 # Add rugs if(r2dz_x is not None): if(np.isscalar(r2dz_x)): r2dz_x=[r2dz_x] for rug in r2dz_x: ax.axvline(x=rug,ymin=0, ymax=0.022,color='r',linewidth=2.5, linestyle="solid") else: ax.plot(r2d_values,y,label='%s%%' % int(round(r2yz_dx[i]*100)),linewidth=np.abs(2.1-0.5*i), linestyle="--",color='black') # Set font size params = {'axes.labelsize': 12, 'axes.titlesize': 12, 'legend.title_fontsize':12, 'legend.fontsize':12, 'xtick.labelsize':12, 'ytick.labelsize':12} plt.rcParams.update(params) ax.legend(ncol=len(r2yz_dx),frameon=False) ax.get_legend().set_title(r"Partial $R^2$ of confounder(s) with the outcome") ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) # Plot labeling and limit-setting if xlab is None: xlab = r"Partial $R^2$ of confounder(s) with the treatment" if ylab is None: ylab = r"Adjusted effect estimate" plt.xlabel(xlab) plt.ylabel(ylab) plt.xlim(-(lim / 35.0), lim+(lim / 35.0)) if lim_y is None: plt.ylim(lim_y2 , lim_y1) else: plt.ylim(-(lim_y / 15.0), lim_y) plt.tight_layout()
# Extracts sensitivity and bounding parameters from a given Sensemakr object def extract_from_sense_obj(sense_obj): """ A helper function to extract parameters from sensemakr object. Parameters ---------- sense_obj : sensemakr object sensemakr object Returns ------- """ treatment = sense_obj.treatment estimate = sense_obj.estimate q = sense_obj.q reduce = sense_obj.reduce alpha = sense_obj.alpha se = sense_obj.se dof = sense_obj.dof benchmark_covariates=sense_obj.benchmark_covariates kd=sense_obj.kd ky=sense_obj.ky if reduce: thr = estimate * (1 - q) else: thr = estimate * (1 + q) t_thr = abs(t.ppf(alpha / 2, dof - 1)) * np.sign(sense_obj.sensitivity_stats['t_statistic']) if sense_obj.bounds is None: r2dz_x = None r2yz_dx = None bound_label = "" else: r2dz_x = sense_obj.bounds['r2dz_x'] r2yz_dx = sense_obj.bounds['r2yz_dx'] bound_label = sense_obj.bounds['bound_label'] return treatment, estimate, se, dof, r2dz_x, r2yz_dx, bound_label, reduce, thr, t_thr, benchmark_covariates,kd,ky # Extracts estimate, standard error, degrees of freedom, and parial R^2 values from a specified model+treatment pair def extract_from_model(model, treatment, benchmark_covariates, kd, ky, r2dz_x, r2yz_dx): """ A helper function to extract parameters from model. Parameters ---------- model : statsmodels OLSResults object a fitted statsmodels OLSResults object for the restricted regression model you have provided. treatment : string a string with the name of the "treatment" variable, e.g. the independent variable of interest. benchmark_covariates : string or list of strings a string or list of strings with the names of the variables to use for benchmark bounding. kd : float or list of floats a float or list of floats with each being a multiple of the strength of association between a benchmark variable and the treatment variable to test with benchmark bounding (Default value = 1). ky : float or list of floats same as kd except measured in terms of strength of association with the outcome variable. r2dz_x : float or list of floats a float or list of floats with the partial R^2 of a putative unobserved confounder "z" with the treatment variable "d", with observed covariates "x" partialed out, as implied by z being kd-times as strong as the benchmark_covariates. r2yz_dx : float or list of floats a float or list of floats with the partial R^2 of a putative unobserved confounder "z" with the outcome variable "y", with observed covariates "x" and the treatment variable "d" partialed out, as implied by z being ky-times as strong as the benchmark_covariates. Returns ------- """ if ky is None: ky = kd check_multipliers(ky, kd) if type(treatment) is not str: sys.exit('Error: treatment must be a single string.') model_data = sensitivity_statistics.model_helper(model, covariates=treatment) estimate = model_data['estimate'] se = model_data['se'] dof = model_data['dof'] try: estimate, se = float(estimate), float(se) except: sys.exit('Error: The estimated effect and standard error must be numeric.') if benchmark_covariates is None: # no benchmark bounding to do return estimate, se, dof, r2dz_x, r2yz_dx else: bench_bounds = sensitivity_bounds.ovb_bounds( model, treatment, benchmark_covariates=benchmark_covariates, kd=kd, ky=ky, adjusted_estimates=False) if r2dz_x is None: bounds = bench_bounds else: if(r2yz_dx is None): r2yz_dx=r2dz_x if(np.isscalar(r2dz_x)): bounds = pd.DataFrame(data={'r2dz_x': [r2dz_x], 'r2yz_dx': [r2yz_dx]}) bounds = pd.concat([bench_bounds, bounds]).reset_index() else: bounds = pd.DataFrame(data={'r2dz_x': r2dz_x, 'r2yz_dx': r2yz_dx}) bounds = pd.concat([bench_bounds, bounds]).reset_index() return estimate, se, dof, bounds['r2dz_x'], bounds['r2yz_dx'] # Checks to make sure given parameters are valid and sets some default parameter values if not specified by the user def check_params(estimate, r2dz_x, r2yz_dx, lim, lim_y, label_bump_x, label_bump_y): """ A helper function to check plot arguments. Parameters ---------- estimate : float a float with the estimate of the coefficient for the independent variable of interest. r2dz_x : float or list of floats a float or list of floats. Hypothetical partial R2 of unobserved confounder Z with treatment D, given covariates X. r2yz_dx : float or list of floats a float or list of floats. Hypothetical partial R2 of unobserved confounder Z with outcome Y, given covariates X and treatment D. lim : float x axis maximum. lim_y : float y axis maximum. label_bump_x : float x-axis position of label above 0. label_bump_y : float y-axis position of label above 0. Returns ------- """ check_estimate(estimate) if r2yz_dx is None: r2yz_dx = r2dz_x r2dz_x, r2yz_dx = sensitivity_statistics.check_r2(r2dz_x, r2yz_dx) if lim is None: if r2dz_x is None: lim = 0.4 else: lim = min(np.max(np.append(r2dz_x * 1.2, 0.4)), 1 - 10 ** -12) #lim = min(np.max(list(r2dz_x * 1.2) + [0.4]), 1 - 10 ** -12) if lim_y is None: if r2yz_dx is None: lim_y = 0.4 else: lim_y = min(np.max(np.append(r2yz_dx * 1.2, 0.4)), 1 - 10 ** -12) #lim_y = min(np.max(list(r2yz_dx * 1.2) + [0.4]), 1 - 10 ** -12) if label_bump_x is None: label_bump_x = lim / 30.0 if label_bump_y is None: label_bump_y = lim_y / 30.0 if lim > 1.0: lim = 1 - 10 ** -12 print('Warning: Contour limit larger than 1 was set to 1.') elif lim < 0: lim = 0.4 print('Warning: Contour limit less than 0 was set to 0.4.') if lim_y > 1.0: lim_y = 1 - 10 ** -12 print('Warning: Contour limit larger than 1 was set to 1.') elif lim_y < 0: lim_y = 0.4 print('Warning: Contour limit less than 0 was set to 0.4.') # if list_par is None: # list_par = {'mar': [4, 4, 1, 1]} return estimate, r2dz_x, r2yz_dx, lim, lim_y, label_bump_x, label_bump_y # Checks to make sure given parameters are valid and sets some default parameter values if not specified by the user def check_params_extreme(estimate, r2dz_x, r2yz_dx, lim): """ A helper function to check plot arguments. Parameters ---------- estimate : float a float with the estimate of the coefficient for the independent variable of interest. r2dz_x : float or list of floats a float or list of floats. Hypothetical partial R2 of unobserved confounder Z with treatment D, given covariates X. r2yz_dx : float or list of floats a float or list of floats. Hypothetical partial R2 of unobserved confounder Z with outcome Y, given covariates X and treatment D. lim : float x axis maximum. Returns ------- """ check_estimate(estimate) r2dz_x, r2yz_dx = sensitivity_statistics.check_r2(r2dz_x, r2yz_dx) if lim is None: if r2dz_x is None: lim = 0.1 else: lim = min(np.max(np.append(r2dz_x * 1.2, 0.1)), 1 - 10 ** -12) #lim = min(np.max(list(r2dz_x * 1.2) + [0.4]), 1 - 10 ** -12) if lim > 1.0: lim = 1 - 10 ** -12 print('Warning: Contour limit larger than 1 was set to 1.') elif lim < 0: lim = 0.4 print('Warning: Contour limit less than 0 was set to 0.4.') # if list_par is None: # list_par = {'mar': [4, 4, 1, 1]} return estimate, r2dz_x, r2yz_dx, lim # Parameter validators def check_estimate(estimate): """ Make sure that the estimate is a single floating point number. Parameters ---------- estimate : float a float with the estimate of the coefficient for the independent variable of interest. Returns ------- """ if estimate is None: sys.exit('Error: You must supply either a `model` and `treatment_covariate` to ' 'extract an estimate or a directly supplied `estimate` argument') if type(estimate) is not float: sys.exit('Error: The estimated effect must be numeric.') def check_multipliers(ky, kd): """ Make sure ky and kd are both numbers or equal-length lists of numbers. Parameters ---------- kd : float or list of floats a float or list of floats with each being a multiple of the strength of association between a benchmark variable and the treatment variable to test with benchmark bounding (Default value = 1). ky : float or list of floats same as kd except measured in terms of strength of association with the outcome variable. Returns ------- """ if type(ky) not in [int, float, list] or type(kd) not in [int, float, list]: sys.exit('Error: ky and kd must be numeric.') if ((type(ky) is int or type(ky) is float) and type(kd) is list) or ( (type(kd) is int or type(kd) is float) and type(ky) is list): sys.exit('Error: ky and kd must be both floats or ints or lists of the same length.') if type(ky) is list and type(kd) is list: if len(ky) != len(kd): sys.exit('Error: ky and kd must be the same length.') if not all(type(i) in [int, float] for i in ky) or not all(type(i) in [int, float] for i in kd): sys.exit('Error: ky and kd must contain only ints or floats.')