시뮬레이션 데이터 생성 과정

choyunjeong·2025년 1월 11일

1. Simulation Scenario

# (01.DataGenerator.R 참고)
lstParams <- lstScenarios[[1]]

\\[10pt]

- 치료 분율

  • {(33:33:33), (10:10:80), (10:45:45)}\{(33:33:33),\ (10:10:80),\ (10:45:45)\}

- 공변량 겹침

  • 공변량과 치료할당이 상관관계가 강할수록 치료할당이 랜덤이 아닌 공변량에 의해 결정됨.
    - good overlap (공변량과 치료할당 간 상관관계가 적음).
    - poor overlap (공변량과 치료할당 간 상관관계가 큼).

- 치료 효과

  • null effect
  • non-null effect

\\[40pt]

2. covariate matrix X

- create covariate matrix\text{create covariate matrix}

CreateX <- function(N) {

    ## Continuous
    X1     <- rnorm(n = N, mean = 0, sd = 1)
    X2     <- rlnorm(n = N, meanlog = 0, sdlog = 0.5)
    X3     <- rnorm(n = N, mean = 0, sd = 10)

    ## Binary
    ##  Dependent on X1; mean(pX4) = 0.5
    oddsX4 <- exp(2 * X1)
    pX4    <- oddsX4 / (1 + oddsX4)
    X4     <- rbinom(n = N, size = 1, prob = pX4)
    ##  Independent
    X5     <- rbinom(n = N, size = 1, prob = 0.2)

    ## Multinomial
    X6     <- as.numeric(c(1:5) %*% rmultinom(n    = N, 
                                              size = 1, 
                                              prob = c(0.5,0.3,0.1,0.05,0.05)))

    ## Induced variables
    X7     <- sin(X1)
    X8     <- X2^2
    X9     <- X3 * X4
    X10    <- X4 * X5

    ## Output a matrix
    cbind(X1, X2, X3, X4, X5, X6, X7, X8, X9, X10)
}

X <- CreateX(6000)

\\[10pt]

X1i=Normal(0,12)X2i=Log-Normal(0,0.52)X3i=Normal(0,102)X4i=Bernoulli(pi=e2X1i/(1+e2X1i))X5i=Bernoulli(p=0.2)X6i=Multinomial(p=(0.5,0.3,0.1,0.05,0.05)T)X7i=sin(X1i)X8i=X2i2X9i=X3i×X4iX10i=X4i×X5i\begin{aligned} X_{1i} &= \text{Normal}(0,1^2) \\[5pt] X_{2i} &= \text{Log-Normal}(0,0.5^2)\\[5pt] X_{3i} &= \text{Normal}(0,10^2) \\[5pt] X_{4i} &= \text{Bernoulli}(p_i = e^{2X_{1i}}/(1+e^{2X_{1i}})) \\[5pt] X_{5i} &= \text{Bernoulli}(p = 0.2) \\[5pt] X_{6i} &= \text{Multinomial}(p =(0.5,0.3,0.1,0.05,0.05)^T) \\[5pt] X_{7i} &= \text{sin}(X_{1i}) \\[5pt] X_{8i} &= X_{2i}^2 \\[5pt] X_{9i} &= X_{3i}\times X_{4i} \\[5pt] X_{10i} &= X_{4i}\times X_{5i} \\[5pt] \end{aligned}

\\[40pt]

3. Treatment assignment mechanism given a multinomial logistic model

- Log probability ratio\text{Log probability ratio}

logPr1v0 <- lstParams$alpha10 +  X %*% lstParams$alpha1X

logPr2v0 <- lstParams$alpha20 +  X %*% lstParams$alpha2X

\\[10pt]

ηT1i=log{P(Ti=1Xi=xi)P(Ti=0Xi=xi)}=α10+α1XTXiηT2i=log{P(Ti=2Xi=xi)P(Ti=0Xi=xi)}=α20+α2XTXiα10, α20 determine treatment prevalanceα1X, α2X determine covariate-treatment association\begin{aligned} \eta_{T_{1i}} &= \text{log}\left\{\dfrac{P(T_i=1|X_i=x_i)}{P(T_i=0|X_i=x_i)}\right\} = \alpha_{10}+\alpha_{1X}^T X_i \\[10pt] \eta_{T_{2i}} &= \text{log}\left\{\dfrac{P(T_i=2|X_i=x_i)}{P(T_i=0|X_i=x_i)}\right\} = \alpha_{20}+\alpha_{2X}^T X_i \\[20pt] &\alpha_{10},\ \alpha_{20} \text{ determine treatment prevalance} \\ &\alpha_{1X},\ \alpha_{2X} \text{ determine covariate-treatment association} \end{aligned}

\\[20pt]

- Probability ratio\text{Probability ratio}

pr1v0    <- exp(logPr1v0)

pr2v0    <- exp(logPr2v0)

\\[10pt]

{P(Ti=1Xi=xi)P(Ti=0Xi=xi)}=exp(ηT1i){P(Ti=2Xi=xi)P(Ti=0Xi=xi)}=exp(ηT2i)\begin{aligned} \left\{\dfrac{P(T_i=1|X_i=x_i)}{P(T_i=0|X_i=x_i)}\right\} = \exp(\eta_{T_{1i}}) \\[10pt] \left\{\dfrac{P(T_i=2|X_i=x_i)}{P(T_i=0|X_i=x_i)}\right\} = \exp(\eta_{T_{2i}}) \end{aligned}

\\[20pt]

- Individual treatment probabilities (True propensity score)\text{Individual treatment probabilities (True propensity score)}

denom    <- 1 + pr1v0 + pr2v0

p0       <- 1     / denom
p1       <- pr1v0 / denom
p2       <- pr2v0 / denom

\\[10pt]

P(Ti=0Xi=xi)=11+exp(ηT1i)+exp(ηT2i)P(Ti=1Xi=xi)=exp(ηT1i)1+exp(ηT1i)+exp(ηT2i)P(Ti=2Xi=xi)=exp(ηT2i)1+exp(ηT1i)+exp(ηT2i)P(T_i=0|X_i=x_i)=\dfrac{1}{1+\text{exp}(\eta_{T_{1i}})+\text{exp}(\eta_{T_{2i}})} \\[10pt] P(T_i=1|X_i=x_i)=\dfrac{\text{exp}(\eta_{T_{1i}})}{1+\text{exp}(\eta_{T_{1i}})+\text{exp}(\eta_{T_{2i}})} \\[10pt] P(T_i=2|X_i=x_i)=\dfrac{\text{exp}(\eta_{T_{2i}})}{1+\text{exp}(\eta_{T_{1i}})+\text{exp}(\eta_{T_{2i}})}

\\[20pt]

- Treatment assignments by mutinomial logistic\text{Treatment assignments by mutinomial logistic}

# NxK matrix
pMat  <- cbind(p0, p1, p2)

# KxN matrix
pLst  <- as.data.frame(t(pMat))

# Loop over individuals
lstT  <- lapply(pLst, function(pVec) {
  c(0,1,2) %*% rmultinom(n = 1, size = 1, prob = pVec)
})

# Vector of individual treatment assignment
Tr    <- unlist(lstT)

dfTrModel <- data.frame(pT0 = p0,         # True propensity score for Tr = 0
                        pT1 = p1,         # True propensity score for Tr = 1
                        pT2 = p2,         # True propensity score for Tr = 2
                        Tr  = Tr)         # Trinomial treatment assignment

\\[40pt]

4. Outcome assignment mechanism given a log-linear model

- Counterfactual log-linear (log-probability) predictors\text{Counterfactual log-linear (log-probability) predictors}

lp0  <- lstParams$beta0 + (X %*% lstParams$betaX) + sum(c(0,0) * lstParams$betaT)
lp1  <- lstParams$beta0 + (X %*% lstParams$betaX) + sum(c(1,0) * lstParams$betaT)
lp2  <- lstParams$beta0 + (X %*% lstParams$betaX) + sum(c(0,1) * lstParams$betaT)

\\[10pt]

ηYi=log(P(Yi=1Ti=ti, Xi=xi))=β0+βXTXi+βT1I(ti=1)+βT2I(ti=2)\eta_{Y_i} = \log(P(Y_i=1|T_i=t_i,\ X_i=x_i))=\beta_{0}+\beta_{X}^TX_i+\beta_{T_1}I(t_i=1)+\beta_{T_2}I(t_i=2)

\\[20pt]

- Counterfactual linear (probability) predictors\text{Counterfactual linear (probability) predictors}

# Treatment dummy variables
TMatAll <- cbind(as.numeric(dfTrModel$Tr == 0),
                 as.numeric(dfTrModel$Tr == 1),
                 as.numeric(dfTrModel$Tr == 2))
                 
# Assing linear predictor based on treatment assignment
lp <- rowSums(cbind(lp0, lp1, lp2) * TMatAll)

# function for transform to P(Y=1) (return with exponentiated lp)
TransformLp <- function(lp) {

    # Log-probability model
    ## Use lp as log probability and do exp(.) transformaation (log-linear model)
    expLp <- exp(lp)

    ## Trim probability greater than 1
    pY <- pmin(expLp, 1)

    ## pY in [0,1]
    stopifnot(all(0 <= pY & pY <=1))

    ## Return with exponentiated lp
    list(expLp = expLp, pY = pY)

}

# Transform to P(Y=1)
transformLp <- TransformLp(lp)

# Counterfactual versions
pY0  <- TransformLp(lp0)$pY
pY1  <- TransformLp(lp1)$pY
pY2  <- TransformLp(lp2)$pY

\\[10pt]

pYi=P(Yi=1Ti=ti, Xi=xi)=exp(ηY1i)p_{Y_i} = P(Y_i=1|T_i=t_i,\ X_i=x_i)=\exp(\eta_{Y_{1i}})

\\[20pt]

- Outcome was assigned using a Bernoulli random number generating process\text{Outcome was assigned using a Bernoulli random number generating process}

# Sample binary Y
Y    <- rbinom(n = length(transformLp$pY), size = 1, prob = transformLp$pY)


dfOutModel <-  data.frame(lp    = lp,                 # Linear predictor
               			  expLp = transformLp$expLp,  # Exponential of lp
                          pY    = transformLp$pY,     # True DRS
                          pY0   = pY0,                # True counterfactual DRS 0
                          pY1   = pY1,                # True counterfactual DRS 1
                          pY2   = pY2,                # True counterfactual DRS 2
                          Y     = Y)                  # Binary outcome
}

\\[40pt]

5. Create a data frame containing covariates, treatment, and outcome

# Return generated data with true PS and DRS
df      <- as.data.frame(X)
df      <- cbind(df, dfTrModel)
df      <- cbind(df, dfOutModel)
df

참고

  • Yoshida K, Hernández-Díaz S, Solomon DH, et al. Matching weights to simultaneously compare three treatment groups: comparison to three-way matching. Epidemiology. 2017;28(3):387-395. https://github.com/kaz-yos/mw

0개의 댓글