1. Simulation Scenario
# (01.DataGenerator.R 참고)
lstParams <- lstScenarios[[1]]
- 치료 분율
- {(33:33:33), (10:10:80), (10:45:45)}
- 공변량 겹침
- 공변량과 치료할당이 상관관계가 강할수록 치료할당이 랜덤이 아닌 공변량에 의해 결정됨.
- good overlap (공변량과 치료할당 간 상관관계가 적음).
- poor overlap (공변량과 치료할당 간 상관관계가 큼).
- 치료 효과
- null effect
- non-null effect
2. covariate matrix X
- create covariate matrix
CreateX <- function(N) {
## Continuous
X1 <- rnorm(n = N, mean = 0, sd = 1)
X2 <- rlnorm(n = N, meanlog = 0, sdlog = 0.5)
X3 <- rnorm(n = N, mean = 0, sd = 10)
## Binary
## Dependent on X1; mean(pX4) = 0.5
oddsX4 <- exp(2 * X1)
pX4 <- oddsX4 / (1 + oddsX4)
X4 <- rbinom(n = N, size = 1, prob = pX4)
## Independent
X5 <- rbinom(n = N, size = 1, prob = 0.2)
## Multinomial
X6 <- as.numeric(c(1:5) %*% rmultinom(n = N,
size = 1,
prob = c(0.5,0.3,0.1,0.05,0.05)))
## Induced variables
X7 <- sin(X1)
X8 <- X2^2
X9 <- X3 * X4
X10 <- X4 * X5
## Output a matrix
cbind(X1, X2, X3, X4, X5, X6, X7, X8, X9, X10)
}
X <- CreateX(6000)
X1iX2iX3iX4iX5iX6iX7iX8iX9iX10i=Normal(0,12)=Log-Normal(0,0.52)=Normal(0,102)=Bernoulli(pi=e2X1i/(1+e2X1i))=Bernoulli(p=0.2)=Multinomial(p=(0.5,0.3,0.1,0.05,0.05)T)=sin(X1i)=X2i2=X3i×X4i=X4i×X5i
3. Treatment assignment mechanism given a multinomial logistic model
- Log probability ratio
logPr1v0 <- lstParams$alpha10 + X %*% lstParams$alpha1X
logPr2v0 <- lstParams$alpha20 + X %*% lstParams$alpha2X
ηT1iηT2i=log{P(Ti=0∣Xi=xi)P(Ti=1∣Xi=xi)}=α10+α1XTXi=log{P(Ti=0∣Xi=xi)P(Ti=2∣Xi=xi)}=α20+α2XTXiα10, α20 determine treatment prevalanceα1X, α2X determine covariate-treatment association
- Probability ratio
pr1v0 <- exp(logPr1v0)
pr2v0 <- exp(logPr2v0)
{P(Ti=0∣Xi=xi)P(Ti=1∣Xi=xi)}=exp(ηT1i){P(Ti=0∣Xi=xi)P(Ti=2∣Xi=xi)}=exp(ηT2i)
- Individual treatment probabilities (True propensity score)
denom <- 1 + pr1v0 + pr2v0
p0 <- 1 / denom
p1 <- pr1v0 / denom
p2 <- pr2v0 / denom
P(Ti=0∣Xi=xi)=1+exp(ηT1i)+exp(ηT2i)1P(Ti=1∣Xi=xi)=1+exp(ηT1i)+exp(ηT2i)exp(ηT1i)P(Ti=2∣Xi=xi)=1+exp(ηT1i)+exp(ηT2i)exp(ηT2i)
- Treatment assignments by mutinomial logistic
# NxK matrix
pMat <- cbind(p0, p1, p2)
# KxN matrix
pLst <- as.data.frame(t(pMat))
# Loop over individuals
lstT <- lapply(pLst, function(pVec) {
c(0,1,2) %*% rmultinom(n = 1, size = 1, prob = pVec)
})
# Vector of individual treatment assignment
Tr <- unlist(lstT)
dfTrModel <- data.frame(pT0 = p0, # True propensity score for Tr = 0
pT1 = p1, # True propensity score for Tr = 1
pT2 = p2, # True propensity score for Tr = 2
Tr = Tr) # Trinomial treatment assignment
4. Outcome assignment mechanism given a log-linear model
- Counterfactual log-linear (log-probability) predictors
lp0 <- lstParams$beta0 + (X %*% lstParams$betaX) + sum(c(0,0) * lstParams$betaT)
lp1 <- lstParams$beta0 + (X %*% lstParams$betaX) + sum(c(1,0) * lstParams$betaT)
lp2 <- lstParams$beta0 + (X %*% lstParams$betaX) + sum(c(0,1) * lstParams$betaT)
ηYi=log(P(Yi=1∣Ti=ti, Xi=xi))=β0+βXTXi+βT1I(ti=1)+βT2I(ti=2)
- Counterfactual linear (probability) predictors
# Treatment dummy variables
TMatAll <- cbind(as.numeric(dfTrModel$Tr == 0),
as.numeric(dfTrModel$Tr == 1),
as.numeric(dfTrModel$Tr == 2))
# Assing linear predictor based on treatment assignment
lp <- rowSums(cbind(lp0, lp1, lp2) * TMatAll)
# function for transform to P(Y=1) (return with exponentiated lp)
TransformLp <- function(lp) {
# Log-probability model
## Use lp as log probability and do exp(.) transformaation (log-linear model)
expLp <- exp(lp)
## Trim probability greater than 1
pY <- pmin(expLp, 1)
## pY in [0,1]
stopifnot(all(0 <= pY & pY <=1))
## Return with exponentiated lp
list(expLp = expLp, pY = pY)
}
# Transform to P(Y=1)
transformLp <- TransformLp(lp)
# Counterfactual versions
pY0 <- TransformLp(lp0)$pY
pY1 <- TransformLp(lp1)$pY
pY2 <- TransformLp(lp2)$pY
pYi=P(Yi=1∣Ti=ti, Xi=xi)=exp(ηY1i)
- Outcome was assigned using a Bernoulli random number generating process
# Sample binary Y
Y <- rbinom(n = length(transformLp$pY), size = 1, prob = transformLp$pY)
dfOutModel <- data.frame(lp = lp, # Linear predictor
expLp = transformLp$expLp, # Exponential of lp
pY = transformLp$pY, # True DRS
pY0 = pY0, # True counterfactual DRS 0
pY1 = pY1, # True counterfactual DRS 1
pY2 = pY2, # True counterfactual DRS 2
Y = Y) # Binary outcome
}
5. Create a data frame containing covariates, treatment, and outcome
# Return generated data with true PS and DRS
df <- as.data.frame(X)
df <- cbind(df, dfTrModel)
df <- cbind(df, dfOutModel)
df
참고
- Yoshida K, Hernández-Díaz S, Solomon DH, et al. Matching weights to simultaneously compare three treatment groups: comparison to three-way matching. Epidemiology. 2017;28(3):387-395. https://github.com/kaz-yos/mw