4  Intuition for marginal models

PBC3 trial in liver cirrhosis

Read data

Code show/hide
pbc3 <- read.csv("data/pbc3.csv")
pbc3$followup<-pbc3$days/365.25
pbc3$fail <- ifelse(pbc3$status != 0, 1, 0) # event/failure indicator
pbc3$tment_char <- ifelse(pbc3$tment == 0, "Placebo", "CyA")
# Add transformations of covariates 
pbc3$albnorm <- with(pbc3, (alb-35)*(alb>35))
pbc3$alb10 <- with(pbc3, alb/10)
pbc3$alb2 <- with(pbc3, alb10*alb10)

pbc3$bilihigh <- with(pbc3, (bili-17.1)*(bili>17.1))
pbc3$bilitoohigh <- with(pbc3, (bili-34.2)*(bili>34.2))
pbc3$bilimuchtoohigh <- with(pbc3, (bili-51.3)*(bili>51.3))
pbc3$bili100 <- with(pbc3, bili/100)
pbc3$bili2 <- with(pbc3, bili100*bili100)

pbc3$log2bili <- with(pbc3, log2(bili))
pbc3$log2bili2 <- with(pbc3, log2bili*log2bili)

pbc3$logbilihigh <- with(pbc3, (log2bili-log2(17.1))*(bili>17.1))
pbc3$logbilitoohigh <- with(pbc3, (log2bili-log2(34.2))*(bili>34.2))
pbc3$logbilimuchtoohigh <- with(pbc3, (log2bili-log2(51.3))*(bili>51.3))
Code show/hide
proc import out=pbc3
    datafile="data/pbc3.csv"
    dbms=csv replace;
run;
    
data pbc3; 
    set pbc3;
    followup=days/365.25; /* time in years */
    albnorm=(alb-35)*(alb>35);
    alb10=alb/10; alb2=alb10*alb10;
    bilihigh=(bili-17.1)*(bili>17.1);
    bilitoohigh=(bili-34.2)*(bili>34.2);
    bilimuchtoohigh=(bili-51.3)*(bili>51.3);
    bili100=bili/100; bili2=bili100*bili100;
    log2bili=log2(bili);
    logbilihigh=(log2bili-log2(17.1))*(bili>17.1);
    logbilitoohigh=(log2bili-log2(34.2))*(bili>34.2);
    logbilimuchtoohigh=(log2bili-log2(51.3))*(bili>51.3);
    log2bili2=log2bili*log2bili;
run;

Figure 4.2

Code show/hide
# General plotting style 
library(ggplot2)
theme_general <- theme_bw() +
  theme(text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20),
        legend.position = "bottom", 
        legend.title=element_blank(),
        legend.text = element_text(size = 20),
        legend.key.size = unit(2,"line"))

# Kaplan-Meier estimate per treatment
# Please note conf.type="log-log"
library(survival)
kmfit <- survfit(Surv(days, status != 0) ~ tment, data = pbc3, conf.type="log-log")

# Collect data for plot
# Note that the standard errors produced by survfit are for the cumulative hazard
kmdata <- data.frame(surv = kmfit$surv, 
                     time = kmfit$time, 
                     tment = c(rep(names(kmfit$strata)[1], kmfit$strata[1]), 
                               rep(names(kmfit$strata)[2], kmfit$strata[2])))

# Create Figure 4.2
fig4.2 <- ggplot(aes(x = time / 365.25, y = surv, linetype = tment), data = kmdata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA")) + 
  xlab("Time since randomization (years)") + 
  ylab("Survival probability") +
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6), breaks = seq(0, 6, by = 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), limits = c(0,1)) +
  theme_general

fig4.2

Code show/hide
* Kaplan-Meier plot per treatment;
* Using proc lifetest;
proc lifetest data=pbc3 notable plots=(survival(nocensor));
  time followup*status(0);
    strata tment;
run;

* Using proc phreg;
proc phreg data=pbc3;
    model days*status(0)=;
    strata tment;
    baseline out=survdat survival=km / method=pl;
run;

data survdat;
    set survdat; 
    daysyears = days/365.25; 
run; 

proc gplot data=survdat;
plot km*daysyears=tment/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Survival probability');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;

In-text, p. 119: Kaplan-Meier estimates at 2 years

Code show/hide
# 95 c.i. uses conf.type="log-log" - see above
summary(kmfit,times=2*365.25)
Call: survfit(formula = Surv(days, status != 0) ~ tment, data = pbc3, 
    conf.type = "log-log")

                tment=0 
        time       n.risk      n.event     survival      std.err lower 95% CI 
    730.5000     106.0000      27.0000       0.8322       0.0296       0.7645 
upper 95% CI 
      0.8819 

                tment=1 
        time       n.risk      n.event     survival      std.err lower 95% CI 
    730.5000     111.0000      24.0000       0.8458       0.0292       0.7782 
upper 95% CI 
      0.8941 
Code show/hide
proc lifetest data=pbc3 timelist=2;
  time followup*status(0);
  strata tment;
run;

proc print data=survdat;
  where days<=2*365.25;
run;

Figure 4.4

Code show/hide
# Poisson model fit (like in Chapter 2)
# Cuts
cuts <- c(0, 2, 4) * 365.25
# event/failure indicator
pbc3$fail <- ifelse(pbc3$status != 0, 1, 0)
# Make the data ready using survSplit
pbc3mult <- survSplit(Surv(days, fail) ~ ., 
                      pbc3,
                      cut = cuts[-1], 
                      episode = "timegroup")

# Risk time
pbc3mult$risktime <- pbc3mult$days - cuts[pbc3mult$timegroup] 

# Summarize
library(dplyr)
sumdata <- pbc3mult %>% 
  group_by(tment, timegroup) %>% 
  summarise(fail = sum(fail), 
            risktime = sum(days - cuts[timegroup])#,
            #logrisktime = sum(log(days))
  )
sumdata <- as.data.frame(sumdata)

# Placebo KM data from figure 4.2 model fit
tment1 <- subset(kmdata, tment == "tment=1")
# Estimated hazard per time group
sumdata$hazard_timegroup <- sumdata$fail / sumdata$risktime
# Add a numeric version of the treatment to the NA estimates
kmdata$tmentnum <- ifelse(kmdata$tment == "tment=0", 0, 1)
# Add piecewise constant hazard to data
kmdata$pwch <- NULL

# Between time 0 and 2
kmdata$pwch[kmdata$time <= 2 * 365.25] <- kmdata$time[kmdata$time <= 2  * 365.25] * 
  (sumdata$hazard_timegroup[1] * (1-kmdata$tmentnum[kmdata$time <= 2  * 365.25]) + 
     sumdata$hazard_timegroup[4] * (kmdata$tmentnum[kmdata$time <= 2 * 365.25]))

# Between time 2 and 4
kmdata$pwch[kmdata$time > 2  * 365.25 & kmdata$time <= 4 * 365.25] <- 2  * 365.25 * 
  (sumdata$hazard_timegroup[1]
   * (1-kmdata$tmentnum[kmdata$time > 2  * 365.25& kmdata$time <= 4 * 365.25]) + 
     sumdata$hazard_timegroup[4] 
   * (kmdata$tmentnum[kmdata$time > 2  * 365.25& kmdata$time <= 4 * 365.25])) + 
  (kmdata$time[kmdata$time > 2 * 365.25 & kmdata$time <= 4 * 365.25] - 2 * 365.25) * 
  (sumdata$hazard_timegroup[2] 
   * (1-kmdata$tmentnum[kmdata$time > 2  * 365.25& kmdata$time <= 4 * 365.25]) + 
     sumdata$hazard_timegroup[5] 
   * (kmdata$tmentnum[kmdata$time > 2 * 365.25 & kmdata$time <= 4 * 365.25]))

# After time 4
kmdata$pwch[kmdata$time > 4 * 365.25] <- 2 * 365.25 * 
  (sumdata$hazard_timegroup[1] * (1-kmdata$tmentnum[kmdata$time > 4 * 365.25]) + 
     sumdata$hazard_timegroup[4] * (kmdata$tmentnum[kmdata$time > 4 * 365.25])) + 
  2 * 365.25 *
  (sumdata$hazard_timegroup[2] * (1-kmdata$tmentnum[kmdata$time > 4 * 365.25]) + 
     sumdata$hazard_timegroup[5] * (kmdata$tmentnum[kmdata$time > 4 * 365.25])) + 
  (kmdata$time[kmdata$time > 4 * 365.25] - 4 * 365.25) * 
  (sumdata$hazard_timegroup[3] * (1-kmdata$tmentnum[kmdata$time > 4 * 365.25]) + 
     sumdata$hazard_timegroup[6] * (kmdata$tmentnum[kmdata$time > 4 * 365.25]))

# Change to estimated survival (plug-in formula)
kmdata$pwcs <- exp(-kmdata$pwch)
# Reformat for plot
piecepdata <- data.frame(surv = c(kmdata$surv, kmdata$pwcs), 
                         time = rep(kmdata$time, 2),
                         tmentnum = rep(kmdata$tmentnum, 2),
                         type = c(rep("Kaplan-Meier", length(kmdata$time)), 
                                  rep("Piece-wise exponential", length(kmdata$time))))
# Only for treatment 1
piecepdata1 <- subset(piecepdata, tmentnum == 1)

# Create Figure 4.4
fig4.4 <- ggplot(aes(x = time / 365.25, y = surv, linetype = type), 
                data = subset(piecepdata1, type == "Kaplan-Meier")) + 
  geom_step(linewidth = 1) + 
  geom_line(aes(x = time/ 365.25, y = surv, linetype = type), linewidth = 1,
            data = subset(piecepdata1, type == "Piece-wise exponential")) + 
  labs(linetype = "Type") + 
  xlab("Time since randomization (years)") + 
  ylab("Survival probability") + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general
fig4.4

Code show/hide
data survdat; 
    set survdat;
    if days<=2 * 365.25 then
    pwch=exp(-(days*(27.0000000/104856*(1-tment)+24.0000000/107931.5*tment)));
    if 2 * 365.25 <days<=4 * 365.25 then
    pwch=exp(-(2* 365.25*(27.0000000/104856*(1-tment)+24.0000000/107931.5*tment)
    +(days-2* 365.25)*(17.0000000/49673*(1-tment)+18.0000000/50284*tment)));
    if 4 * 365.25 <days then
    pwch=exp(-(2* 365.25 *(27.0000000/104856*(1-tment)+24.0000000/107931.5*tment)
    +(2* 365.25)*(17.0000000/49673*(1-tment)+18.0000000/50284*tment)
    +(days-4* 365.25)*(2.0000000/8642*(1-tment)+2.0000000/7599*tment)));
run;

data survdat;
    set survdat; 
    daysyears = days/365.25; 
run; 

proc gplot data=survdat; 
    where tment=1;
    plot (km pwch)*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Survival probability');
    symbol1  v=none i=stepjl r=1 c=red;
    symbol2 v=none i=join r=1 c=blue;
run;
quit;

Figure 4.5

Code show/hide
# Cox model
coxfit <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili, data = pbc3, 
                 method = "breslow")
# Unique days times 
fu <- sort(unique(pbc3$days))
# Data for prediction
preddata <- data.frame(tment = c(rep(0, length(fu)), rep(1, length(fu))), 
                       alb = rep(38, length(fu) * 2),
                       log2bili = rep(log2(45), length(fu) * 2), 
                       days = c(fu, fu),
                       status = rep(1, length(fu) * 2))

# Linear predictor
preds <- predict(coxfit, newdata = preddata, type = "survival")
preddata$preds <- preds

# Create Figure 4.5
fig4.5 <- ggplot(aes(x = days / 365.25, y = preds, linetype = as.factor(tment)), 
                data = preddata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA")) + 
  xlab("Time since randomization (years)") + 
  ylab("Estimated survival function") + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general 
fig4.5

Code show/hide
data cov;
    tment=0; alb=38; log2bili=log2(45); output;
    tment=1; alb=38; log2bili=log2(45); output;
run;

proc phreg data=pbc3;
    model days*status(0)=tment alb log2bili/rl;
    baseline out=predsurv survival=surv covariates=cov/ method=breslow;
run;

data predsurv;
    set predsurv; 
    daysyears = days/365.25; 
run; 

proc gplot data=predsurv;
    plot surv*daysyears=tment/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Estimated survival function');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

Figure 4.6

Code show/hide
# Add log(-log(S(t)))
preddata$logminlogsurv <- with(preddata, log(-log(preds)))

# Create Figure 4.6
fig4.6 <- ggplot(aes(x = days / 365.25, y = logminlogsurv, linetype = as.factor(tment)), 
                data = preddata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA")) + 
  xlab("Time since randomization (years)") + 
  ylab("log(-log(survival function))") + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) +
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05))) +
  theme_general

fig4.6

Code show/hide
data predsurv2; 
    set predsurv; 
    logminlogsurv = log(-log(surv)); 
run; 

data predsurv2;
    set predsurv2; 
    daysyears = days/365.25; 
run; 

proc gplot data=predsurv2;
    plot logminlogsurv*daysyears=tment/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=-7 to 0 by 1 minor=none label=(a=90 'log(-log(survival function))');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

Figure 4.7

Code show/hide
# g-formula
# We want to predict responses if all subjects had been given both CyA and placebo (tment = 0,1)
# We need to have n survival curves * 2
# make a double data set with extra Z
pbc3_counterfact <- pbc3
pbc3_counterfact$tment <- ifelse(pbc3_counterfact$tment == 1, 0, 1) # opposite treatment
pbc3_double <- rbind(pbc3, pbc3_counterfact)

# Baseline survival
coxfit <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili, data = pbc3, 
                 method = "breslow")
pred <- survfit(coxfit, newdata = data.frame(tment = 0, alb = 0, log2bili = 0))
allsurv <-
  lapply(1:nrow(pbc3_double),
       function(i)
         pred$surv ^
         exp(coef(coxfit)[1] * pbc3_double$tment[i] +
               coef(coxfit)[2] * pbc3_double$alb[i] +
               coef(coxfit)[3] * pbc3_double$log2bili[i]))
potout <-
  data.frame(surv = unlist(allsurv),
           tment = rep(pbc3_double$tment, each = length(pred$time)),
           time = rep(pred$time, times = nrow(pbc3)*2))

# Average over values
library(dplyr)
sumdata <- potout %>%
  group_by(tment, time) %>%
  summarise(average_pred = mean(surv, na.rm = TRUE),
            .groups = c("keep"))
sumdata <- as.data.frame(sumdata)

# Create Figure 4.7
fig4.7 <- ggplot(aes(x = time / 365.25, y = average_pred, linetype = as.factor(tment)),
                data = sumdata) +
  geom_step(linewidth = 1) +
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA")) +
  xlab("Time since randomization (years)") +
  ylab("Estimated survival function (g-formula)") +
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) +
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general 

fig4.7

Code show/hide
proc phreg data=pbc3;
    class tment;
    model days*status(0)=tment alb log2bili / rl;
    baseline out=gsurv survival=surv stderr=sd /
    method=breslow diradj group=tment;
run;
data gsurv;
    set gsurv; 
    daysyears = days/365.25; 
run; 
proc gplot data=gsurv;
    plot surv*daysyears=tment / haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none
  label=(a=90 'Estimated survival function (g-formula)');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

In-text, p. 122: g-formula at 2 years from Cox

Using riskRegression package

Code show/hide
library(riskRegression)
subpbc<-subset(pbc3, !is.na(alb))
subpbc$tment<-relevel(factor(subpbc$tment),ref="0")
cfit <- coxph(Surv(days, fail) ~ tment + alb + log2bili, data = subpbc, 
                 method = "breslow",y=TRUE,x=TRUE)
atecfit<-ate(cfit, data = subpbc, treatment = "tment", times = 2*365.25,
             cause=1, verbose=F)
summary(atecfit,type="meanRisk",se=T)
     Average treatment effect 

 - Treatment            : tment (2 levels: "0" "1")
 - Event                : fail (cause: 1, censoring: 0)
 - Time  [min;max]      : days [1;2150]
 - Eval. time          : 730.5
      number at risk 0     102
      number at risk 1     110

 Estimation procedure 
 - Estimator  : G-formula
 - Uncertainty: Gaussian approximation 
                where the variance is estimated via the influence function 

 Testing procedure
 - Null hypothesis     : given two treatments (A,B) and a specific timepoint, equal risks 
 - Confidence level    : 0.95

 Results: 
 - Standardized risk between time zero and 'time', reported on the scale [0;1] (probability scale)
   (average risk when treating all subjects with one treatment)

 time tment  risk     se          ci
  730     0 0.201 0.0271 [0.15;0.25]
  730     1 0.133 0.0217 [0.09;0.18]

 risk            : estimated standardized risk 
 ci              : pointwise confidence intervals 
Code show/hide
summary(atecfit,type="diffRisk",se=T)
     Average treatment effect 

 - Treatment            : tment (2 levels: "0" "1")
 - Event                : fail (cause: 1, censoring: 0)
 - Time  [min;max]      : days [1;2150]
 - Eval. time          : 730.5
      number at risk 0     102
      number at risk 1     110

 Estimation procedure 
 - Estimator  : G-formula
 - Uncertainty: Gaussian approximation 
                where the variance is estimated via the influence function 

 Testing procedure
 - Null hypothesis     : given two treatments (A,B) and a specific timepoint, equal risks 
 - Confidence level    : 0.95

 Results: 
 - Difference in standardized risk (B-A) between time zero and 'time' 
                reported on the scale [-1;1] (difference between two probabilities)
 (difference in average risks when treating all subjects with the experimental treatment (B),
                                vs. treating all subjects with the reference treatment (A))

 time tment=A tment=B difference     se            ci p.value
  730       0       1    -0.0681 0.0259 [-0.12;-0.02] 0.00855

 difference      : estimated difference in standardized risks 
 ci              : pointwise confidence intervals 
 p.value         : (unadjusted) p-value 
Code show/hide
# Survival instead of failure risk
1-atecfit$meanRisk$estimate
[1] 0.7989143 0.8670633
Code show/hide
# Bootstrap
atecfitB<-ate(cfit, data = subpbc, treatment = "tment", times = 2*365.25,
             cause=1, verbose=F, B=100)
summary(atecfitB,type="meanRisk",se=T)
     Average treatment effect 

 - Treatment            : tment (2 levels: "0" "1")
 - Event                : fail (cause: 1, censoring: 0)
 - Time  [min;max]      : days [1;2150]
 - Eval. time          : 730.5
      number at risk 0     102
      number at risk 1     110

 Estimation procedure 
 - Estimator  : G-formula
 - Uncertainty: Percentile bootstrap based on 100 bootstrap samples
                that were drawn with replacement from the original data.

 Testing procedure
 - Null hypothesis     : given two treatments (A,B) and a specific timepoint, equal risks 
 - Confidence level    : 0.95

 Results: 
 - Standardized risk between time zero and 'time', reported on the scale [0;1] (probability scale)
   (average risk when treating all subjects with one treatment)

 time tment  risk risk.boot     se          ci
  730     0 0.201     0.199 0.0261 [0.15;0.26]
  730     1 0.133     0.132 0.0192 [0.10;0.17]

 risk            : estimated standardized risk 
 risk.boot       : average value over the bootstrap samples 
 ci              : pointwise confidence intervals 
Code show/hide
summary(atecfitB,type="diffRisk",se=T)
     Average treatment effect 

 - Treatment            : tment (2 levels: "0" "1")
 - Event                : fail (cause: 1, censoring: 0)
 - Time  [min;max]      : days [1;2150]
 - Eval. time          : 730.5
      number at risk 0     102
      number at risk 1     110

 Estimation procedure 
 - Estimator  : G-formula
 - Uncertainty: Percentile bootstrap based on 100 bootstrap samples
                that were drawn with replacement from the original data.

 Testing procedure
 - Null hypothesis     : given two treatments (A,B) and a specific timepoint, equal risks 
 - Confidence level    : 0.95

 Results: 
 - Difference in standardized risk (B-A) between time zero and 'time' 
                reported on the scale [-1;1] (difference between two probabilities)
 (difference in average risks when treating all subjects with the experimental treatment (B),
                                vs. treating all subjects with the reference treatment (A))

 time tment=A tment=B difference difference.boot     se            ci p.value
  730       0       1    -0.0681         -0.0668 0.0239 [-0.12;-0.02]       0

 difference      : estimated difference in standardized risks 
 difference.boot : average value over the bootstrap samples 
 ci              : pointwise confidence intervals 
 p.value         : (unadjusted) p-value 

Using mets package

Code show/hide
library(mets)
cfitmets <- phreg(Surv(days, fail) ~ tment + alb + log2bili, data = subpbc)
summary(survivalG(cfitmets, subpbc, time = 2*365.25))
risk:
      Estimate Std.Err   2.5%  97.5%    P-value
risk0   0.7989 0.02722 0.7455 0.8522 2.325e-189
risk1   0.8671 0.02171 0.8245 0.9096  0.000e+00

Average Treatment effects (G-estimator) :
   Estimate Std.Err    2.5%  97.5%  P-value
p1   0.0682 0.02622 0.01681 0.1196 0.009297

Average Treatment effect ratio (G-estimator) :
     Estimate    Std.Err     2.5%    97.5%    P-value
[p1] 1.085375 0.03484226 1.017085 1.153664 0.01427275
Code show/hide
/* at 2 years */
proc phreg data=pbc3;
    class tment;
    model days*status(0)=tment alb log2bili/rl;
    baseline out=gsurv survival=surv stderr=sd outdiff=gsurvdiff timepoint=730 /
        method=breslow diradj group=tment;
run;
proc print data=gsurv;run;

  Obs           tment            days      surv        sd

   1                0             730    0.79891    0.024757
   2                1             730    0.86706    0.019178

proc print data=gsurvdiff;run;


   Obs           tment          tment2            days     SurvDiff     StdErr

    1                0               1             730    -0.068148    0.026252

* Bootstrap;
data bootpbc;
    do sampnum = 1 to 1000; /* nboot=1000*/
      do i = 1 to 349;      /*nobs=349*/
          x=round(ranuni(0)*349); 
            set pbc3
            point=x;
            output;
        end;
    end;
    stop;
run;
/* g-formel */
proc phreg data=bootpbc noprint;
    by sampnum;
    class tment;
    model days*status(0)=tment alb log2bili;
    baseline out=gsurvboot survival=surv outdiff=gsurvdiff timepoint=730 /
    method=breslow diradj group=tment;
run;
            
proc means data=gsurvboot mean stddev;
    class tment;
    var surv;
run;
                  The MEANS Procedure

   Analysis Variable : surv Survivor Function Estimate

                     N
         tment     Obs            Mean         Std Dev
    ----------------------------------------------------
             0    1000       0.7988766       0.0267078

             1    1000       0.8670747       0.0211279
    ----------------------------------------------------

            
proc means data=gsurvdiff mean stddev;
    class tment;
    var survdiff;
run;
                 The MEANS Procedure

             Analysis Variable : SurvDiff

                    N
        tment     Obs            Mean         Std Dev
    ----------------------------------------------------
            0    1000      -0.0681980       0.0265769
    ----------------------------------------------------

Figure 4.8

Code show/hide
# Product representation based on Breslow estimator
# Survival prediction
preds <- basehaz(coxfit, centered = F)

# For tment=0, alb=38, log2bili=log2(45)
dA_tment0 <- diff(c(0, preds$hazard 
                    * exp(coef(coxfit)[1]*0 + coef(coxfit)[2]*38 + coef(coxfit)[3] * log2(45) )))
surv_tment0 <- cumprod(1 - dA_tment0)

# For tment=1, alb=38, log2bili=log2(45)
dA_tment1 <- diff(c(0, preds$hazard 
                    * exp(coef(coxfit)[1]*1 + coef(coxfit)[2]*38 + coef(coxfit)[3] * log2(45) )))
surv_tment1 <- cumprod(1 - dA_tment1)

pdata <- data.frame(surv = c(surv_tment0, surv_tment1),
                    time = c(preds$time, preds$time), 
                    tment = c(rep("0", length(preds$time)),
                              rep("1", length(preds$time))))

# Create Figure 4.8
fig4.8 <- ggplot(aes(x = time / 365.25, y = surv, linetype = as.factor(tment)), 
                data = pdata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA")) + 
  xlab("Time since randomization (years)") + 
  ylab("Estimated survival function") + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general

fig4.8

Code show/hide
proc phreg data=pbc3;
    *class tment;
    model days*status(0)=tment alb log2bili/rl;
    baseline out=predsurvpl survival=surv covariates=cov / method=pl;
run;

data predsurvpl;
    set predsurvpl; 
    daysyears = days/365.25; 
run; 

proc gplot data=predsurvpl;
    plot surv*daysyears=tment/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Estimated survival function');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

Table 4.1

Non-parametric using survival package

Code show/hide
library(survival)
np_km <- survfit(Surv(days/365.25, status != 0) ~ tment, data = pbc3)
print(np_km,rmean=3)
Call: survfit(formula = Surv(days/365.25, status != 0) ~ tment, data = pbc3)

          n events rmean* se(rmean) median 0.95LCL 0.95UCL
tment=0 173     46   2.61    0.0633     NA    4.51      NA
tment=1 176     44   2.68    0.0565     NA    4.66      NA
    * restricted mean with upper limit =  3 

Non-parametric using mets package

Code show/hide
# non-parametric could also be done using mets package
library(mets)
out1 <- phreg(Surv(days/365.25,fail)~strata(tment),data=pbc3)
rm1 <- resmean.phreg(out1,times=3)
summary(rm1)
  strata times    rmean   se.rmean years.lost
1      0     3 2.606095 0.06325665  0.3939052
2      1     3 2.677657 0.05654602  0.3223425
Code show/hide
resmeanIPCW(Event(days/365.25,fail)~1, data=subset(pbc3,tment==0),time=3,model="linear")

   n events
 173     36

 173 clusters
coeffients:
            Estimate  Std.Err     2.5%    97.5% P-value
(Intercept) 2.605894 0.063305 2.481818 2.729969       0

exp(coeffients):
            Estimate   2.5%  97.5%
(Intercept)   13.543 11.963 15.332
Code show/hide
resmeanIPCW(Event(days/365.25,fail)~1, data=subset(pbc3,tment==1),time=3,model="linear")

   n events
 176     32

 176 clusters
coeffients:
            Estimate  Std.Err     2.5%    97.5% P-value
(Intercept) 2.677458 0.056607 2.566511 2.788405       0

exp(coeffients):
            Estimate   2.5%  97.5%
(Intercept)   14.548 13.020 16.255
Code show/hide
# or
options(contrasts=c("contr.treatment", "contr.poly"))
out<-resmeanIPCW(Event(days/365.25,fail)~-1+factor(tment),data=pbc3,time=3,model="linear",
            cens.model = ~strata(tment))
estimate(out)
               Estimate Std.Err  2.5% 97.5% P-value
factor(tment)0    2.606 0.06330 2.482 2.730       0
factor(tment)1    2.677 0.05661 2.567 2.788       0

Code for rest of table

Code show/hide
#### Non-parametric using RISCA package
kmdata <- data.frame(surv = np_km$surv, 
                      time = np_km$time, 
                     strata = c(rep(names(np_km$strata[1]), np_km$strata[[1]]), 
                              rep(names(np_km$strata[2]), np_km$strata[[2]])))
# Restrict to each treat
kmdata0 <- subset(kmdata, strata == "tment=0")
kmdata1 <- subset(kmdata, strata == "tment=1")

# rmst
library(RISCA)
rmst0 <- rmst(times = kmdata0$time, surv.rates = kmdata0$surv, max.time = 3, type = "s")
rmst1 <- rmst(times = kmdata1$time, surv.rates = kmdata1$surv, max.time = 3, type = "s")

# Cox model, alb = 38, bili = 45
# Cox model fit with covariates tment, alb and log2bili
coxfit <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili, data = pbc3, 
                method = "breslow")

# Unique followup times 
fu <- sort(unique(pbc3$days))

# Data for prediction
preddata1 <- data.frame(tment = c(rep(0, length(fu)), rep(1, length(fu))), 
                       alb = rep(38, length(fu) * 2),
                       log2bili = rep(log2(45), length(fu) * 2), 
                       days = c(fu, fu),
                       status = rep(1, length(fu) * 2)
)

# Linear predictor
preds1 <- predict(coxfit, newdata = preddata1, type = "survival")

preddata1$preds <- preds1

cox10 <- subset(preddata1, tment == "0")
cox11 <- subset(preddata1, tment == "1")

# Rmst 
rmstcox10 <- rmst(times = cox10$days, surv.rates = cox10$preds, max.time = 3 * 365.25, type = "s")
rmstcox11 <- rmst(times = cox11$days, surv.rates = cox11$preds, max.time = 3 * 365.25, type = "s")

# Cox for alb = 20 and bili = 90 

# Data for prediction
preddata2 <- data.frame(tment = c(rep(0, length(fu)), rep(1, length(fu))), 
                        alb = rep(20, length(fu) * 2),
                        log2bili = rep(log2(90), length(fu) * 2), 
                        days = c(fu, fu),
                        status = rep(1, length(fu) * 2)
)

# Linear predictor
preds2 <- predict(coxfit, newdata = preddata2, type = "survival")

preddata2$preds <- preds2

cox20 <- subset(preddata2, tment == "0")
cox21 <- subset(preddata2, tment == "1")

# Rmst 
rmstcox20 <- rmst(times = cox20$days, surv.rates = cox20$preds, max.time = 3 * 365.25, type = "s")
rmstcox21 <- rmst(times = cox21$days, surv.rates = cox21$preds, max.time = 3 * 365.25, type = "s")

# Cox model, g-formula

# We want to predict responses if all subjects had been given both CyA and placebo (tment = 0,1)
# We need to have n survival curves * 2
# make a double data set with extra Z
pbc3_counterfact <- pbc3
pbc3_counterfact$tment <- ifelse(pbc3_counterfact$tment == 1, 0, 1) # opposite treatment
pbc3_double <- rbind(pbc3, pbc3_counterfact)

# Baseline survival
pred <- survfit(coxfit, newdata = data.frame(tment = 0, alb = 0, log2bili = 0))

allsurv <-
  lapply(1:nrow(pbc3_double),
         function(i)
           pred$surv ^
           exp(coef(coxfit)[1] * pbc3_double$tment[i] +
                 coef(coxfit)[2] * pbc3_double$alb[i] +
                 coef(coxfit)[3] * pbc3_double$log2bili[i]))

potout <-
  data.frame(surv = unlist(allsurv),
             tment = rep(pbc3_double$tment, each = length(pred$time)),
             time = rep(pred$time, times = nrow(pbc3)*2)
  )

# Average over values
require(dplyr)
sumdata <- potout %>%
  group_by(tment, time) %>%
  summarise(average_pred = mean(surv, na.rm = TRUE),
            .groups = c("keep"))
sumdata <- as.data.frame(sumdata)


# Split data per group
coxg0 <- subset(sumdata, tment == "0")
coxg1 <- subset(sumdata, tment == "1")

# Rmst 
rmstcoxg0 <- rmst(times = coxg0$time, surv.rates = coxg0$average_pred, max.time = 3 * 365.25, type = "s")
rmstcoxg1 <- rmst(times = coxg1$time, surv.rates = coxg1$average_pred, max.time = 3 * 365.25, type = "s")

##### BOOTSTRAP FOR SE'S #####

# Resample data sets 

B<-200
bootdata <- list()
kmres <- cox1res <- cox2res <- coxgres <- list()

#colnames(kmres) <- colnames(cox1res) <- colnames(cox2res) <- colnames(coxgres) <- c("rmst0", "rmst1")


for (b in 1:B){
  bootdata[[b]] <- pbc3[sample(1:nrow(pbc3), size = nrow(pbc3), replace = T),]
  
  ###### KM #######
  np_km <- survfit(Surv(days, status != 0) ~ tment, data = bootdata[[b]])
  
  kmdata <- data.frame(surv = np_km$surv, 
                       time = np_km$time, 
                       strata = c(rep(names(np_km$strata[1]), np_km$strata[[1]]), 
                                  rep(names(np_km$strata[2]), np_km$strata[[2]])))
  # Restrict to each treat
  kmdata0 <- subset(kmdata, strata == "tment=0")
  kmdata1 <- subset(kmdata, strata == "tment=1")
  
  # rmst
  rmst0 <- rmst(times = kmdata0$time, surv.rates = kmdata0$surv, max.time = 3 * 365.25, type = "s")
  rmst1 <- rmst(times = kmdata1$time, surv.rates = kmdata1$surv, max.time = 3 * 365.25, type = "s")

  kmres[[b]] <- c(rmst0, rmst1)
  
  
  ###### Cox 1 #######
  
  coxfit <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili, data = bootdata[[b]], 
                  method = "breslow")
  
  # Unique followup times 
  fu <- sort(unique(pbc3$days))
  
  # Data for prediction
  preddata1 <- data.frame(tment = c(rep(0, length(fu)), rep(1, length(fu))), 
                          alb = rep(38, length(fu) * 2),
                          log2bili = rep(log2(45), length(fu) * 2), 
                          days = c(fu, fu),
                          status = rep(1, length(fu) * 2))
  
  # Linear predictor
  preds1 <- predict(coxfit, newdata = preddata1, type = "survival")
  preddata1$preds <- preds1
  cox10 <- subset(preddata1, tment == "0")
  cox11 <- subset(preddata1, tment == "1")
  
  # Rmst 
  rmstcox10 <- rmst(times = cox10$days, surv.rates = cox10$preds, max.time = 3 * 365.25, type = "s")
  rmstcox11 <- rmst(times = cox11$days, surv.rates = cox11$preds, max.time = 3 * 365.25, type = "s")
    cox1res[[b]] <- c(rmstcox10, rmstcox11)
  
  ###### Cox 2 #######

  # Cox for alb = 20 and bili = 90   
  # Data for prediction
  preddata2 <- data.frame(tment = c(rep(0, length(fu)), rep(1, length(fu))), 
                          alb = rep(20, length(fu) * 2),
                          log2bili = rep(log2(90), length(fu) * 2), 
                          days = c(fu, fu),
                          status = rep(1, length(fu) * 2))
  
  # Linear predictor
  preds2 <- predict(coxfit, newdata = preddata2, type = "survival")
  preddata2$preds <- preds2
  cox20 <- subset(preddata2, tment == "0")
  cox21 <- subset(preddata2, tment == "1")
  
  # Rmst 
  rmstcox20 <- rmst(times = cox20$days, surv.rates = cox20$preds, max.time = 3 * 365.25, type = "s")
  rmstcox21 <- rmst(times = cox21$days, surv.rates = cox21$preds, max.time = 3 * 365.25, type = "s")
  cox2res[[b]] <- c(rmstcox20, rmstcox21) 
  
  
  ##### Cox - g formula #######
  
  # We want to predict responses if all subjects had been given both CyA and placebo (tment = 0,1)
  # We need to have n survival curves * 2
  # make a double data set with extra Z
  pbc3_counterfact <- bootdata[[b]]
  pbc3_counterfact$tment <- ifelse(pbc3_counterfact$tment == 1, 0, 1) # opposite treatment
  pbc3_double <- rbind(bootdata[[b]], pbc3_counterfact)
  
  # Baseline survival
  pred <- survfit(coxfit, newdata = data.frame(tment = 0, alb = 0, log2bili = 0))
  
  allsurv <-
    lapply(1:nrow(pbc3_double),
           function(i)
             pred$surv ^
             exp(coef(coxfit)[1] * pbc3_double$tment[i] +
                   coef(coxfit)[2] * pbc3_double$alb[i] +
                   coef(coxfit)[3] * pbc3_double$log2bili[i]))
  
  potout <-
    data.frame(surv = unlist(allsurv),
               tment = rep(pbc3_double$tment, each = length(pred$time)),
               time = rep(pred$time, times = nrow(pbc3)*2)
    )
  
  # Average over values
  require(dplyr)
  sumdata <- potout %>%
    group_by(tment, time) %>%
    summarise(average_pred = mean(surv, na.rm = TRUE),
              .groups = c("keep"))
  sumdata <- as.data.frame(sumdata)

  # Split data per group
  coxg0 <- subset(sumdata, tment == "0")
  coxg1 <- subset(sumdata, tment == "1")
  
  # Rmst 
  rmstcoxg0 <- rmst(times = coxg0$time, surv.rates = coxg0$average_pred, max.time = 3 * 365.25, type = "s")
  rmstcoxg1 <- rmst(times = coxg1$time, surv.rates = coxg1$average_pred, max.time = 3 * 365.25, type = "s")
  coxgres[[b]] <- c(rmstcoxg0, rmstcoxg1)
}

kmreso <- do.call("rbind", kmres)
cox1reso <- do.call("rbind", cox1res)
cox2reso <- do.call("rbind", cox2res)
coxgreso <- do.call("rbind", coxgres)

Cox (38,45)

Code show/hide
apply(cox1reso, 2, mean)/365.25
[1] 2.527231 2.720892
Code show/hide
apply(cox1reso, 2, sd)/365.25
[1] 0.06340501 0.04929527

Cox (20,90)

Code show/hide
apply(cox2reso, 2, mean)/365.25
[1] 0.9566561 1.3641107
Code show/hide
apply(cox2reso, 2, sd)/365.25
[1] 0.2510078 0.2445435

Cox g-formula

Code show/hide
apply(coxgreso, 2, mean)/365.25
[1] 2.548957 2.703833
Code show/hide
apply(coxgreso, 2, sd)/365.25
[1] 0.05726278 0.04401984

Cox g-formula using RISCA package

Code show/hide
pbc3$followup<-pbc3$days/365.25
coxf <- coxph(Surv(followup, fail) ~ tment + alb + log2bili,
                data = pbc3, method = "breslow")
library(RISCA)
gc.ate <- gc.survival(object=coxf, data=pbc3, 
                      group="tment", times="followup", failures="fail",
                      max.time=3, iterations=1000, effect="ATE")
rbind(Placebo=gc.ate$RMST0,CyA=gc.ate$RMST1)
        estimate  std.error ci.lower ci.upper
Placebo 2.553795 0.05834140 2.439448 2.668142
CyA     2.705696 0.04488675 2.617719 2.793672

Non-parametric using PROC RMSTREG

Code show/hide
proc sort data=pbc3 out=pbc3sorted;
    by tment;
run;
proc rmstreg data=pbc3sorted tau=3;
  by tment;
  model followup*status(0)= / 
        link=linear method=ipcw(strata=tment);
run;

* Bootstrapping using 'point='; 
data bootpbc;
    do sampnum = 1 to 1000; /* nboot=1000*/
        do i = 1 to 349;      /*nobs=349*/
            x=round(ranuni(0)*349); 
            set pbc3
            point=x;
            output;
        end;
    end;
    stop;
run;
proc sort data=bootpbc out=boot;
  by sampnum tment;
run;
proc rmstreg data=boot tau=3;
  by sampnum tment;
  model followup*status(0)= / 
        link=linear method=ipcw(strata=tment);
    ods output parameterestimates=pe;
run;
proc means data=pe mean stddev;
  class tment;
    var estimate;
run;

Macro and bootstrap data set

Code show/hide
* Bootstrapping using 'point='; 
data bootpbc;
    do sampnum = 1 to 1000; /* nboot=1000*/
        do i = 1 to 349;      /*nobs=349*/
            x=round(ranuni(0)*349); 
            set pbc3
            point=x;
            output;
        end;
    end;
    stop;
run;

* AUC under stepcurves; 
%macro areastepby(data,byvar,trt,grp,time,y,tau);
    data select;
        set &data;
        where &trt=&grp;
    run;

    data select;
        set select;
        by &byvar;
        retain mu oldt oldy;
        if first.&byvar then do;
            oldt=0; oldy=1; mu=0;
        end;
        if &time>&tau then do;
            &time=&tau; &y=oldy;
        end;
        if not first.&byvar then mu+oldy*(&time-oldt);
        if last.&byvar then do;
        if &time<&tau then mu+(&tau-&time)*&y; end;
        oldy=&y; oldt=&time;
    run;

    data last; set select;
        by  &byvar;
        if last.&byvar;
    run;
%mend areastepby;

Non-parametric using macro

Code show/hide
proc phreg data=bootpbc noprint;
    by sampnum;
    model followup*status(0)=;
    strata tment;
    baseline out=survdat survival=km / method=pl;
run;

%areastepby(survdat,sampnum,tment,0,followup,km,3);
title "Placebo";
proc means data=last mean stddev;
    var mu;
run;
%areastepby(survdat,sampnum,tment,1,followup,km,3);
title "CyA";
proc means data=last mean stddev;
    var mu;
run;

Cox (38,45) using macro

Code show/hide
data cov;
    tment=0; alb=38; log2bili=log2(45); output;
    tment=1; alb=38; log2bili=log2(45); output;
run;
proc phreg data=bootpbc noprint;
    by sampnum;
    model followup*status(0)=tment alb log2bili/rl;
    baseline out=predsurv survival=surv covariates=cov/ method=breslow;
run;

%areastepby(predsurv,sampnum,tment,0,followup,surv,3);
title "Placebo";
proc means data=last mean stddev;
    var mu;
run;
%areastepby(predsurv,sampnum,tment,1,followup,surv,3);
title "CyA";
proc means data=last mean stddev;
    var mu;
run;

Cox (20,90) using macro

Code show/hide
data cov;
    tment=0; alb=20; log2bili=log2(90); output;
    tment=1; alb=20; log2bili=log2(90); output;
run;
proc phreg data=bootpbc noprint;
    by sampnum;
    model followup*status(0)=tment alb log2bili/rl;
    baseline out=predsurv2 survival=surv covariates=cov/ method=breslow;
run;

%areastepby(predsurv2,sampnum,tment,0,followup,surv,3);
title "Placebo";
proc means data=last mean stddev;
    var mu;
run;
%areastepby(predsurv2,sampnum,tment,1,followup,surv,3);
title "CyA";
proc means data=last mean stddev;
    var mu;
run;

Cox g-formula using macro

Code show/hide
proc phreg data=bootpbc noprint;
    by sampnum;
    class tment (ref='0');
    model followup*status(0)=tment alb log2bili/rl;
    baseline out=gsurv survival=surv stderr=se/ method=breslow diradj group=tment;
run;

%areastepby(gsurv,sampnum,tment,0,followup,surv,3);
title "Placebo";
proc means data=last mean stddev;
    var mu;
run;
%areastepby(gsurv,sampnum,tment,1,followup,surv,3);
title "CyA";
proc means data=last mean stddev;
    var mu;
run;

Figure 4.10

Code show/hide
# Cumulative incidences are estimated using Aalen-Johansen

# Overall survival 
overall_surv <- survfit(Surv(days, status != 0) ~ 1, 
                        data = subset(pbc3, tment_char == "Placebo"))

# Cause 1 survival: transplantation
library(mets)
cause1_cif <- cif(Event(days, status) ~ 1, 
                  data = subset(pbc3, tment_char == "Placebo"), 
                  cause = 1)

# Cause 2 survival: death w/o transplantation
cause2_cif <- cif(Event(days, status) ~ 1, 
                  data = subset(pbc3, tment_char == "Placebo"), 
                  cause = 2)

# Get them on the same time scale - book keeping

alltimes <- overall_surv$time
cause1st <- stepfun(x = cause1_cif$cumhaz[,1], y = c(0, cause1_cif$cumhaz[,2]))
cause1times <- cause1st(v = alltimes)

cause2st <- stepfun(x = cause2_cif$cumhaz[,1], y = c(0, cause2_cif$cumhaz[,2]))
cause2times <- cause2st(v = alltimes)


# Collect the data 
data_comb <- data.frame(cif = c(overall_surv$surv + cause1times + cause2times, 
                                cause1times + cause2times, 
                                cause1times),
                        time = c(alltimes, alltimes, alltimes),
                        type = c(rep("Overall", length(alltimes)), 
                                 rep("Transplantation + death without transplantation", length(alltimes)), 
                                 rep("Transplantation", length(alltimes)))
                        )

# Create Figure 4.10
fig4.10 <- ggplot(aes(x = time / 365.25, y = cif, linetype = type), 
                data = data_comb) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Type") + 
  xlab("Time since randomization (years)") + 
  ylab("Stacked cumulative incidence and survival") + 
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)), 
                     limits = c(0, 6), 
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)), 
                     limits = c(0, 1.0), 
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general + 
  guides(linetype = guide_legend(nrow = 3, byrow = TRUE)) + 
  theme(legend.position="bottom",
        legend.box="vertical",
        text = element_text(size=21), 
        legend.key.width = unit(1, "cm"))

fig4.10

Code show/hide
proc phreg data=pbc3 noprint;
model days*status(0)=;
baseline out=overallsurv survival=surv / method=;
run;
proc sort;by tment days;run;
proc phreg data=pbc3 noprint;
model days*status(0)=/eventcode=1;
strata tment;
baseline out=cuminc1 cif=cif1;
run;
proc sort;by tment days;run;
proc phreg data=pbc3 noprint;
model days*status(0)=/eventcode=2;
strata tment;
baseline out=cuminc2 cif=cif2;
run;
proc sort;by tment days;run;
data plot0; 
merge overallsurv cuminc1 cuminc2; 
by tment days;
if tment=0;
fail=1-surv;
run;

data plot0ok; 
set plot0;
by days;
retain last0 last1 last2;
if fail=. then c0=last0; if fail ne . then c0=fail;
if cif1=. then c1=last1; if cif1 ne . then c1=cif1;
if cif2=. then c2=last2; if cif2 ne . then c2=cif2;
output;
last0=c0; last1=c1; last2=c2;
run;
data plot0ok; 
set plot0ok;
cum1=c1; 
cum2=c1+c2; 
cum3=c1+c2+(1-c0);
run;
proc print;run;

data plot0ok; 
set plot0ok; 
daysyears = days/365.25; 
run; 

proc gplot 
data=plot0ok;
plot cum1*daysyears cum2*daysyears cum3*daysyears /overlay haxis=axis1 vaxis=axis2;
axis1 order=0 to 6 by 1 minor=none label=('Years');
axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Stacked cumulative incidence and survival');
symbol1  v=none i=stepjl c=blue;
symbol2  v=none i=stepjl c=red;
symbol3  v=none i=stepjl c=black;
run;
quit;

Figure 4.11

Code show/hide
# Cumulative incidences are (wrongly) estimated using Kaplan-Meier

# Overall survival 
overall_surv <- survfit(Surv(days, status != 0) ~ 1, 
                        data = subset(pbc3, tment_char == "Placebo"))

# Cause 1 survival: transplantation, KM
cause1_cif_w <- survfit(Surv(days, status == 1) ~ 1, 
                        data = subset(pbc3, tment_char == "Placebo"))

# Cause 2 survival: death w/o transplantation, KM
cause2_cif_w <- survfit(Surv(days, status == 2) ~ 1, 
                      data = subset(pbc3, tment_char == "Placebo"))

# Get them on the same time scale - book keeping
alltimes <- overall_surv$time
cause1st <- stepfun(x = cause1_cif_w$time, y = c(0, cause1_cif_w$surv))
cause1times <- cause1st(v = alltimes)

cause2st <- stepfun(x = cause2_cif_w$time, y = c(0, cause2_cif_w$surv))
cause2times <- cause2st(v = alltimes)

# Collect the data 
data_comb <- data.frame(cif_w = c(overall_surv$surv + 1-cause1times + 1-cause2times, 
                                1-cause1times + 1-cause2times, 
                                1-cause1times),
                        time = c(alltimes, alltimes, alltimes),
                        type = c(rep("Overall", length(alltimes)), 
                                 rep("Transplantation + death without transplantation", length(alltimes)), 
                                 rep("Transplantation", length(alltimes)))
)



# Create Figure
fig4.11 <- ggplot(aes(x = time / 365.25, y = cif_w, linetype = type), 
                 data = data_comb) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Type") + 
  xlab("Time since randomization (years)") + 
  ylab("Stacked cumulative incidence and survival") + 
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)), 
                     limits = c(0, 6), 
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)), 
                     limits = c(0, 1.1), 
                     breaks = seq(0, 1.1, 0.1)) +
  theme_general + 
  guides(linetype = guide_legend(nrow = 3, byrow = TRUE)) + 
  theme(legend.box="vertical",
        text = element_text(size=21), 
        legend.key.width = unit(1, "cm"))
fig4.11

Code show/hide
proc phreg data=pbc3 noprint;
    model days*status(0 2)=;
    strata tment;
    baseline out=cuminc1wrng survival=s1wrng;
run;

proc phreg data=pbc3 noprint;
    model days*status(0 1)=;
    baseline out=cuminc2wrng survival=s2wrng;
    strata tment;
run;

data plot0wrng; 
    merge overallsurv cuminc1wrng cuminc2wrng; 
    where tment=0; by days;
run;

data plot0wrng; 
    set plot0wrng;
    by days;
    retain last1 last2;
    if s1wrng=. then c1=last1; if s1wrng ne . then c1=1-s1wrng;
    if s2wrng=. then c2=last2; if s2wrng ne . then c2=1-s2wrng;
    output;
    last1=c1; last2=c2;
run;

data plot0wrng; 
    set plot0wrng;
    cum1=c1; cum2=c1+c2; cum3=c1+c2+surv; one=1;
run;

data plot0wrng; 
    set plot0wrng; 
    daysyears = days/365.25; 
run; 

proc gplot 
    data=plot0wrng;
    plot cum1*daysyears cum2*daysyears cum3*daysyears one*daysyears
         /overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1.1 by 0.1 minor=none label=(a=90 'Stacked cumulative incidence and survival');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
    symbol3  v=none i=stepjl c=black;
    symbol4  v=none i=stepjl l=2 c=black;
run;
quit;

Figure 4.12

Figure 4.12 (a)

Code show/hide
# Overall
overall_cox <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili + sex + age,
                        data = subset(pbc3, !is.na(alb)),
                     method = "breslow", eps=1e-9)

# Cause 1: transplantation
cause1_cox <- coxph(Surv(days, status == 1) ~ tment + alb + log2bili + sex + age,
                    data = subset(pbc3, !is.na(alb)), 
                    method = "breslow", eps=1e-9)

# Cause 2: death w/o transplantation
cause2_cox <- coxph(Surv(days, status == 2) ~ tment + alb + log2bili + sex + age,
                    data = subset(pbc3, !is.na(alb)), 
                    method = "breslow", eps=1e-9)


# for tment = placebo, age = 40, alb = 38, bili = log2(45) and sex = F
newd <- data.frame(tment = 0, age = 40, 
                   alb = 38, log2bili = log2(45), 
                   sex = 0)
est_cause1 <- survfit(cause1_cox, ctype = 2, newdata = newd)$cumhaz 

est_cause2 <- survfit(cause2_cox, ctype = 2, newdata = newd)$cumhaz 

est_overall <- survfit(overall_cox, ctype = 2, newdata = newd)$surv

# Calculate S, and F1 and F2
alltimes <- basehaz(overall_cox, centered = F)$time

A <- cumsum(diff(c(0, est_cause1 + est_cause2)))
dA <- diff(c(0, A))
S <- cumprod(1-dA) #exp(-cumsum(dA)) 
A1 <- est_cause1
A2 <- est_cause2

dat <- data.frame(cbind(alltimes, A, dA, S, A1, A2))
dat2 <- subset(dat, dA > 0)
dat2$lagS <- with(dat2, c(0, S[-length(S)])) 

dat2$F1 <- with(dat2, cumsum(lagS * diff(c(0, A1))))
dat2$F2 <- with(dat2, cumsum(lagS * diff(c(0, A2))))


# Collect the data
data_comb <- with(dat2, 
                  data.frame(cif_w = c(rep(1, length(alltimes)),
                                       F1 + F2,
                                       F1),
                             time = c(alltimes, alltimes, alltimes),
                             type = c(rep("Overall", length(alltimes)),
                                      rep("Transplantation + death without transplantation", length(alltimes)),
                                      rep("Transplantation", length(alltimes)))
                  ))


# Create Figure
fig4.12a <- ggplot(aes(x = time / 365.25, y = cif_w, linetype = type),
                 data = data_comb) +
  geom_step(linewidth = 1) +
  scale_linetype_discrete("Type") +
  xlab("Time since randomization (years)") +
  ylab("Stacked cumulative incidence and survival") +
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) +
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general +
  guides(linetype = guide_legend(nrow = 3, byrow = TRUE)) + 
  theme(legend.box="vertical",
        text = element_text(size=22), 
        legend.key.width = unit(1, "cm"))

fig4.12a

Figure 4.12 (b)

Code show/hide
# for tment = placebo, age = 40, alb = 20, bili = log2(90) and sex = F
newd <- data.frame(tment = 0, age = 40, 
                    alb = 20, log2bili = log2(90), 
                    sex = 0)

est_cause1 <- survfit(cause1_cox, ctype = 2, newdata = newd)$cumhaz

est_cause2 <- survfit(cause2_cox, ctype = 2, newdata = newd)$cumhaz

est_overall <- survfit(overall_cox, ctype = 2, newdata = newd)$surv


# Calculate S, and F1 and F2
alltimes <- basehaz(overall_cox, centered = F)$time

A <- cumsum(diff(c(0, est_cause1 + est_cause2)))
dA <- diff(c(0, A))
S <- cumprod(1-dA) #exp(-cumsum(dA)) 
A1 <- est_cause1
A2 <- est_cause2

dat <- data.frame(cbind(alltimes, A, dA, S, A1, A2))
dat2 <- subset(dat, dA > 0)
dat2$lagS <- with(dat2, c(0, S[-length(S)])) 

dat2$F1 <- with(dat2, cumsum(lagS * diff(c(0, A1))))
dat2$F2 <- with(dat2, cumsum(lagS * diff(c(0, A2))))


# Collect the data
data_comb <- with(dat2, 
                  data.frame(cif_w = c(rep(1, length(alltimes)),
                                  F1 + F2,
                                  F1),
                        time = c(alltimes, alltimes, alltimes),
                        type = c(rep("Overall", length(alltimes)),
                                 rep("Transplantation + death without transplantation", length(alltimes)),
                                 rep("Transplantation", length(alltimes)))))

# Create Figure
fig4.12b <- ggplot(aes(x = time / 365.25, y = cif_w, linetype = type),
                 data = data_comb) +
  geom_step(linewidth = 1) +
  scale_linetype_discrete("Type") +
  xlab("Time since randomization (years)") +
  ylab("Stacked cumulative incidence and survival") +
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) +
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general +
  guides(linetype = guide_legend(nrow = 3, byrow = TRUE)) + 
  theme(legend.box="vertical",
        text = element_text(size=22), 
        legend.key.width = unit(1, "cm"))


fig4.12b

Figure 4.12 (c)

Code show/hide
# for tment = placebo, age = 60, alb = 38, bili = log2(45) and sex = F
newd <- data.frame(tment = 0, age = 60, 
                   alb = 38, log2bili = log2(45), 
                   sex = 0)

est_cause1 <- survfit(cause1_cox, ctype = 2, newdata = newd)$cumhaz

est_cause2 <- survfit(cause2_cox, ctype = 2, newdata = newd)$cumhaz

est_overall <- survfit(overall_cox, ctype = 2, newdata = newd)$surv


# Calculate S, and F1 and F2
alltimes <- basehaz(overall_cox, centered = F)$time

A <- cumsum(diff(c(0, est_cause1 + est_cause2)))
dA <- diff(c(0, A))
S <- cumprod(1-dA) #exp(-cumsum(dA)) 
A1 <- est_cause1
A2 <- est_cause2

dat <- data.frame(cbind(alltimes, A, dA, S, A1, A2))
dat2 <- subset(dat, dA > 0)
dat2$lagS <- with(dat2, c(0, S[-length(S)])) 

dat2$F1 <- with(dat2, cumsum(lagS * diff(c(0, A1))))
dat2$F2 <- with(dat2, cumsum(lagS * diff(c(0, A2))))


# Collect the data
data_comb <- with(dat2, 
                  data.frame(cif_w = c(rep(1, length(alltimes)), F1 + F2, F1),
                             time = c(alltimes, alltimes, alltimes),
                             type = c(rep("Overall", length(alltimes)),
                                      rep("Transplantation + death without transplantation",
                                          length(alltimes)),
                                      rep("Transplantation", length(alltimes)))))

# Create Figure
fig4.12c <- ggplot(aes(x = time / 365.25, y = cif_w, linetype = type),
                 data = data_comb) +
  geom_step(linewidth = 1) +
  scale_linetype_discrete("Type") +
  xlab("Time since randomization (years)") +
  ylab("Stacked cumulative incidence and survival") +
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) +
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general + 
  guides(linetype = guide_legend(nrow = 3, byrow = TRUE)) + 
  theme(legend.position="bottom",
        legend.box="vertical",
        text = element_text(size=22), 
        legend.key.width = unit(1, "cm"))


fig4.12c

Macro and double data set

The SAS macro below can alternatively be loaded using this code:

Code show/hide
filename cumincpr url 'https://biostat.ku.dk/pka/CUMINC/CumInc.sas';
%inc cumincpr;
Code show/hide
%macro CumInc(Data,Strata,Time,Surv);

/* Number of Stratas */
  
  proc sort data=&Data;
by &Strata;

data nstrat;
set &Data end=last;
by &Strata;
firstS=first.&Strata;
retain nStrata 0;
nStrata+firstS;
if last then call symput('nStrata',nStrata);
drop firstS;
run;


/* Observations in ciData are deleted */
  
  
  data newData; 
set &Data;
start=(&Time eq 0);   
retain komb 0;
komb+start;
med=0;
%do k=0 %to (&nStrata-1) %by 1;
med=med+(komb eq (1+&k*(&nStrata+1)));
%end;
if (med eq 1);  
drop start med komb;
run;

/* Time vector */
  
  proc sort data=newData;
by &Time;

data Time (keep=&Time); 
set newData;
%do i=1 %to &nStrata %by 1;
if &Time=lag(&Time) then delete;
%end;


/* Strata vector */
  
  proc sort data=newData;
by &Strata &Time;

data temp1;
set newData; by &Strata;
retain stratum 0;
stratum+first.&Strata;

/*  A=sum(A_1,...,A_nStrata) */
  
  %do i=1 %to &nStrata %by 1;
data data&i;
set temp1;
if stratum=&i;

data data&i (keep = &Time A&i);
merge data&i Time; by &Time;  
retain Surv&i;
if not (&Surv=.) then Surv&i=&Surv;
A&i=-log(Surv&i);

%end;


data A_and_S;
A=0;
%do i=1 %to &nStrata %by 1;
merge data&i; by &Time;
A+A&i;
%end;

dA=A-lag(A);
if dA eq . then dA=0;

retain S 1;
S+S*(1-dA)-S;
lagS=lag(S);
run;



/* Output */
  
  data temp2 (keep = &Strata &Time);
set temp1; 
proc sort data=temp2; by &Time;

data temp3;
merge temp2 A_and_S; by &Time;
proc sort data=temp3; by &Strata &Time;


data data0 (keep = &Time p stratum);
set temp3; by &Strata;
*   lagS=lag(S);

if first.&Strata then do;
lagS=1; end;

retain stratum 0;
stratum+first.&Strata;

%do i=1 %to &nStrata %by 1;
lagA&i=lag(A&i);
if ((stratum eq &i) and (first.&Strata)) then lagA&i=0;
if (stratum ne &i) then lagA&i=0;

l&i=(stratum=&i)*(lagS*(A&i-lagA&i));
retain p&i 0;
p&i+l&i; 
if (stratum ne &i) then p&i=0;
%end;

p=0;
%do i=1 %to &nStrata %by 1;
p=p+p&i; %end;
run;





/* put data into the right form */
  
  %do i=1 %to &nStrata;
data data&i;
set data0;
if (stratum eq &i);
p0&i=p; drop p stratum;

proc sort data=data&i;
by &Time;
%end;

data data;
set data1;
%do i=1 %to &nStrata;
data data;
merge data data&i;
by &Time;
%end;


* and complete the p-s;

proc sort data=data;
 by &Time;

data data; 
 set data end=last;
  by &Time;
   n=_N_;
  if last then call symput('nobs',n);
  drop n;

data data;
 set data;
  %do i=1 %to &nStrata;
   %do j=1 %to &nobs;
    dummy=lag(p0&i);
    if (p0&i eq .) then p0&i=dummy;
   %end;
  %end;
  drop dummy;
  p00=1;
  %do i=1 %to &nStrata;
   p00=p00-p0&i;
  %end;
run;
%mend CumInc;


* Cuminc macro requires duplication; 
data pbc32; 
    set pbc3 pbc3;
    h=1+(_N_ gt 349);
    time=days;
    d=(status=1)*(h=1)+(status=2)*(h=2);
    tment1=tment*(h=1); tment2=tment*(h=2);
    sex1=sex*(h=1); sex2=sex*(h=2);
    age1=age*(h=1); age2=age*(h=2);
    alb1=alb*(h=1); alb2=alb*(h=2);
    log2bili1=log2bili*(h=1); log2bili2=log2bili*(h=2);
run;

Figure 4.12 (a)

Code show/hide
data cov;
  input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
  datalines; 
  0 0 0 0 40 0 38 0 45 0
  0 0 0 0 0 40 0 38 0 45
  ;
run;
data cov; 
  set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
                    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data data; set data; tment=0; run;
data plot1; 
    set data; 
    cum1=p01; cum2=p01+p02; cum3=1;
    p = p01+p02+p00;
run;
data plot1; 
    set plot1; 
    daysyears = time/365.25; 
run; 
proc gplot data=plot1;
    plot cum1*daysyears cum2*daysyears cum3*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Stacked cumulative incidence and survival');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
    symbol3  v=none i=stepjl c=black;
run;
quit;

Figure 4.12 (b)

Code show/hide
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    0 0 0 0 40 0 20 0 90 0
    0 0 0 0 0 40 0 20 0 90
    ;
run;
data cov; 
  set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
                    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data data; set data; tment=0; run;
data plot2plac; set data; 
    cum1=p01; cum2=p01+p02; cum3=1;
run;
data plot2plac; 
    set plot2plac; 
    daysyears = time/365.25; 
run; 
proc gplot data=plot2plac;
    plot cum1*daysyears cum2*daysyears cum3*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Stacked cumulative incidence and survival');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
    symbol3  v=none i=stepjl c=black;
run;
quit;

Figure 4.12 (c)

Code show/hide
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    0 0 0 0 60 0 38 0 45 0
    0 0 0 0 0 60 0 38 0 45
    ;
run;
data cov;
    set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
                    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data data; set data; tment=0; run;
data plot3plac; set data; 
    cum1=p01; cum2=p01+p02; cum3=1;
run;
data plot3plac; 
    set plot3plac; 
    daysyears = time/365.25; 
run; 
proc gplot data=plot3plac;
    plot cum1*daysyears cum2*daysyears cum3*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Stacked cumulative incidence and survival');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
    symbol3  v=none i=stepjl c=black;
run;
quit;

Table 4.2

Scenario 1 (no adjustment)

Code show/hide
library("survRM2")

# No adjustment 
coxsurv <- coxph(Surv(days, status != 0) ~ strata(tment), data = pbc3, 
                      method = "breslow")

# death w/o transplant
cox1 <- coxph(Surv(days, status == 2) ~ strata(tment), data = pbc3, 
              method = "breslow")

# transplant
cox2 <- coxph(Surv(days, status == 1) ~ strata(tment), data = pbc3, 
              method = "breslow")

time <- basehaz(cox1, center = F)$time
surv <- exp(-basehaz(coxsurv, center = F)$hazard)
lamd1 <- basehaz(cox1, center = F)$hazard
lamd2 <- basehaz(cox2, center = F)$hazard
strat <- basehaz(cox1, center = F)$strata
  
# Cumulative incidence
F01_plac <- cumsum(surv[strat == "tment=0"] * diff(c(0,lamd1[strat == "tment=0"])))
F01_cya <- cumsum(surv[strat == "tment=1"] * diff(c(0,lamd1[strat == "tment=1"])))

F02_plac <- cumsum(surv[strat == "tment=0"] * diff(c(0,lamd2[strat == "tment=0"])))
F02_cya <- cumsum(surv[strat == "tment=1"] * diff(c(0,lamd2[strat == "tment=1"])))

timep <- time[strat == "tment=0"]
timec <- time[strat == "tment=1"]

# Transplantation
scenario1.T <- c(Placebo=sum(diff(c(0, timep[timep <= 3 * 365.25])) * F02_plac[timep <= 3 * 365.25])/365.25,
CyA=sum(diff(c(0, timec[timec <= 3 * 365.25])) * F02_cya[timec <= 3 * 365.25])/365.25)

# Death without transplantation
scenario1.D <- c(Placebo=sum(diff(c(0, timep[timep <= 3 * 365.25])) * F01_plac[timep <= 3 * 365.25])/365.25,
CyA=sum(diff(c(0, timec[timec <= 3 * 365.25])) * F01_cya[timec <= 3 * 365.25])/365.25)
scenario1.T;scenario1.D 
   Placebo        CyA 
0.14268790 0.08567096 
  Placebo       CyA 
0.2499754 0.2365865 

Scenario 1 (no adjustment) using mets package

Code show/hide
library(mets)
rmc1 <- cif.yearslost(Event(days/365.25,status)~strata(tment), 
                      data=pbc3, time=3)
summary(rmc1)
  strata times     intF11    intF12  se.intF11  se.intF12 total.years.lost
1      0     3 0.14274534 0.2511599 0.04083718 0.05260692        0.3939052
2      1     3 0.08637516 0.2359673 0.03029502 0.05031025        0.3223425

Scenario 2

Code show/hide
coxsurv <- coxph(Surv(days/365.25, status != 0) ~ tment + sex + age + alb + log2bili,
                 data = pbc3, method = "breslow")
# death w/o transplant
cox1 <- coxph(Surv(days/365.25, status == 2) ~ tment + sex + age + alb + log2bili,
              data = pbc3, method = "breslow")
# transplant
cox2 <- coxph(Surv(days/365.25, status == 1) ~ tment + sex + age + alb + log2bili,
              data = pbc3, method = "breslow")

# Sex = F, age = 40, alb = 38, bili = 45
# Predictions 
newd <- data.frame(tment = c(0, 1),
                   sex = c(0, 0),
                   age = c(40, 40),
                   alb = c(38, 38), 
                   log2bili = log2(c(45, 45)))

# predictions
time <- basehaz(cox1, center = F)$time
surv <- exp(-basehaz(coxsurv, center = F)$hazard)
lamd1 <- basehaz(cox1, center = F)$hazard
lamd2 <- basehaz(cox2, center = F)$hazard

lpsurv <- predict(coxsurv, newd, type = "lp", reference = "zero")
lpcox1 <- predict(cox1, newd, type = "lp", reference = "zero")
lpcox2 <- predict(cox2, newd, type = "lp", reference = "zero")

# Linear predictors
surv_plac <- surv^exp(lpsurv[1])
surv_cya <- surv^exp(lpsurv[2])

lamd1_plac <- lamd1 * exp(lpcox1[1])
lamd1_cya <- lamd1 * exp(lpcox1[2])

lamd2_plac <- lamd2 * exp(lpcox2[1])
lamd2_cya <- lamd2 * exp(lpcox2[2])

# Cumulative incidence
F01_plac <- cumsum(surv_plac * diff(c(0,lamd1_plac)))
F01_cya  <- cumsum(surv_cya * diff(c(0,lamd1_cya)))

F02_plac <- cumsum(surv_plac * diff(c(0,lamd2_plac)))
F02_cya  <- cumsum(surv_cya * diff(c(0,lamd2_cya)))

timep <- timec <- time

# Transplantation
scenario2.T <- c(Placebo=sum(diff(c(0, timep[timep <= 3])) * F02_plac[timep <= 3]),
CyA=sum(diff(c(0, timec[timec <= 3])) * F02_cya[timec <= 3]))

# Death without transplantation
scenario2.D <- c(Placebo=sum(diff(c(0, timep[timep <= 3])) * F01_plac[timep <= 3]),
                 CyA    =sum(diff(c(0, timec[timec <= 3])) * F01_cya[timec  <= 3])
                 )
scenario2.T;scenario2.D 
  Placebo       CyA 
0.2204065 0.1161289 
   Placebo        CyA 
0.09008897 0.06091087 

Scenario 3

Code show/hide
# Sex = F, age = 40, alb = 20, bili = 90 #
# Predictions 
newd <- data.frame(tment = c(0, 1),
                   sex = c(0, 0),
                   age = c(40, 40),
                   alb = c(20, 20), 
                   log2bili = log2(c(90, 90)))

# predictions
time <- basehaz(cox1, center = F)$time
surv <- exp(-basehaz(coxsurv, center = F)$hazard)
lamd1 <- basehaz(cox1, center = F)$hazard
lamd2 <- basehaz(cox2, center = F)$hazard

lpsurv <- predict(coxsurv, newd, type = "lp", reference = "zero")
lpcox1 <- predict(cox1, newd, type = "lp", reference = "zero")
lpcox2 <- predict(cox2, newd, type = "lp", reference = "zero")

# Linear predictors
surv_plac <- surv^exp(lpsurv[1])
surv_cya <- surv^exp(lpsurv[2])

lamd1_plac <- lamd1 * exp(lpcox1[1])
lamd1_cya <- lamd1 * exp(lpcox1[2])

lamd2_plac <- lamd2 * exp(lpcox2[1])
lamd2_cya <- lamd2 * exp(lpcox2[2])

# Cumulative incidence
F01_plac <- cumsum(surv_plac * diff(c(0,lamd1_plac)))
F01_cya <- cumsum(surv_cya * diff(c(0,lamd1_cya)))

F02_plac <- cumsum(surv_plac * diff(c(0,lamd2_plac)))
F02_cya <- cumsum(surv_cya * diff(c(0,lamd2_cya)))

timep <- timec <- time

# Transplantation
scenario3.T <- c(Placebo=sum(diff(c(0, timep[timep <= 3])) * F02_plac[timep <= 3]),
                 CyA    =sum(diff(c(0, timec[timec <= 3])) * F02_cya[timec  <= 3])
                 )

# Death without transplantation
scenario3.D <- c(Placebo=sum(diff(c(0, timep[timep <= 3])) * F01_plac[timep <= 3]),
                 CyA    =sum(diff(c(0, timec[timec <= 3])) * F01_cya[timec  <= 3]))
scenario3.T;scenario3.D
Placebo     CyA 
1.69057 1.06836 
  Placebo       CyA 
0.4232820 0.3293008 

Scenario 4

Code show/hide
##### Sex = F, age = 60, alb = 38, bili = 45 #####

# Predictions 
newd <- data.frame(tment = c(0, 1),
                   sex = c(0, 0),
                   age = c(60, 60),
                   alb = c(38, 38), 
                   log2bili = log2(c(45, 45)))

# predictions
lpsurv <- predict(coxsurv, newd, type = "lp", reference = "zero")
lpcox1 <- predict(cox1, newd, type = "lp", reference = "zero")
lpcox2 <- predict(cox2, newd, type = "lp", reference = "zero")

time <- basehaz(cox1, center = F)$time
surv <- exp(-basehaz(coxsurv, center = F)$hazard)
lamd1 <- basehaz(cox1, center = F)$hazard
lamd2 <- basehaz(cox2, center = F)$hazard

# Linear predictors
surv_plac <- surv^exp(lpsurv[1])
surv_cya <- surv^exp(lpsurv[2])

lamd1_plac <- lamd1 * exp(lpcox1[1])
lamd1_cya <- lamd1 * exp(lpcox1[2])

lamd2_plac <- lamd2 * exp(lpcox2[1])
lamd2_cya <- lamd2 * exp(lpcox2[2])

# Cumulative incidence
F01_plac <- cumsum(surv_plac * diff(c(0,lamd1_plac)))
F01_cya <- cumsum(surv_cya * diff(c(0,lamd1_cya)))

F02_plac <- cumsum(surv_plac * diff(c(0,lamd2_plac)))
F02_cya <- cumsum(surv_cya * diff(c(0,lamd2_cya)))

timep <- timec <- time

# Transplantation
scenario4.T <- c(Placebo=sum(diff(c(0, timep[timep <= 3])) * F02_plac[timep <= 3]), 
                 CyA    =sum(diff(c(0, timec[timec <= 3])) * F02_cya[timec  <= 3])
                 )

# Death without transplantation
scenario4.D <- c(Placebo=sum(diff(c(0, timep[timep <= 3])) * F01_plac[timep <= 3]),
                 CyA    =sum(diff(c(0, timec[timec <= 3])) * F01_cya[timec  <= 3])
                 )
scenario4.T;scenario4.D
   Placebo        CyA 
0.07879677 0.04263005 
  Placebo       CyA 
0.3678765 0.2544924 

Macro for area under CIF

Code show/hide
%macro areastep(data,trt,grp,time,y,tau);
    data select; set &data; where &trt=&grp;
    run;
    data select; set select;by &trt;
        retain mu;
        if first.&trt then mu=0;
        if &time>&tau then do;
        &time=&tau; &y=lag(&y); end;
        mu+lag(&y)*(&time-lag(&time));
        if last.&trt then do;
        if &time<&tau then mu+(&tau-&time)*&y; end;
    run;
    data last;
      set select;
        by &trt;
        if last.&trt;
    run;
    proc print; run;
%mend areastep;

Scenario 1 (no adjustment)

Code show/hide
* No adjustment: Transplantation; 
proc phreg data=pbc3 noprint;
    model followup*status(0)=/eventcode=1;
    strata tment;
    baseline out=cuminc1 cif=cif1 stdcif=std1;
run;
title "Placebo";
%areastep(cuminc1,tment,0,followup,cif1,3);
title "CyA";
%areastep(cuminc1,tment,1,followup,cif1,3);

* No adjustment: Death wo transplant;
proc phreg data=pbc3 noprint;
    model followup*status(0)=/eventcode=2;
    strata tment;
    baseline out=cuminc2 cif=cif2 stdcif=std2;
run;
title "Placebo";
%areastep(cuminc2,tment,0,followup,cif2,3);
title "CyA";
%areastep(cuminc2,tment,1,followup,cif2,3);

Scenario 2

Code show/hide
*** age=40 alb=38 bili=45 ***; 
* CyA; 
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    1 0 0 0 40 0 38 0 45 0
    0 1 0 0 0 40 0 38 0 45
    ;
run;

data cov; set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;

proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data dataC; 
    set data; 
    tment=1; 
run;
%areastep(dataC,tment,1,time,p01,3);
%areastep(dataC,tment,1,time,p02,3);

* Placebo; 
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    0 0 0 0 40 0 38 0 45 0
    0 0 0 0 0 40 0 38 0 45
    ;
run;
data cov; set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
                                    log2bili1 log2bili2;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data dataP; 
set data; 
tment=0; 
run;
%areastep(dataP,tment,0,time,p01,3);
%areastep(dataP,tment,0,time,p02,3);

Scenario 3

Code show/hide
*** age=40 alb=20 bili=90 ***; 
* CyA; 
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    1 0 0 0 40 0 20 0 90 0
    0 1 0 0 0 40 0 20 0 90
    ;
run;

data cov; set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;

proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;

%cuminc(cidata,h,time,surv);

data dataC; 
    set data; 
    tment=1; 
run;

%areastep(dataC,tment,1,time,p01,3);
%areastep(dataC,tment,1,time,p02,3);

* Placebo; 
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    0 0 0 0 40 0 20 0 90 0
    0 0 0 0 0 40 0 20 0 90
    ;
run;

data cov; set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data dataP; 
    set data; 
    tment=0; 
run;
%areastep(dataP,tment,0,time,p01,3);
%areastep(dataP,tment,0,time,p02,3);

Scenario 4

Code show/hide
*** age=60 alb=38 bili=45 ***; 
* CyA; 
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    1 0 0 0 60 0 38 0 45 0
    0 1 0 0 0 60 0 38 0 45
    ;
run;
data cov; set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data dataC; 
    set data; 
    tment=1; 
run;
%areastep(dataC,tment,1,time,p01,3);
%areastep(dataC,tment,1,time,p02,3);

* Placebo; 
data cov;
    input tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 log2bili1 log2bili2;
    datalines; 
    0 0 0 0 60 0 38 0 45 0
    0 0 0 0 0 60 0 38 0 45
    ;
run;
data cov; set cov;
    if log2bili1>0 then log2bili1=log2(log2bili1);
    if log2bili2>0 then log2bili2=log2(log2bili2);
run;
proc phreg data=pbc32 noprint;
    model time*d(0)=tment1 tment2 sex1 sex2 age1 age2 alb1 alb2 
    log2bili1 log2bili2/rl;
    strata h;
    baseline out=cidata covariates=cov survival=surv/nomean method=ch;
run;
%cuminc(cidata,h,time,surv);
data dataP; 
    set data; 
    tment=0; 
run;
%areastep(dataP,tment,0,time,p01,3);
%areastep(dataP,tment,0,time,p02,3);

Table 4.4

Using survRM2 package

Here we get the same as in SAS PROC RMSTREG

Code show/hide
library("survRM2")
pbcsub <- subset(pbc3,!is.na(alb))
time   <- pbcsub$days/365.25
status <- pbcsub$status!=0
arm    <- pbcsub$tment=="1"
alb    <- pbcsub$alb
logbili<- log2(pbcsub$bili)
xx <- cbind(alb, logbili)
rmst2(time, status, arm, tau=3, covariates=xx)

The truncation time: tau = 3  was specified. 

Summary of between-group contrast (adjusted for the covariates) 
                      Est. lower .95 upper .95     p
RMST (arm=1)-(arm=0) 0.168     0.016     0.320 0.031
RMST (arm=1)/(arm=0) 1.065     1.004     1.130 0.037
RMTL (arm=1)/(arm=0) 0.548     0.364     0.823 0.004


Model summary (difference of RMST) 
            coef se(coef)      z     p lower .95 upper .95
intercept  2.376    0.381  6.241 0.000     1.630     3.122
arm        0.168    0.078  2.160 0.031     0.016     0.320
alb        0.031    0.008  4.058 0.000     0.016     0.045
logbili   -0.214    0.034 -6.207 0.000    -0.281    -0.146


Model summary (ratio of RMST) 
            coef se(coef)      z     p exp(coef) lower .95 upper .95
intercept  0.882    0.151  5.827 0.000     2.415     1.795     3.249
arm        0.063    0.030  2.089 0.037     1.065     1.004     1.130
alb        0.011    0.003  3.859 0.000     1.012     1.006     1.017
logbili   -0.085    0.016 -5.499 0.000     0.918     0.891     0.947


Model summary (ratio of time-lost) 
            coef se(coef)      z     p exp(coef) lower .95 upper .95
intercept -0.142    0.802 -0.177 0.860     0.868     0.180     4.183
arm       -0.602    0.208 -2.895 0.004     0.548     0.364     0.823
alb       -0.095    0.017 -5.469 0.000     0.909     0.878     0.941
logbili    0.524    0.061  8.540 0.000     1.689     1.498     1.906

Using mets package

However, this does not align exactly with survRM2 package?

Code show/hide
library(mets)
options(contrasts=c("contr.treatment", "contr.poly"))
resmeanIPCW(Event(followup,fail)~factor(tment)+alb+log2bili,
            data=pbcsub, time=3, model="lin",
            cens.model=~strata(tment))

   n events
 343     67

 343 clusters
coeffients:
                Estimate   Std.Err      2.5%     97.5% P-value
(Intercept)     2.722803  1.326849  0.122227  5.323380  0.0402
factor(tment)1  0.144855  0.085063 -0.021866  0.311576  0.0886
alb             0.022829  0.027178 -0.030438  0.076096  0.4009
log2bili       -0.223035  0.093775 -0.406830 -0.039240  0.0174

exp(coeffients):
               Estimate     2.5%    97.5%
(Intercept)    15.22293  1.13001 205.0758
factor(tment)1  1.15587  0.97837   1.3656
alb             1.02309  0.97002   1.0791
log2bili        0.80009  0.66576   0.9615
Code show/hide
proc rmstreg data=pbc3 tau=3;
  model followup*status(0)=tment alb log2bili / 
        link=linear method=ipcw(strata=tment);
run;

In-text, p. 136: RMST g-formula and bootstrap

Code show/hide
pbcsub <- subset(pbc3,!is.na(alb))
time   <- pbcsub$days/365.25
status <- pbcsub$status!=0
arm    <- pbcsub$tment=="1"
alb    <- pbcsub$alb
logbili<- log2(pbcsub$bili)
xx <- cbind(alb, logbili)
trydata<-as.data.frame(cbind(arm,time,status,xx))

boot.fun <- function(dat, index){
bdata <- dat[index,]
obj<-rmst2(bdata$time,bdata$status,bdata$arm,tau=3,
           covariates=cbind(bdata$alb,bdata$logbili))
rmst0<-obj$RMST.difference.adjusted[1,1]+
  obj$RMST.difference.adjusted[2,1]*0+
  obj$RMST.difference.adjusted[3,1]*bdata$alb+
  obj$RMST.difference.adjusted[4,1]*bdata$logbili
rmst1<-obj$RMST.difference.adjusted[1,1]+
  obj$RMST.difference.adjusted[2,1]*1+
  obj$RMST.difference.adjusted[3,1]*bdata$alb+
  obj$RMST.difference.adjusted[4,1]*bdata$logbili
diff<-rmst1-rmst0
res<-cbind(mean(rmst0),mean(rmst1),mean(diff))
return(res)
}

library(boot)
B<-200
bootres <- boot(trydata, boot.fun, R = B)

# mean and SD
rbind(
  cbind(Placebo=mean(bootres$t[,1]),
        CyA=mean(bootres$t[,2]),
        Diff=mean(bootres$t[,3])),
  cbind(Placebo=sqrt(var(bootres$t[,1])),
        CyA=sqrt(var(bootres$t[,2])),
        Diff=sqrt(var(bootres$t[,3])))
)
        Placebo        CyA       Diff
[1,] 2.56815001 2.72999845 0.16184844
[2,] 0.06209124 0.05416741 0.07844583
Code show/hide
/* bootstrap sample */
data bootpbc;
    do sampnum = 1 to 1000; /* nboot=1000*/
    do i = 1 to 349; /*nobs=349*/
    x=round(ranuni(0)*349); /*nobs=349*/
    set pbc3
    point=x;
    output;
    end;
    end;
    stop;
run;
proc rmstreg data=bootpbc tau=3;
  model followup*status(0)=tment alb log2bili / 
        link=linear method=ipcw(strata=tment);
    ods output parameterestimates=pe;
    by sampnum;
run;
proc transpose data=pe(keep=sampnum estimate) out=beta prefix=beta;
  by sampnum;
run;
proc sql;
  create table fit as
    select *
    from bootpbc as pbc, beta
    where pbc.sampnum=beta.sampnum;
quit;
data pred;
  set fit;
    pred0=beta1+beta2*0+beta3*alb+beta4*log2bili;
    pred1=beta1+beta2*1+beta3*alb+beta4*log2bili;
    diff=pred1-pred0;
run;
proc means data=pred mean stddev;
    var pred0 pred1 diff;
run;

    
           The MEANS Procedure

 Variable            Mean         Std Dev
 ----------------------------------------
 pred0          2.5596691       0.4265808
 pred1          2.7281557       0.4253781
 diff           0.1684865       0.0772077
 ----------------------------------------

Table 4.5

Using mets package

Code show/hide
library(mets)
options(contrasts=c("contr.treatment", "contr.poly"))
subpbc<-subset(pbc3, !is.na(alb))
subpbc$tment<-factor(subpbc$tment)

# Death w/o transplant
fgdeath<-cifreg(Event(days,status)~factor(tment)+alb+log2bili+sex+age, cause=2, 
                data = subpbc, propodds=NULL)
fgdeath
Call:
cifreg(formula = Event(days, status) ~ factor(tment) + alb + 
    log2bili + sex + age, data = subpbc, cause = 2, propodds = NULL)

   n events
 343     60

 343 clusters
coeffients:
                Estimate      S.E.   dU^-1/2 P-value
factor(tment)1 -0.352775  0.260075  0.267053  0.1750
alb            -0.061245  0.030848  0.028700  0.0471
log2bili        0.615653  0.088872  0.090387  0.0000
sex             0.414735  0.316821  0.312520  0.1905
age             0.087488  0.015744  0.016208  0.0000

exp(coeffients):
               Estimate    2.5%  97.5%
factor(tment)1  0.70274 0.42210 1.1700
alb             0.94059 0.88541 0.9992
log2bili        1.85087 1.55499 2.2030
sex             1.51397 0.81365 2.8171
age             1.09143 1.05826 1.1256
Code show/hide
# Transplant
fgtrans<-cifreg(Event(days,status)~factor(tment)+alb+log2bili+sex+age, cause=1, 
                data = subpbc, propodds=NULL, cox.prep=TRUE)
fgtrans
Call:
cifreg(formula = Event(days, status) ~ factor(tment) + alb + 
    log2bili + sex + age, data = subpbc, cause = 1, propodds = NULL, 
    cox.prep = TRUE)

   n events
 343     28

 343 clusters
coeffients:
                Estimate      S.E.   dU^-1/2 P-value
factor(tment)1 -0.407479  0.368430  0.403161  0.2687
alb            -0.069736  0.032685  0.038429  0.0329
log2bili        0.619024  0.101177  0.128955  0.0000
sex             0.087804  0.579890  0.551723  0.8796
age            -0.075062  0.017119  0.020770  0.0000

exp(coeffients):
               Estimate    2.5%  97.5%
factor(tment)1  0.66533 0.32317 1.3698
alb             0.93264 0.87477 0.9943
log2bili        1.85711 1.52306 2.2644
sex             1.09177 0.35037 3.4020
age             0.92769 0.89708 0.9593

Using survival package

Code show/hide
# Death w/o transplant
# Two step
fg_c2 <- finegray(Surv(days, as.factor(status)) ~ ., etype = 2,
                  data = subset(pbc3, !is.na(alb)))
fg_cox2 <- coxph(Surv(fgstart, fgstop, fgstatus) ~ 
                   tment + alb + log2bili + sex + age, 
                 weight = fgwt, data = fg_c2, eps=1e-9)
summary(fg_cox2)
Call:
coxph(formula = Surv(fgstart, fgstop, fgstatus) ~ tment + alb + 
    log2bili + sex + age, data = fg_c2, weights = fgwt, eps = 1e-09)

  n= 1004, number of events= 60 

             coef exp(coef) se(coef) robust se      z Pr(>|z|)    
tment    -0.35282   0.70270  0.26705   0.25111 -1.405   0.1600    
alb      -0.06122   0.94062  0.02870   0.02933 -2.087   0.0369 *  
log2bili  0.61556   1.85069  0.09038   0.08521  7.224 5.06e-13 ***
sex       0.41484   1.51412  0.31251   0.30590  1.356   0.1751    
age       0.08748   1.09142  0.01621   0.01507  5.804 6.47e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

         exp(coef) exp(-coef) lower .95 upper .95
tment       0.7027     1.4231    0.4296    1.1495
alb         0.9406     1.0631    0.8881    0.9963
log2bili    1.8507     0.5403    1.5660    2.1871
sex         1.5141     0.6604    0.8313    2.7577
age         1.0914     0.9162    1.0597    1.1241

Concordance= 0.829  (se = 0.024 )
Likelihood ratio test= 89.68  on 5 df,   p=<2e-16
Wald test            = 111.5  on 5 df,   p=<2e-16
Score (logrank) test = 93.71  on 5 df,   p=<2e-16,   Robust = 43.86  p=2e-08

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
# Transplant
# Two step
fg_c1 <- finegray(Surv(days, as.factor(status)) ~ ., etype = 1,
                  data = subset(pbc3, !is.na(alb)))
fg_cox1 <- coxph(Surv(fgstart, fgstop, fgstatus) ~ 
                   tment + alb + log2bili + sex + age, 
                 weight = fgwt, data = fg_c1, eps=1e-9)
summary(fg_cox1)
Call:
coxph(formula = Surv(fgstart, fgstop, fgstatus) ~ tment + alb + 
    log2bili + sex + age, data = fg_c1, weights = fgwt, eps = 1e-09)

  n= 1068, number of events= 28 

             coef exp(coef) se(coef) robust se      z Pr(>|z|)    
tment    -0.40756   0.66527  0.40311   0.34977 -1.165   0.2439    
alb      -0.06960   0.93276  0.03844   0.03005 -2.316   0.0205 *  
log2bili  0.61867   1.85646  0.12892   0.09119  6.785 1.16e-11 ***
sex       0.09043   1.09465  0.55193   0.55524  0.163   0.8706    
age      -0.07507   0.92768  0.02077   0.01590 -4.720 2.36e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

         exp(coef) exp(-coef) lower .95 upper .95
tment       0.6653     1.5031    0.3352    1.3205
alb         0.9328     1.0721    0.8794    0.9894
log2bili    1.8565     0.5387    1.5526    2.2198
sex         1.0946     0.9135    0.3687    3.2501
age         0.9277     1.0780    0.8992    0.9570

Concordance= 0.858  (se = 0.025 )
Likelihood ratio test= 52.17  on 5 df,   p=5e-10
Wald test            = 78.57  on 5 df,   p=2e-15
Score (logrank) test = 56.42  on 5 df,   p=7e-11,   Robust = 21.26  p=7e-04

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
* Death without transplantation; 
proc phreg data=pbc3;
    class sex (ref='1') tment (ref='0');
    model days*status(0)=sex tment age log2bili alb / rl eventcode=2;
run;

* Transplantation; 
proc phreg data=pbc3;
    class sex (ref='1') tment (ref='0');
    model days*status(0)=sex tment age log2bili alb / rl eventcode=1;
run;

In-text, p. 138-9: Fine-Gray and g-formula

Code show/hide
# Fine-Gray models using package mets
library(mets)
options(contrasts=c("contr.treatment", "contr.poly"))
# Transplant
subpbc<-subset(pbc3, !is.na(alb))
subpbc$trt<-factor(subpbc$tment)
fgtrans<-cifreg(Event(days,status)~trt+alb+log2bili+sex+age, cause=1, 
                data = subpbc, propodds=NULL, cox.prep=TRUE)
summary(survivalG(fgtrans, subpbc, 2*365.25))
risk:
      Estimate Std.Err    2.5%   97.5%   P-value
risk0  0.06890 0.01994 0.02983 0.10798 0.0005477
risk1  0.04895 0.01310 0.02327 0.07464 0.0001875

Average Treatment effects (G-estimator) :
   Estimate Std.Err     2.5%   97.5% P-value
p1 -0.01995 0.01915 -0.05748 0.01758  0.2975

Average Treatment effect ratio (G-estimator) :
      Estimate Std.Err      2.5%    97.5%   P-value
[p1] 0.7104376 0.21938 0.2804607 1.140415 0.1868643
Code show/hide
# Death w/o transplant
fgdeath<-cifreg(Event(days,status)~trt+alb+log2bili+sex+age, cause=2, 
                data = subpbc, propodds=NULL, cox.prep=TRUE)
summary(survivalG(fgdeath, subpbc, 2*365.25))
risk:
      Estimate Std.Err    2.5%  97.5%   P-value
risk0  0.11694 0.02164 0.07453 0.1594 6.520e-08
risk1  0.08815 0.01890 0.05111 0.1252 3.107e-06

Average Treatment effects (G-estimator) :
   Estimate Std.Err     2.5%   97.5% P-value
p1 -0.02879 0.02126 -0.07046 0.01289  0.1758

Average Treatment effect ratio (G-estimator) :
      Estimate   Std.Err      2.5%    97.5%   P-value
[p1] 0.7538432 0.1581281 0.4439178 1.063769 0.1195436
Code show/hide
/* bootstrap sample */
data bootpbc;
    do sampnum = 1 to 1000; /* nboot=1000*/
    do i = 1 to 349; /*nobs=349*/
    x=round(ranuni(0)*349); /*nobs=349*/
    set pbc3
    point=x;
    output;
    end;
    end;
    stop;
run;

/* to get baseline 'CIF' */
data cov;
    input sex tment age alb log2bili;
    datalines;
0 0 0 0 0
;
run;

/* Transplantation */
proc phreg data=bootpbc noprint
  outest=beta(keep=sampnum sex tment age log2bili alb _status_
        rename=(sex=betasex tment=betatment age=betaage log2bili=betabili alb=betaalb));
    model followup*status(0)=sex tment age log2bili alb / eventcode=1;
    baseline out=cif(keep=sampnum cif) covariates=cov cif=cif timepoint=2;
    by sampnum;
run;
/* Remove degenerated estimates with cif=1 */
data cif;
  set cif;
  where cif<1; 
run;
proc sql;
  create table fit as
    select *
    from bootpbc as pbc, cif, beta 
    where pbc.sampnum=cif.sampnum & pbc.sampnum=beta.sampnum;
quit;
data pred;
  set fit;
    pred0=1-(1-cif)**exp(betatment*0+betasex*sex+betaage*age+betaalb*alb+betabili*log2bili);
    pred1=1-(1-cif)**exp(betatment*1+betasex*sex+betaage*age+betaalb*alb+betabili*log2bili);
    diff=pred1-pred0;
run;
proc means data=pred noprint;
    var pred0 pred1 diff;
    output out=boot(where=(_stat_="MEAN"));
    by sampnum;
run;
proc means data=boot mean stddev;
    var pred0 pred1 diff;
run;

           The MEANS Procedure

 Variable            Mean         Std Dev
 ----------------------------------------
 pred0          0.0671112       0.0204778
 pred1          0.0494114       0.0134582
 diff          -0.0176998       0.0198973
 ----------------------------------------

   
/* Death without transplantation */
proc phreg data=bootpbc noprint
  outest=beta(keep=sampnum sex tment age log2bili alb _status_
        rename=(sex=betasex tment=betatment age=betaage log2bili=betabili alb=betaalb));
    model followup*status(0)=sex tment age log2bili alb / eventcode=2;
    baseline out=cif(keep=sampnum cif) covariates=cov cif=cif timepoint=2;
    by sampnum;
run;
/* Remove degenerated estimates with cif=1 */
data cif;
  set cif;
  where cif<1; 
run;
proc sql;
  create table fit as
    select *
    from bootpbc as pbc, cif, beta 
    where pbc.sampnum=cif.sampnum & pbc.sampnum=beta.sampnum;
quit;
data pred;
  set fit;
    pred0=1-(1-cif)**exp(betatment*0+betasex*sex+betaage*age+betaalb*alb+betabili*log2bili);
    pred1=1-(1-cif)**exp(betatment*1+betasex*sex+betaage*age+betaalb*alb+betabili*log2bili);
    diff=pred1-pred0;
run;
proc means data=pred noprint;
    var pred0 pred1 diff;
    output out=boot(where=(_stat_="MEAN"));
    by sampnum;
run;
proc means data=boot mean stddev;
    var pred0 pred1 diff;
run;
    

           The MEANS Procedure

 Variable            Mean         Std Dev
 ----------------------------------------
 pred0          0.0671112       0.0204778
 pred1          0.0494114       0.0134582
 diff          -0.0176998       0.0198973
 ----------------------------------------

Figure 4.17

Figure 4.17 (a)

Code show/hide
# Fine-Gray model for death w/o transplant
# Predictions 
# Placebo
newd_t0 <- data.frame(tment = 0, age = 40, 
                      alb = 38, log2bili = log2(45), 
                      sex = 0)
C2_t0 <- survfit(fg_cox2, ctype = 2, newdata = newd_t0)$cumhaz 

# CyA
newd_t1 <- data.frame(tment = 1, age = 40, 
                      alb = 38, log2bili = log2(45), 
                      sex = 0)
C2_t1 <- survfit(fg_cox2, ctype = 2, newdata = newd_t1)$cumhaz 

time <- survfit(fg_cox2, ctype = 2, newdata = newd_t1)$time

# Make data ready for plotting 
pdata <- data.frame(time = c(time, time), 
                    cif = c(C2_t0, C2_t1), 
                    tment = c(rep("Placebo", length(time)), 
                              rep("CyA", length(time))))

# Create Figure
fig4.17a <- ggplot(aes(x = time / 365.25, y = cif, linetype = tment), 
                 data = pdata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_manual("Treatment", values = c("dashed", "solid"),guide = guide_legend(reverse = TRUE)) + 
  xlab("Time since randomization (years)") + 
  ylab('Cumulative incidence for death w/o transplantation') + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6), 
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.05)), 
                     limits = c(0, 1.0), 
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general + 
  theme(legend.position="bottom", legend.box = "vertical")
      #  legend.key.size = unit(1.5, 'cm'))

fig4.17a

Figure 4.17 (b)

Code show/hide
# Fine-Gray model for transplant
# Predictions 
# Placebo
C1_t0 <- survfit(fg_cox1, ctype = 2, newdata = newd_t0)$cumhaz 

# CyA
C1_t1 <- survfit(fg_cox1, ctype = 2, newdata = newd_t1)$cumhaz 

time <- survfit(fg_cox1, ctype = 2, newdata = newd_t1)$time

# Make data ready for plotting 
pdata <- data.frame(time = c(time, time), 
                    cif = c(C1_t0, C1_t1), 
                    tment = c(rep("Placebo", length(time)), 
                              rep("CyA", length(time))))

# Create Figure
fig4.17b <- ggplot(aes(x = time / 365.25, y = cif, linetype = tment), 
                 data = pdata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_manual("Treatment", values = c("dashed", "solid"),guide = guide_legend(reverse = TRUE)) + 
  xlab("Time since randomization (years)") + 
  ylab('Cumulative incidence for transplantation') + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6), 
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.05)), 
                     limits = c(0, 1.0), 
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general + 
  theme(legend.box = "vertical",
        legend.key.size = unit(1.5, 'cm'))


fig4.17b

Covariate values

Code show/hide
data cov0;
    input sex tment age alb log2bili;
    log2bili=log2(log2bili);
    datalines;
    0 0 40 38 45
    ;
run;
data cov1;
    input sex tment age alb log2bili;
    log2bili=log2(log2bili);
    datalines;
    0 1 40 38 45
    ;
run;

Figure 4.17 (a)

Code show/hide
* For death without transplantation; 
proc phreg data=pbc3;
    model days*status(0)=sex tment age log2bili alb/eventcode=2;
    baseline out=cuminc20 covariates=cov0 cif=cif20;
run;
proc phreg data=pbc3;
    model days*status(0)=sex tment age log2bili alb/eventcode=2;
    baseline out=cuminc21 covariates=cov1 cif=cif21;
run;
data cuminc2; 
    merge cuminc20 cuminc21; 
    by days; 
run;
data cuminc2; 
    set cuminc2; 
    daysyears = days / 365.25; 
run; 
proc gplot data=cuminc2;
    plot cif20*daysyears cif21*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Cumulative incidences for death');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

Figure 4.17 (b)

Code show/hide
* For transplantation; 
proc phreg data=pbc3;
    *class sex tment (ref='0');
    model days*status(0)=sex tment age log2bili alb/eventcode=1;
    baseline out=cuminc10 covariates=cov0 cif=cif10;
run;
proc phreg data=pbc3;
    *class sex tment (ref='0');
    model days*status(0)=sex tment age log2bili alb/eventcode=1;
    baseline out=cuminc11 covariates=cov1 cif=cif11;
run;
data cuminc1; 
    merge cuminc10 cuminc11; 
    by days; 
run;
data cuminc1; 
    set cuminc1; 
    daysyears = days / 365.25; 
run; 
proc gplot data=cuminc1;
    plot cif10*daysyears cif11*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Cumulative incidence for transpl.');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

Table 4.6

Create R function rmtl.ipcw()

The function rmtl.ipcw() fit a restricted mean time lost regression model using IPCW with competing risks data.

Code show/hide
### Note: This code is modified from the original 'rmst2reg function' of the
### survRM2 package, which was authored by Hajime Uno, Lu Tian, Angel Cronin, 
### Chakib Battioui, and Miki Horiguchi, in order to account for competing risks.
### Last updated by Sarah Conner on October 22, 2020

library(survival)

rmtl.ipcw <- function(times, event, eoi=1, tau, cov=NULL, strata=FALSE, group=NULL){
  
  if(is.null(group) & strata==TRUE){stop('Please specify a factor variable to statify weights.')}
  if(is.null(cov)){print('Warning: Fitting intercept-only model.')}
  
  # Round event times to avoid issues with survival() package rounding differently
  y <- round(times,4)
  id <- 1:length(y)
  
  # Recode so delta1 reflects event of interest, delta2 reflects all competing events. Assumes 0=censoring.
  delta1 <- ifelse(event==eoi, 1, 0)
  delta2 <- ifelse(event!=0 & event!=eoi, 1, 0)
  
  # Overall quantities
  x <- cbind(int=rep(1, length(y)), cov)
  p <- length(x[1,])
  if(is.null(group)){group <- as.factor(rep(1, length(y)))}
  
  # Recode event indicators to reflect status at chosen tau
  delta1[y>tau] <- 0
  delta2[y>tau] <- 0
  
  y <- pmin(y, tau)
  y1 <- y*delta1
  
  d0 <- 1 - (delta1 + delta2) # censoring indicator
  d0[y==tau] <- 0  # If follow-up lasts til tau, the event will not count as 'censored' in IPCW weights
  weights <- NULL
  
  ## Calculate IPCW weights (option to stratify by group) ## 
  
  if(strata==TRUE){
    for(aa in 1:length(unique(group))){
      # Subset the group
      a <- unique(group)[aa]
      d0.a <- d0[group==a]
      delta1.a <- delta1[group==a]
      y.a <- y[group==a]
      x.a <- x[group==a,]
      n.a <- length(d0.a)
      orig.id.a0 <- orig.id.a <- id[group==a]
      
      # Order the event times
      id.a <- order(y.a)
      y.a <- y.a[id.a]
      d0.a <- d0.a[id.a]
      delta1.a <- delta1.a[id.a]
      x.a <- x.a[id.a,]
      orig.id.a <- orig.id.a[id.a]
      
      # Derive IPCW
      fit <- survfit(Surv(y.a, d0.a) ~ 1)
      weights.a <- (1-d0.a)/rep(fit$surv, table(y.a))
      
      # Need to assign weights accordig to original ID, not ordered by event time
      linked.weights.a <- cbind(orig.id.a, weights.a, delta1.a, d0.a, y.a)
      weights <- rbind(weights, linked.weights.a)
    }
  } else {
    
    # Order the event times
    id.a <- order(y)
    y.a <- y[id.a]
    d0.a <- d0[id.a]
    delta1.a <- delta1[id.a]
    x.a <- x[id.a,]
    orig.id.a <- id[id.a]
    
    # Derive IPCW
    fit <- survfit(Surv(y.a, d0.a) ~ 1)
    weights.a <- (1-d0.a)/rep(fit$surv, table(y.a))
    
    # Need to assign weights accordig to original ID, not ordered by event time
    linked.weights.a <- cbind(orig.id.a, weights.a, delta1.a, d0.a, y.a)
    weights <- rbind(weights, linked.weights.a)
  }
  
  
  ## Fit linear model ## 
  
  # Link weights to original data frame
  #colnames(weights) <- c('id', 'weights')
  #data <- merge(data0, weights, by='id')
  #summary(lm(tau-y ~ x-1, weights=weights, data=data))
  
  # Or, sort weights and use vectors
  w <- weights[order(weights[, 1]),2]
  lm.fit <- lm(delta1*(tau-y) ~ x-1, weights=w)
  
  
  ## Derive SE ##
  
  beta0 <- lm.fit$coef
  error <- tau - y - as.vector(x %*% beta0)
  score <- x * w * error
  
  # Kappa (sandwich variance components) stratified by group
  kappa <- NULL
  
  for(aa in 1:length(unique(group))){
    
    # Subset the group
    a <- unique(group)[aa]
    d0.a <- d0[group==a]
    delta1.a <- delta1[group==a]
    y.a <- y[group==a]
    x.a <- x[group==a,]
    n.a <- length(d0.a)
    orig.id.a0 <- orig.id.a <- id[group==a]
    score.a <- score[group==a,]
    
    # Kappa calculations for sandwich variance
    kappa.a <- matrix(0, n.a, p)
    
    for(i in 1:n.a){
      kappa1 <- score.a[i,]
    
      kappa2 <- apply(score.a[y.a>=y.a[i],,drop=F], 2, sum)*(d0.a[i])/sum(y.a>=y.a[i])
    
      kappa3 <- rep(0, p)
    
      for(k in 1:n.a){
        if(y.a[k]<=y.a[i]){
          kappa3 <- kappa3+apply(score.a[y.a>=y.a[k],,drop=F], 2, sum)*(d0.a[k])/(sum(y.a>=y.a[k]))^2
        }
      }
  
      kappa.a[i,] <- kappa1+kappa2-kappa3
    }
    kappa <- rbind(kappa, kappa.a)
  }
  
  # Transpose the kappas rbinded from each group gives pxp matrix
  gamma <- t(kappa) %*% kappa
  
  A <- t(x) %*% x
  varbeta <- solve(A) %*% gamma %*% solve(A)
  se <- sqrt(diag(varbeta))
  
  
  #--- Return results ---
  
  res <- cbind(beta=lm.fit$coef, se=se, cil=lm.fit$coef-(1.96*se), ciu=lm.fit$coef+(1.96*se), 
               z=lm.fit$coef/se, p=2*(1-pnorm(abs(lm.fit$coef/se))))
  
  allres <- list(res=res, varbeta=varbeta)
  invisible(allres)
return(res[,1])
}  

Using rmtl.ipcw() function

Code show/hide
rmtl.ipcw(pbc3$days/365.25,pbc3$status,eoi=2,tau=3,cov=pbc3$tment)
        xint         xcov 
0.2438698018 0.0001853217 
Code show/hide
rmtl.ipcw(pbc3$days/365.25,pbc3$status,eoi=1,tau=3,cov=pbc3$tment)
       xint        xcov 
 0.13827426 -0.04885493 
Code show/hide
pbcny   <- subset(pbc3,!is.na(alb))
time    <- pbcny$days/365.25
status  <- pbcny$status
arm     <- pbcny$tment
sex     <- pbcny$sex
age     <- pbcny$age
alb     <- pbcny$alb
logbili <- log2(pbcny$bili)

x <- cbind(arm, alb, logbili, sex, age)
rmtl.ipcw(time, status, eoi=2, tau=3, x)
       xint        xarm        xalb    xlogbili        xsex        xage 
-0.47710972 -0.08191763 -0.01872268  0.15948703  0.12945255  0.01324603 
Code show/hide
rmtl.ipcw(time, status, eoi=1, tau=3, x)
        xint         xarm         xalb     xlogbili         xsex         xage 
 0.595880668 -0.066527285 -0.006567316  0.071836632  0.131691403 -0.010132691 
Code show/hide
x1 <- cbind(arm, alb, logbili)
rmtl.ipcw(time, status, eoi=2, tau=3, x1)
       xint        xarm        xalb    xlogbili 
 0.79996123 -0.07930867 -0.02829702  0.12396009 
Code show/hide
rmtl.ipcw(time, status, eoi=1, tau=3, x1)
        xint         xarm         xalb     xlogbili 
-0.201485168 -0.068452390 -0.002058876  0.091436591 

Using mets package

However, does not give exactly same results as hard code

Code show/hide
library(mets)

# Death without transplantation
resmeanIPCW(Event(days/365.25,status)~factor(tment),cause=2,
            data=pbc3, time=3, model="linear")

   n events
 349     47

 349 clusters
coeffients:
                Estimate   Std.Err      2.5%     97.5% P-value
(Intercept)     0.253900  0.053258  0.149517  0.358284  0.0000
factor(tment)1 -0.019753  0.073194 -0.163211  0.123704  0.7873

exp(coeffients):
               Estimate    2.5%  97.5%
(Intercept)     1.28904 1.16127 1.4309
factor(tment)1  0.98044 0.84941 1.1317
Code show/hide
resmeanIPCW(Event(days/365.25,status)~factor(tment)+alb+log2bili+sex+age,cause=2,
            data=subset(pbc3,!is.na(alb)), time=3, model="linear")

   n events
 343     47

 343 clusters
coeffients:
                 Estimate    Std.Err       2.5%      97.5% P-value
(Intercept)    -0.7249855  0.4048719 -1.5185199  0.0685489  0.0733
factor(tment)1 -0.0804114  0.0668270 -0.2113898  0.0505670  0.2289
alb            -0.0151672  0.0063347 -0.0275830 -0.0027514  0.0167
log2bili        0.1729104  0.0323201  0.1095641  0.2362567  0.0000
sex             0.2071442  0.1068157 -0.0022107  0.4164990  0.0525
age             0.0141707  0.0030794  0.0081351  0.0202063  0.0000

exp(coeffients):
               Estimate    2.5%  97.5%
(Intercept)     0.48433 0.21904 1.0710
factor(tment)1  0.92274 0.80946 1.0519
alb             0.98495 0.97279 0.9973
log2bili        1.18876 1.11579 1.2665
sex             1.23016 0.99779 1.5166
age             1.01427 1.00817 1.0204
Code show/hide
resmeanIPCW(Event(days/365.25,status)~factor(tment)+alb+log2bili,cause=2,
            data=subset(pbc3,!is.na(alb)), time=3, model="linear")

   n events
 343     47

 343 clusters
coeffients:
                 Estimate    Std.Err       2.5%      97.5% P-value
(Intercept)     0.4548449  0.3266548 -0.1853868  1.0950766  0.1638
factor(tment)1 -0.0888451  0.0688834 -0.2238542  0.0461640  0.1971
alb            -0.0215121  0.0066239 -0.0344947 -0.0085294  0.0012
log2bili        0.1439391  0.0323606  0.0805134  0.2073648  0.0000

exp(coeffients):
               Estimate    2.5%  97.5%
(Intercept)     1.57593 0.83078 2.9894
factor(tment)1  0.91499 0.79943 1.0472
alb             0.97872 0.96609 0.9915
log2bili        1.15481 1.08384 1.2304
Code show/hide
# Transplantation
resmeanIPCW(Event(days/365.25,status)~factor(tment),cause=1,
            data=pbc3, time=3, model="lin")

   n events
 349     21

 349 clusters
coeffients:
                Estimate   Std.Err      2.5%     97.5% P-value
(Intercept)     0.143960  0.041249  0.063114  0.224806  0.0005
factor(tment)1 -0.058157  0.051128 -0.158365  0.042052  0.2553

exp(coeffients):
               Estimate    2.5%  97.5%
(Intercept)     1.15484 1.06515 1.2521
factor(tment)1  0.94350 0.85354 1.0429
Code show/hide
resmeanIPCW(Event(days/365.25,status)~factor(tment)+alb+log2bili+sex+age,cause=1,
            data=subset(pbc3,!is.na(alb)), time=3, model="lin")

   n events
 343     20

 343 clusters
coeffients:
                 Estimate    Std.Err       2.5%      97.5% P-value
(Intercept)     0.3224456  0.2826487 -0.2315356  0.8764268  0.2540
factor(tment)1 -0.0688959  0.0453965 -0.1578714  0.0200796  0.1291
alb            -0.0038059  0.0042657 -0.0121665  0.0045546  0.3723
log2bili        0.0868250  0.0236530  0.0404659  0.1331841  0.0002
sex             0.1010467  0.0785489 -0.0529064  0.2549998  0.1983
age            -0.0082742  0.0028017 -0.0137653 -0.0027831  0.0031

exp(coeffients):
               Estimate    2.5%  97.5%
(Intercept)     1.38050 0.79331 2.4023
factor(tment)1  0.93342 0.85396 1.0203
alb             0.99620 0.98791 1.0046
log2bili        1.09071 1.04130 1.1425
sex             1.10633 0.94847 1.2905
age             0.99176 0.98633 0.9972
Code show/hide
resmeanIPCW(Event(days/365.25,status)~factor(tment)+alb+log2bili,cause=1,
            data=subset(pbc3,!is.na(alb)), time=3, model="lin")

   n events
 343     20

 343 clusters
coeffients:
                  Estimate     Std.Err        2.5%       97.5% P-value
(Intercept)    -0.29824097  0.21529398 -0.72020941  0.12372746  0.1660
factor(tment)1 -0.06453816  0.04593998 -0.15457888  0.02550255  0.1601
alb            -0.00067397  0.00410667 -0.00872290  0.00737496  0.8696
log2bili        0.10093806  0.02632894  0.04933429  0.15254183  0.0001

exp(coeffients):
               Estimate    2.5%  97.5%
(Intercept)     0.74212 0.48665 1.1317
factor(tment)1  0.93750 0.85678 1.0258
alb             0.99933 0.99132 1.0074
log2bili        1.10621 1.05057 1.1648

In-text, p. 141: Time lost g-formula and bootstrap

Code show/hide
library(boot)

# Transplantation
boot.fun <- function(dat, index){
bdata <- dat[index,]
obj<-rmtl.ipcw(bdata$time,bdata$status,eoi=1,tau=3,cbind(bdata$arm,bdata$alb,bdata$logbili))
rmst0<-obj[1]+obj[2]*0+obj[3]*bdata$alb+obj[4]*bdata$logbili
rmst1<-obj[1]+obj[2]*1+obj[3]*bdata$alb+obj[4]*bdata$logbili
diff<-rmst1-rmst0
res<-cbind(mean(rmst0),mean(rmst1),mean(diff))
return(res)
}
B<-200 
set.seed(1234)
trydata<-as.data.frame(cbind(time,status,x1))
bootres <- boot(trydata, boot.fun, R = B)

# mean and SD
print("Transplantation")
[1] "Transplantation"
Code show/hide
rbind(
  cbind(Placebo=mean(bootres$t[,1]),
        CyA=mean(bootres$t[,2]),
        Diff=mean(bootres$t[,3])),
  cbind(Placebo=sqrt(var(bootres$t[,1])),
        CyA=sqrt(var(bootres$t[,2])),
        Diff=sqrt(var(bootres$t[,3])))
)
        Placebo        CyA        Diff
[1,] 0.14211202 0.07317405 -0.06893798
[2,] 0.04101209 0.02876443  0.04993619
Code show/hide
# Death without transplantation
boot.fun <- function(dat, index){
bdata <- dat[index,]
obj<-rmtl.ipcw(bdata$time,bdata$status,eoi=2,tau=3,cbind(bdata$arm,bdata$alb,bdata$logbili))

rmst0<-obj[1]+obj[2]*0+obj[3]*bdata$alb+obj[4]*bdata$logbili
rmst1<-obj[1]+obj[2]*1+obj[3]*bdata$alb+obj[4]*bdata$logbili
diff<-rmst1-rmst0
res<-cbind(mean(rmst0),mean(rmst1),mean(diff))
return(res)
}

trydata<-as.data.frame(cbind(time,status,x1))
bootres <- boot(trydata, boot.fun, R = B)
# mean and SD
print("Death without transplantation")
[1] "Death without transplantation"
Code show/hide
rbind(
  cbind(Placebo=mean(bootres$t[,1]),
        CyA=mean(bootres$t[,2]),
        Diff=mean(bootres$t[,3])),
  cbind(Placebo=sqrt(var(bootres$t[,1])),
        CyA=sqrt(var(bootres$t[,2])),
        Diff=sqrt(var(bootres$t[,3])))
)
        Placebo        CyA        Diff
[1,] 0.28945212 0.21368521 -0.07576691
[2,] 0.05330835 0.05003825  0.07054484

Figure 4.21

Code show/hide
# Overall survival 
overall_surv <- survfit(Surv(days, status != 0) ~ 1, data = pbc3)

# Censoring
cens_surv <- survfit(Surv(days, status == 0) ~ 1, data = pbc3)

# Make data ready for plotting 
pdata <- data.frame(time = c(overall_surv$time, cens_surv$time), 
                    surv = c(overall_surv$surv, cens_surv$surv), 
                    type = c(rep("Treatment failure", length(overall_surv$time)), 
                             rep("Censoring", length(overall_surv$time))))

# Create Figure
fig4.21 <- ggplot(aes(x = time / 365.25, y = surv, linetype = type), 
                 data = pdata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_manual("Type", values = c("dashed", "solid")) + 
  xlab("Time since randomization (years)") + 
  ylab('Probability') + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 6), 
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.05)), 
                     limits = c(0, 1.0), 
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general + 
  theme(legend.box = "vertical",
        legend.key.size = unit(1.5, 'cm'))


fig4.21

Code show/hide
proc phreg data=pbc3 atrisk noprint;
    model followup*status(1 2)=;
    baseline out=survcens survival=kmc / method=pl;
run;
proc phreg data=pbc3 atrisk noprint;
    model followup*status(0)=;
    baseline out=survdat survival=kms / method=pl;
run;
data plotsurv; 
    merge survcens survdat; 
    by followup;
run;
data plotfin; set plotsurv;
    by followup;
    retain lastc lasts;
    if kmc=. then do c=lastc; s=kms; end; 
    if kms=. then do s=lasts; c=kmc; end;
    if kmc ne . and kms ne . then do; c=kmc; s=kms; end;
    output;
    lastc=c; lasts=s;
run;
proc gplot data=plotfin;
    plot c*followup s*followup/haxis=axis1 vaxis=axis2 overlay;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none
    label=(a=90 '"Survival" probability');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

In-text, p.154: Cox for censoring

Code show/hide
summary(coxph(Surv(days, status == 0) ~ tment, data = pbc3, method = "breslow"))
Call:
coxph(formula = Surv(days, status == 0) ~ tment, data = pbc3, 
    method = "breslow")

  n= 349, number of events= 259 

         coef exp(coef) se(coef)     z Pr(>|z|)
tment 0.08402   1.08765  0.12550 0.669    0.503

      exp(coef) exp(-coef) lower .95 upper .95
tment     1.088     0.9194    0.8505     1.391

Concordance= 0.507  (se = 0.018 )
Likelihood ratio test= 0.45  on 1 df,   p=0.5
Wald test            = 0.45  on 1 df,   p=0.5
Score (logrank) test = 0.45  on 1 df,   p=0.5
Code show/hide
summary(coxph(Surv(days, status == 0) ~ alb, data = pbc3, method = "breslow"))
Call:
coxph(formula = Surv(days, status == 0) ~ alb, data = pbc3, method = "breslow")

  n= 343, number of events= 255 
   (6 observations deleted due to missingness)

        coef exp(coef) se(coef)     z Pr(>|z|)
alb 0.001017  1.001017 0.012818 0.079    0.937

    exp(coef) exp(-coef) lower .95 upper .95
alb     1.001      0.999    0.9762     1.026

Concordance= 0.496  (se = 0.021 )
Likelihood ratio test= 0.01  on 1 df,   p=0.9
Wald test            = 0.01  on 1 df,   p=0.9
Score (logrank) test = 0.01  on 1 df,   p=0.9
Code show/hide
summary(coxph(Surv(days, status == 0) ~ bili, data = pbc3, method = "breslow"))
Call:
coxph(formula = Surv(days, status == 0) ~ bili, data = pbc3, 
    method = "breslow")

  n= 349, number of events= 259 

          coef exp(coef)  se(coef)      z Pr(>|z|)
bili -0.002524  0.997479  0.001817 -1.389    0.165

     exp(coef) exp(-coef) lower .95 upper .95
bili    0.9975      1.003    0.9939     1.001

Concordance= 0.49  (se = 0.021 )
Likelihood ratio test= 2.18  on 1 df,   p=0.1
Wald test            = 1.93  on 1 df,   p=0.2
Score (logrank) test = 1.93  on 1 df,   p=0.2
Code show/hide
proc phreg data=pbc3;
    class tment (ref='0');
    model followup*status(1 2)=tment/rl;
run;
proc phreg data=pbc3;
    model followup*status(1 2)=alb/rl;
run;
proc phreg data=pbc3;
    model followup*status(1 2)=bili/rl;
run;

PROVA trial in liver cirrhosis

Read data

Code show/hide
prova <- read.csv("data/prova.csv", na.strings = c("."))
# Treatment 2x2 factorial
prova$beh <- with(prova, as.factor(scle + beta*2))
# Extra variables
provany <- prova
provany$log2bili <- with(provany, log2(bili))
Code show/hide
proc import out=prova
    datafile="data/prova.csv"
    dbms=csv replace;
run;
data cens;
  set prova;
    if timebleed=. then time=timedeath;
    else time= timebleed + timedeath;
    beh = scle + beta*2; 
    log2bili = log2(bili);
run;

Figure 4.22

Code show/hide
library(ggplot2)
theme_general <- theme_bw() +
  theme(legend.position = "bottom",
        text = element_text(size = 20),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20))

# Make KM estimate of censoring distribution
provany$time <- with(provany, ifelse(!is.na(timebleed), timebleed + timedeath, timedeath))
censdist <- survfit(Surv(time, death == 0) ~ 1, data = provany)
censdist
Call: survfit(formula = Surv(time, death == 0) ~ 1, data = provany)

       n events median 0.95LCL 0.95UCL
[1,] 286    211    905     796    1082
Code show/hide
# Make data ready for plotting
pdata <- data.frame(time = censdist$time,
                    surv = censdist$surv)


# Create Figure
fig4.22 <- ggplot(aes(x = time / 365.25, y = surv),
                 data = pdata) +
  geom_step(linewidth = 1) +
  xlab("Time since randomization (years)") +
  ylab('Probability of no censoring') +
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.005)),
                     limits = c(0, 7),
                     breaks = seq(0, 7, 1)) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005)),
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general +
  theme(legend.box = "vertical",
        legend.key.size = unit(1.5, 'cm'))


fig4.22

Code show/hide
proc phreg data=cens atrisk noprint;
    model time*death(1)=;
    baseline out=survcens survival=kmc / method=pl;
run;
data survcens; 
    set survcens; 
    timey = time/365.25; 
run;
proc gplot data=survcens;
    plot kmc*timey/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Probability of no censoring');
    symbol1  v=none i=stepjl c=black;
run;
quit;

Table 4.12

Code show/hide
library(survival)
options(contrasts=c("contr.treatment", "contr.poly"))
# treat
coxph(Surv(time, death == 0) ~ beh, data = provany)
Call:
coxph(formula = Surv(time, death == 0) ~ beh, data = provany)

         coef exp(coef) se(coef)      z     p
beh1  0.03074   1.03122  0.19080  0.161 0.872
beh2 -0.04113   0.95970  0.18910 -0.218 0.828
beh3  0.04131   1.04218  0.20386  0.203 0.839

Likelihood ratio test=0.22  on 3 df, p=0.9751
n= 286, number of events= 211 
Code show/hide
# size 
coxph(Surv(time, death == 0) ~ factor(varsize), data = provany)
Call:
coxph(formula = Surv(time, death == 0) ~ factor(varsize), data = provany)

                    coef exp(coef) se(coef)      z      p
factor(varsize)2 -0.2324    0.7926   0.1465 -1.586 0.1127
factor(varsize)3 -0.4386    0.6449   0.2327 -1.885 0.0594

Likelihood ratio test=4.77  on 2 df, p=0.0922
n= 286, number of events= 211 
Code show/hide
# sex
coxph(Surv(time, death == 0) ~ sex, data = provany)
Call:
coxph(formula = Surv(time, death == 0) ~ sex, data = provany)

       coef exp(coef) se(coef)    z     p
sex 0.07847   1.08163  0.14542 0.54 0.589

Likelihood ratio test=0.29  on 1 df, p=0.588
n= 286, number of events= 211 
Code show/hide
# coag
coxph(Surv(time, death == 0) ~ coag, data = provany)
Call:
coxph(formula = Surv(time, death == 0) ~ coag, data = provany)

          coef exp(coef)  se(coef)      z     p
coag -0.002827  0.997177  0.002635 -1.073 0.283

Likelihood ratio test=1.18  on 1 df, p=0.2775
n= 272, number of events= 199 
   (14 observations deleted due to missingness)
Code show/hide
# bili
coxph(Surv(time, death == 0) ~ log2bili, data = provany)
Call:
coxph(formula = Surv(time, death == 0) ~ log2bili, data = provany)

            coef exp(coef) se(coef)     z    p
log2bili 0.07575   1.07870  0.05649 1.341 0.18

Likelihood ratio test=1.77  on 1 df, p=0.1834
n= 275, number of events= 202 
   (11 observations deleted due to missingness)
Code show/hide
# age
coxph(Surv(time, death == 0) ~ age, data = provany)
Call:
coxph(formula = Surv(time, death == 0) ~ age, data = provany)

         coef exp(coef)  se(coef)      z       p
age -0.017105  0.983040  0.005777 -2.961 0.00307

Likelihood ratio test=8.67  on 1 df, p=0.003242
n= 286, number of events= 211 
Code show/hide
proc phreg data=cens;
    class beh (ref='0');
    model time*death(1)=beh;
run;
proc phreg data=cens;
    class varsize (ref='1');
    model time*death(1)=varsize;
run;
proc phreg data=cens;
    class sex (ref='1');
    model time*death(1)=sex;
run;
proc phreg data=cens;
    model time*death(1)=coag;
run;
proc phreg data=cens;
    model time*death(1)=log2bili;
run;
proc phreg data=cens;
    model time*death(1)=age;
run;

Recurrent episodes in affective disorders

Read data

Code show/hide
affective <- read.csv("data/affective.csv")
affective$wait <- with(affective, stop - start)
affectivewlw <- read.csv("data/affectivewlw.csv")
library(survival)
Code show/hide
proc import out=affective
    datafile="data/affective.csv"
    dbms=csv replace;
run;
data affective; 
    set affective; 
    wait = stop - start; 
run; 
data angstprev; 
    set affective;
    by id;
    retain prev;
    if first.id then prev=0; 
    output; 
    if state=1 then prev=start; if state=0 then prev=stop;
run;

Table 4.3

Code show/hide
# Make dataset ready for mstate 
# From Out -> In,   trans = 1
# From Out -> Dead, trans = 2
# From In -> Out,   trans = 3
# From In -> Dead,  trans = 4
# + update status variable

library(dplyr)
affectivemstate__ <- affective %>% 
  mutate(statusnew = ifelse(status == 3, 0, 1), 
         trans = case_when(state == 0 & status == 1 ~ 1, 
                           state == 0 & status == 2 ~ 2, 
                           state == 1 & status == 0 ~ 3,
                           state == 1 & status == 2 ~ 4, 
                           state == 0 & status == 3 ~ 1, 
                           state == 1 & status == 3 ~ 3))

# For each transition, we should have a censoring for the trans to the other state
affectivemstate_ <- affectivemstate__ %>% 
  mutate(statusnew = 0,
         trans = case_when(trans == 1 ~ 2,
                           trans == 2 ~ 1, 
                           trans == 3 ~ 4, 
                           trans == 4 ~ 3))

affectivemstate <- rbind(affectivemstate__, affectivemstate_) %>% arrange(id, start)
affectivemstate <- affectivemstate %>% 
  mutate(from = case_when(trans == 1 ~ 1,
                          trans == 2 ~ 1, 
                          trans == 3 ~ 2, 
                          trans == 4 ~ 2), 
         to = case_when(trans == 1 ~ 2,
                        trans == 2 ~ 3, 
                        trans == 3 ~ 1, 
                        trans == 4 ~ 3),
         starty = start/12, 
         stopy = stop/12
         )

# Subset data by disease
affective0 <- subset(affectivemstate, bip == 0)
affective1 <- subset(affectivemstate, bip == 1)

# Set-up transition matrix
tmat <- matrix(NA, 3, 3)
tmat[1, 2:3]    <- 1:2
tmat[2, c(1,3)] <- 3:4
statenames <- c("Out of hospital", "In hospital", "Dead")
dimnames(tmat) <- list(from = statenames, to = statenames)

library(mstate)
## For unipolar (bip = 0) ----------------------------------- ##
attr(affective0, 'class') <- c("msdata","data.frame")
attr(affective0, 'trans') <- tmat
# Fit empty cox model per trans
c0 <- coxph(Surv(starty, stopy, statusnew) ~ strata(trans), data = affective0)
# Make a mstate object
msf0 <- msfit(object=c0, trans=tmat)
pt0  <- probtrans(msf0, predt=0)

## For bipolar (bip = 1) ----------------------------------- ##
attr(affective1, 'class') <- c("msdata","data.frame")
attr(affective1, 'trans') <- tmat
# Fit empty cox model per trans
c1 <- coxph(Surv(starty, stopy, statusnew) ~ strata(trans), data = affective1)
# Make a mstate object
msf1 <- msfit(object=c1, trans=tmat)
pt1 <- probtrans(msf1, predt=0)


regcoefvec <- function(data, tmat, tau) {
  cx <- coxph(Surv(starty, stopy, statusnew) ~ strata(trans), data=data)
  msf0 <- msfit(object = cx, trans = tmat)
  pt0 <- probtrans(msf0, predt=0)
  mat <- ELOS(pt0, tau=tau)
  return(mat[2,])
}

set.seed(1234)
res <- msboot(theta=regcoefvec, data=affective0, B=100, id="id", tmat=tmat, tau=15)
uniest<-regcoefvec(affective0, tmat, 15)
uniboots<-matrix(c(mean(res[1,]),sqrt(var(res[1,])),
         mean(res[2,]),sqrt(var(res[2,])),
         mean(res[3,]),sqrt(var(res[3,]))),
       nrow = 3, dimnames = list(c("Out of hosp","In hosp","Dead"), c("Years","SD")))
uni<-list("estimate"=uniest,"bootstrap"=uniboots)
set.seed(1234)
res <- msboot(theta=regcoefvec, data=affective1, B=100, id="id", tmat=tmat, tau=15)
biest<-regcoefvec(affective0, tmat, 15)
biboots<-matrix(c(mean(res[1,]),sqrt(var(res[1,])),
         mean(res[2,]),sqrt(var(res[2,])),
         mean(res[3,]),sqrt(var(res[3,]))),
       nrow = 3, dimnames = list(c("Out of hosp","In hosp","Dead"), c("Years","SD")))
bi<-list("estimate"=biest,"bootstrap"=biboots)
list("Unipolar"=uni,"Bipolar"=bi)
$Unipolar
$Unipolar$estimate
     in1      in2      in3 
9.589138 2.202036 3.208826 

$Unipolar$bootstrap
                Years        SD
Out of hosp 9.6888785 0.2515182
In hosp     0.4962329 3.1828301
Dead        2.1282913 0.4881871


$Bipolar
$Bipolar$estimate
     in1      in2      in3 
9.589138 2.202036 3.208826 

$Bipolar$bootstrap
                 Years        SD
Out of hosp 12.6037941 0.3523277
In hosp      0.7299104 0.8614208
Dead         1.5347851 0.5616117

Figure 4.16

Code show/hide
library(ggplot2)
theme_general <- theme_bw() +
  theme(legend.position = "bottom", 
        text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) 

# Make data set with probabilities - predictions from state 2 (in hospital)
# bip = 0 
t0 <- data.frame(
  time = pt0[[2]]$time, 
  pstate1 = pt0[[2]]$pstate1, 
  pstate2 = pt0[[2]]$pstate2, 
  pstate3 = pt0[[2]]$pstate3, 
  bip = rep("No", nrow(pt0[[2]]))
)

# bip = 1
t1 <- data.frame(
  time = pt1[[2]]$time, 
  pstate1 = pt1[[2]]$pstate1, 
  pstate2 = pt1[[2]]$pstate2, 
  pstate3 = pt1[[2]]$pstate3, 
  bip = rep("Yes", nrow(pt1[[2]]))
)
pstate1 <- data.frame(
  type = rep("Out of hospital", nrow(pt0[[2]]) + nrow(pt1[[2]])), 
  bip = c(rep("No", nrow(pt0[[2]])), rep("Yes", nrow(pt1[[2]]))),
  pstate = c(pt0[[2]]$pstate1, pt1[[2]]$pstate1), 
  time = c(pt0[[2]]$time, pt1[[2]]$time))

pstate2 <- data.frame(
  type = rep("In hospital", nrow(pt0[[2]]) + nrow(pt1[[2]])), 
  bip = c(rep("No", nrow(pt0[[2]])), rep("Yes", nrow(pt1[[2]]))),
  pstate = c(pt0[[2]]$pstate2, pt1[[2]]$pstate2), 
  time = c(pt0[[2]]$time, pt1[[2]]$time))

pstate3 <- data.frame(
  type = rep("Dead", nrow(pt0[[2]]) + nrow(pt1[[2]])),
  bip = c(rep("No", nrow(pt0[[2]])), rep("Yes", nrow(pt1[[2]]))),
  pstate = c(pt0[[2]]$pstate3, pt1[[2]]$pstate3), 
  time = c(pt0[[2]]$time, pt1[[2]]$time))

probs <- rbind(pstate1, pstate2, pstate3)

# A couple of places, a very small negative probability is predicted
# We fix it + sum here
t <- probs[probs$pstate <0,]$time #<-0

probs[probs$bip == "No" & probs$time == t[1], "pstate"]  <- 
  c(probs[probs$bip == "No" & probs$time == t[1], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "No" & probs$time == t[1], "pstate"] %*% c(0,0,1))

probs[probs$bip == "No" & probs$time == t[2], "pstate"]  <- 
  c(probs[probs$bip == "No" & probs$time == t[2], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "No" & probs$time == t[2], "pstate"] %*% c(0,0,1))

probs[probs$bip == "No" & probs$time == t[3], "pstate"]  <- 
  c(probs[probs$bip == "No" & probs$time == t[3], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "No" & probs$time == t[3], "pstate"] %*% c(0,0,1))

probs[probs$bip == "No" & probs$time == t[4], "pstate"]  <- 
  c(probs[probs$bip == "No" & probs$time == t[4], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "No" & probs$time == t[4], "pstate"] %*% c(0,0,1))


probs[probs$bip == "Yes" & probs$time == t[5], "pstate"]  <- 
  c(probs[probs$bip == "Yes" & probs$time == t[5], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "Yes" & probs$time == t[5], "pstate"] %*% c(0,0,1))


probs[probs$bip == "Yes" & probs$time == t[6], "pstate"]  <- 
  c(probs[probs$bip == "Yes" & probs$time == t[6], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "Yes" & probs$time == t[6], "pstate"] %*% c(0,0,1))


probs[probs$bip == "Yes" & probs$time == t[7], "pstate"]  <- 
  c(probs[probs$bip == "Yes" & probs$time == t[7], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "Yes" & probs$time == t[7], "pstate"] %*% c(0,0,1))

probs[probs$bip == "Yes" & probs$time == t[8], "pstate"]  <- 
  c(probs[probs$bip == "Yes" & probs$time == t[8], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "Yes" & probs$time == t[8], "pstate"] %*% c(0,0,1))

probs[probs$bip == "Yes" & probs$time == t[9], "pstate"]  <- 
  c(probs[probs$bip == "Yes" & probs$time == t[9], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "Yes" & probs$time == t[9], "pstate"] %*% c(0,0,1))

probs[probs$bip == "Yes" & probs$time == t[10], "pstate"]  <- 
  c(probs[probs$bip == "Yes" & probs$time == t[10], "pstate"] %*% c(1,1,0), 
    0, 
    probs[probs$bip == "Yes" & probs$time == t[10], "pstate"] %*% c(0,0,1))


probs2 <- probs[order(probs$bip, probs$type, probs$time, probs$pstate, decreasing = F),]

library(ggpattern)
p1 <- ggplot(aes(x = time), data = subset(probs2, bip == "No")) +
  geom_area_pattern(aes(y = pstate, 
                        pattern = type,
                        pattern_fill = type ), 
                    fill = 'white', 
                    colour = 'black', 
                    #pattern_density = 0.02, 
                    pattern_aspect_ratio = 1,
                    pattern_fill = 'darkgrey', 
                    pattern_color = 'black', 
                    pattern_spacing = 0.02,
                    linewidth = 0.7) + 
  ylab("Probability") + 
  xlab("Time since first admission (years)") + 
  scale_pattern_discrete(name = c("State"), 
                         choices = c("circle", "stripe", "crosshatch")) + 
  scale_pattern_fill_discrete(name = c("State")) + 
  scale_pattern_spacing_discrete(name = c("State")) + 
  theme_general + ggtitle("Unipolar") + 
  theme(legend.key.size = unit(1, 'cm'))

p2 <- ggplot(aes(x = time), data = subset(probs2, bip == "Yes")) +
  geom_area_pattern(aes(y = pstate, 
                        pattern = type,
                        pattern_fill = type ), 
                    fill = 'white', 
                    colour = 'black', 
                    #pattern_density = 0.02, 
                    pattern_aspect_ratio = 1,
                    pattern_fill = 'darkgrey', 
                    pattern_color = 'black', 
                    pattern_spacing = 0.02,
                    linewidth = 0.7) + 
  ylab("Probability") + 
  xlab("Time since first admission (years)") + 
  scale_pattern_discrete(name = c("State"),
                         choices = c("circle", "stripe", "crosshatch")) + 
  scale_pattern_fill_discrete(name = c("State")) + 
  scale_pattern_spacing_discrete(name = c("State")) + 
  theme_general + ggtitle("Bipolar") + 
  theme(legend.key.size = unit(1, 'cm'))

# common legend
library(grid)
library(gridExtra)
plots <- list(p1, p2)
g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
lheight <- sum(legend$height)
tmp <- arrangeGrob(p1 + theme(legend.position = "none"), 
                   p2 + theme(legend.position = "none"), 
                   layout_matrix = matrix(c(1, 2), nrow = 1))


fig4.16<-grid.arrange(tmp, legend, ncol = 1, 
                       heights = unit.c(unit(1, "npc") - lheight, lheight))

Table 4.7

Code show/hide
# In years
library(dplyr)
affective <- affective %>% mutate(starty = start / 12, stopy = stop / 12) %>% group_by(id) %>% 
                mutate(prevy1 = lag(starty, n = 1, default = 0), 
                       prevy2 = lag(stopy, n = 1, default = 0),
                       prevy = ifelse(state == 1, prevy2, prevy1))

# LWYY model - Mortality treated as censoring 
subaff<-data.frame(subset(affective, state == 0 | status %in% c(2,3)))
fit1 <- coxph(Surv(prevy, stopy, status == 1) ~ bip + cluster(id), 
              data = subaff, ties = "breslow")
summary(fit1)
Call:
coxph(formula = Surv(prevy, stopy, status == 1) ~ bip, data = subaff, 
    ties = "breslow", cluster = id)

  n= 661, number of events= 542 

       coef exp(coef) se(coef) robust se     z Pr(>|z|)  
bip 0.42019   1.52225  0.09446   0.18167 2.313   0.0207 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    exp(coef) exp(-coef) lower .95 upper .95
bip     1.522     0.6569     1.066     2.173

Concordance= 0.535  (se = 0.019 )
Likelihood ratio test= 18.62  on 1 df,   p=2e-05
Wald test            = 5.35  on 1 df,   p=0.02
Score (logrank) test = 20.07  on 1 df,   p=7e-06,   Robust = 4.15  p=0.04

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
# Ghosh-Lin model - Mortality treated as competing risk
library(mets)
fit2 <- recreg(Event(prevy, stopy, status) ~ bip + cluster(id),
               data = subaff, cause = 1, cens.code = 3, death.code = 2)
summary(fit2)

   n events
 661    542

 119 clusters
coeffients:
    Estimate     S.E.  dU^-1/2 P-value
bip 0.674604 0.193771 0.094016   5e-04

exp(coeffients):
    Estimate   2.5%  97.5%
bip   1.9633 1.3429 2.8702
Code show/hide
* LWYY model - Mortality treated as censoring; 
proc phreg covs(aggregate) data=angstprev;
    where state=0 or status=2 or status=3;
    class bip (ref='0');
    model stop*status(2 3)=bip/entry=prev rl;
    id id;
run;

                              Analysis of Maximum Likelihood Estimates

                 Parameter   Standard StdErr                         Hazard  95% Hazard Ratio
 Parameter   DF   Estimate      Error  Ratio Chi-Square Pr > ChiSq    Ratio Confidence Limits

 bip       1  1    0.42063    0.18165  1.923     5.3621     0.0206    1.523    1.067    2.174


* Ghosh-Lin model - Mortality treated as competing risk; 
proc phreg data=angstprev;
    where state=0 or status=2 or status=3;
    class bip (ref='0');
    model stop*status(3)=bip/entry=prev eventcode=1 rl;
run;

                              Analysis of Maximum Likelihood Estimates

                  Parameter    Standard                            Hazard   95% Hazard Ratio
Parameter    DF    Estimate       Error  Chi-Square  Pr > ChiSq     Ratio   Confidence Limits

bip       1   1     0.66630     0.13971     22.7440      <.0001     1.947     1.481     2.560

In-text, p. 145: Cox for mortality

Code show/hide
summary(coxph(Surv(start,stop,status==2)~bip,data=affective,ties="breslow"))
Call:
coxph(formula = Surv(start, stop, status == 2) ~ bip, data = affective, 
    ties = "breslow")

  n= 1287, number of events= 78 

       coef exp(coef) se(coef)      z Pr(>|z|)  
bip -0.8905    0.4104   0.3560 -2.502   0.0124 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    exp(coef) exp(-coef) lower .95 upper .95
bip    0.4104      2.436    0.2043    0.8246

Concordance= 0.56  (se = 0.02 )
Likelihood ratio test= 7.79  on 1 df,   p=0.005
Wald test            = 6.26  on 1 df,   p=0.01
Score (logrank) test = 6.67  on 1 df,   p=0.01
Code show/hide
proc phreg data=affective;
  model (start,stop)*status(0,1,3) = bip / rl;
run;

Figure 4.18

Code show/hide
theme_general <- theme_bw() +
  theme(legend.position = "bottom", 
        legend.title=element_blank(),
        legend.text = element_text(size = 20),
        text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) 

# Use Nelson-Aalen to estimate marginal mean, incorrectly censoring for death 
naa_est <- survfit(Surv(prevy, stopy, status == 1) ~ bip + cluster(id), 
    data = subset(affective, stopy > prevy & (state == 0 | status %in% c(2,3))), 
                   ctype = 1)
# Collect data for plotting
plotdata <- data.frame(time = naa_est$time, 
                       mu = naa_est$cumhaz, 
                       bip = c(rep("No", naa_est$strata[[1]]), 
                               rep("Yes", naa_est$strata[[2]])))

fig4.18 <- ggplot(aes(x = time, y = mu, linetype = bip), data = plotdata) + 
  geom_step(size = 1) + 
  xlab("Time since first admission (years)") + 
  ylab("Expected number of episodes") + 
  scale_linetype_manual("Bipolar", values = c("dashed", "solid"),
                        labels=c("Unipolar","Bipolar")) + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 30), 
                     breaks = seq(0, 30, by = 5)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 10), 
                     breaks = seq(0, 10, by = 2)) +
  theme_general + 
  theme(legend.box = "vertical",
        legend.key.size = unit(1.5, 'cm'))
fig4.18

Code show/hide
ods graphics on; 
proc phreg plots(overlay=row)=mcf covs(aggregate) data=angstprev;
    where state=0 or status=2 or status=3;
    class bip;
    model stop*status(2 3)=/entry=prev;
    id id;
    strata bip;
    baseline out=mcfdata cmf=naa;
run;
data mcfdata; set mcfdata;
    years=stop/12;
run;
proc gplot data=mcfdata;
    plot naa*years=bip/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 30 by 5 minor=none label=('Years');
    axis2 order=0 to 12 by 2 minor=none label=(a=90 'Expected number of episodes');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;
quit;

Figure 4.19

Code show/hide
theme_general <- theme_bw() +
  theme(legend.position = "bottom", 
        legend.title=element_blank(),
        legend.text = element_text(size = 20),
        text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) 

library(mets)
xr <- phreg(Surv(prevy, stopy, status == 1) ~ strata(bip) + cluster(id),
            data = subset(affective, state == 0 | status %in% c(2,3)))
xd <- phreg(Surv(prevy, stopy, status == 2) ~ strata(bip) + cluster(id),
            data = subset(affective, state == 0 | status %in% c(2,3)))

out <- recurrentMarginal(xr, xd)
pout <- data.frame(time = out$cumhaz[,1], 
                   mu = out$cumhaz[,2],
                   bip = as.factor(out$strata))

NAa_fit <- survfit(Surv(prevy, stopy, status == 1) ~ strata(bip),
                   data = subset(affective, state == 0 | status %in% c(2,3)),
                   id = id, ctype = 1, timefix = FALSE)
  
KM_fit <- survfit(Surv(prevy, stopy, status == 2) ~ strata(bip),
                  data = subset(affective, state == 0 | status %in% c(2,3)),
                  id = id, timefix = FALSE)
  
# Adjust hat(mu)
lS0 <- dplyr::lag(KM_fit$surv[1:(KM_fit$strata[1])], default = 1)
dA0 <- diff(NAa_fit$cumhaz[1:NAa_fit$strata[1]])
mu_adj0 <- cumsum(lS0 * c(0, dA0))

lS1 <- dplyr::lag(KM_fit$surv[(KM_fit$strata[1]+1):(KM_fit$strata[1] + KM_fit$strata[2])], default = 1)
dA1 <- diff(NAa_fit$cumhaz[(KM_fit$strata[1]+1):(KM_fit$strata[1] + KM_fit$strata[2])])
mu_adj1 <- cumsum(lS1 * c(0, dA1))

plotdata2 <- data.frame(time = KM_fit$time, 
                       mu = c(mu_adj0, mu_adj1), 
                       bip = c(rep("No", length(mu_adj0)), 
                               rep("Yes", length(mu_adj1))))

fig4.19 <- ggplot(aes(x = time, y = mu, linetype = bip), data = plotdata2) + 
  geom_step(linewidth = 1) + 
  xlab("Time since first admission (years)") + 
  ylab("Expected number of episodes") + 
  scale_linetype_manual("Bipolar", values = c("dashed", "solid"),
                        labels=c("Unipolar","Bipolar") ) + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 30), breaks = seq(0, 30, by = 5)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
  theme_general + 
  theme(legend.box = "vertical",
        legend.key.size = unit(1.5, 'cm'))
fig4.19

Code show/hide
/* Using "fine-gray model" in PHREG gives an alternative solution to 
  the estimator for CMF using the Breslow type estimator for 
  the baseline mean function (see p. 199 in book). The estimator is not
    exactly the same as Cook-Lawless because of a different procedures 
    for ties of terminating events and censorings. If no ties 
    (or no censorings) it equals Cook & Lawless */

proc phreg data=angstprev;
    where state=0 or status=2 or status=3;
    model stop*status(3)=/entry=prev eventcode=1;
    strata bip;
    baseline out=mcfdata1 cif=naa1;
run;
data mcfdata1;
    set mcfdata1;
    cmf=-log(1-naa1);
    years=stop/12;
run;
proc gplot data=mcfdata1;
    plot cmf*years=bip/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 30 by 5 minor=none label=('Years');
    axis2 order=0 to 12 by 2 minor=none label=(a=90 'Expected number of episodes');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;
quit;


/*** Calc Cook & Lawless or (Ghosh & Lin (GL)) estimator for CMF 'by hand' ***/
/* First create KM data for death */
proc phreg data=angstprev noprint;
    where state=0 or status=2 or status=3;
  model stop*status(1 3)= / entry=prev; /* status=2=death */
  strata bip;
  baseline out=kmdata survival=km / method=pl ;
run;
/* Second create NAa data */
proc phreg data=angstprev noprint;
    where state=0 or status=2 or status=3;
  model stop*status(2 3)= / entry=prev;/* status=1=event */
  strata bip;
  baseline out=nadata cumhaz=na;
run;
/* Use NA data to calculate dA(u), i.e., increments in NAa */
data na;
  set nadata;
  dAu=na-lag(na);
  if stop=0 then dAu=0;
  keep bip stop dAu na;
run;
/* merge NAa and KM data */
data merged;
  merge na kmdata;
  by bip stop;
run;
/* multiply S(u-) and dA(u) */
data fill;
   set merged;
   retain _km;
   if not missing(km) then _km=km;
   else km=_km;
   /* S(u-) */
   S_uminus=lag(km);
   if stop=0 then S_uminus=1;

   if dAu=. then dAu=0;
   GLfactor=S_uminus*dAu;
   keep bip stop na dAu S_uminus GLfactor;
run;
data GLdata;
  set fill;
  by bip;
  if first.bip then GL=0;
  else GL+GLfactor;
run;
proc sgplot data=GLdata;
  step x=stop y=GL / group=bip;
  step x=stop y=na / group=bip;
run;

Table 4.9

Code show/hide
# Number of recurrences and death
with(subset(affective, episode < 5), table(episode, status))
       status
episode   0   1   2   3
      1 116  99  16   4
      2  91  82  12   5
      3  74  62  17   3
      4  56  47  10   5
Code show/hide
apply(with(subset(affective, episode < 5), table(episode, status)), 2, cumsum)
       status
episode   0   1  2  3
      1 116  99 16  4
      2 207 181 28  9
      3 281 243 45 12
      4 337 290 55 17
Code show/hide
data angstwlw; 
    set affective;
    if episode<5 and (state=0 or status=2 or status=3);
run;
proc sort data=angstwlw; 
    by id; 
run;
proc freq data=angstwlw;
    tables episode*status; 
run;

Table 4.10

Code show/hide
# We're cheating here: Make WLW data ready using SAS data - see SAS code!
affectivewlw <- read.csv("data/affectivewlw.csv")
affectivewlw <- affectivewlw %>% mutate(bip1 = bip * (stratum == 1), 
                                        bip2 = bip * (stratum == 2), 
                                        bip3 = bip * (stratum == 3), 
                                        bip4 = bip * (stratum == 4))

# Composite endpoint
fit1 <- coxph(Surv(time, dc %in% c(1, 2)) ~ bip1 + bip2 + bip3 + bip4 + cluster(id) + strata(stratum), 
              data = affectivewlw, 
              ties = "breslow")
summary(fit1)
Call:
coxph(formula = Surv(time, dc %in% c(1, 2)) ~ bip1 + bip2 + bip3 + 
    bip4 + strata(stratum), data = affectivewlw, ties = "breslow", 
    cluster = id)

  n= 476, number of events= 434 

         coef exp(coef) se(coef) robust se     z Pr(>|z|)  
bip1 0.379171  1.461073 0.244976  0.208531 1.818    0.069 .
bip2 0.290599  1.337228 0.249212  0.255307 1.138    0.255  
bip3 0.003218  1.003223 0.253766  0.245542 0.013    0.990  
bip4 0.107276  1.113241 0.255085  0.236751 0.453    0.650  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     exp(coef) exp(-coef) lower .95 upper .95
bip1     1.461     0.6844    0.9709     2.199
bip2     1.337     0.7478    0.8108     2.206
bip3     1.003     0.9968    0.6200     1.623
bip4     1.113     0.8983    0.6999     1.771

Concordance= 0.51  (se = 0.017 )
Likelihood ratio test= 3.67  on 4 df,   p=0.5
Wald test            = 8.7  on 4 df,   p=0.07
Score (logrank) test = 3.97  on 4 df,   p=0.4,   Robust = 7.94  p=0.09

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
fit2 <- coxph(Surv(time, dc %in% c(1, 2)) ~ bip + cluster(id) + strata(stratum), 
              data = affectivewlw, 
              ties = "breslow")
summary(fit2)
Call:
coxph(formula = Surv(time, dc %in% c(1, 2)) ~ bip + strata(stratum), 
    data = affectivewlw, ties = "breslow", cluster = id)

  n= 476, number of events= 434 

      coef exp(coef) se(coef) robust se     z Pr(>|z|)
bip 0.1927    1.2125   0.1254    0.2037 0.946    0.344

    exp(coef) exp(-coef) lower .95 upper .95
bip     1.212     0.8248    0.8133     1.808

Concordance= 0.51  (se = 0.017 )
Likelihood ratio test= 2.27  on 1 df,   p=0.1
Wald test            = 0.89  on 1 df,   p=0.3
Score (logrank) test = 2.37  on 1 df,   p=0.1,   Robust = 0.95  p=0.3

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
# Cause-specific hazard of recurrence
fit3 <- coxph(Surv(time, dc %in% c(1)) ~ bip1 + bip2 + bip3 + bip4 + cluster(id) + strata(stratum), 
              data = affectivewlw, 
              ties = "breslow")
summary(fit3)
Call:
coxph(formula = Surv(time, dc %in% c(1)) ~ bip1 + bip2 + bip3 + 
    bip4 + strata(stratum), data = affectivewlw, ties = "breslow", 
    cluster = id)

  n= 476, number of events= 290 

       coef exp(coef) se(coef) robust se     z Pr(>|z|)   
bip1 0.4951    1.6406   0.2485    0.2017 2.454  0.01412 * 
bip2 0.6395    1.8956   0.2593    0.2420 2.642  0.00823 **
bip3 0.5342    1.7060   0.2853    0.2694 1.983  0.04741 * 
bip4 0.8793    2.4093   0.3085    0.2832 3.106  0.00190 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     exp(coef) exp(-coef) lower .95 upper .95
bip1     1.641     0.6095     1.105     2.436
bip2     1.896     0.5275     1.180     3.046
bip3     1.706     0.5862     1.006     2.893
bip4     2.409     0.4151     1.383     4.197

Concordance= 0.543  (se = 0.021 )
Likelihood ratio test= 19.5  on 4 df,   p=6e-04
Wald test            = 16.37  on 4 df,   p=0.003
Score (logrank) test = 22.58  on 4 df,   p=2e-04,   Robust = 12.85  p=0.01

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
fit4 <- coxph(Surv(time, dc %in% c(1)) ~ bip + cluster(id) + strata(stratum), 
              data = affectivewlw, 
              ties = "breslow")
summary(fit4)
Call:
coxph(formula = Surv(time, dc %in% c(1)) ~ bip + strata(stratum), 
    data = affectivewlw, ties = "breslow", cluster = id)

  n= 476, number of events= 290 

      coef exp(coef) se(coef) robust se     z Pr(>|z|)   
bip 0.6150    1.8496   0.1359    0.2106 2.921  0.00349 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    exp(coef) exp(-coef) lower .95 upper .95
bip      1.85     0.5407     1.224     2.794

Concordance= 0.543  (se = 0.021 )
Likelihood ratio test= 18.46  on 1 df,   p=2e-05
Wald test            = 8.53  on 1 df,   p=0.003
Score (logrank) test = 21.13  on 1 df,   p=4e-06,   Robust = 7.89  p=0.005

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
data angstwlw; 
    set affective;
    where episode<5 and (state=0 or status=2 or status=3);
run;
proc sort data=angstwlw; 
    by id; 
run;
data angstwlw4; 
    set angstwlw;
    by id;
    time=stop; dc=status; stratum=episode;
    output; 
    /* if last episode is not #4 then later episodes are either
         censored (1 or 3) or the 'end in death' (2) */
    if last.id then do;
        if episode=3 then do;
            time=stop;
        if status=1 or status=3 then dc=0; 
            if status=2 then dc=2;
            stratum=4;
            output;
    end;
        if episode=2 then do;
            time=stop; 
            if status=1 or status=3 then dc=0; 
            if status=2  then dc=2;
            stratum=3;
            output; 
            time=stop;
            if status=1 or status=3 then dc=0; 
            if status=2 then dc=2;
            stratum=4;
            output; 
        end;
        if episode=1 then do; 
            time=stop;
            if status=1 or status=3 then dc=0; 
            if status=2 then dc=2;
            stratum=2;
            output; 
            time=stop; if status=1 or status=3 then dc=0; 
            if status=2  then dc=2;
            stratum=3;
            output; 
            time=stop;
            if status=1 or status=3 then dc=0; 
            if status=2 then dc=2;
            stratum=4;
            output; 
        end;
    end;
run;
/* to use in R */
proc export data=angstwlw4
    outfile="data/affectivewlw.csv"
    dbms=csv replace;
run;
data angstwlw4; set angstwlw4;
    bip1=bip*(stratum=1); bip2=bip*(stratum=2);
    bip3=bip*(stratum=3); bip4=bip*(stratum=4);
run;

/* composite end point */ 
proc phreg data=angstwlw4 covs(aggregate);
    model time*dc(0 3)=bip1 bip2 bip3 bip4;
    strata stratum;
    id id;
    bip: test bip1=bip2=bip3=bip4;
run;
/* Joint model */
proc phreg data=angstwlw4 covs(aggregate);
    model time*dc(0 3)=bip;
    strata stratum;
    id id;
run;

/* Cause-spec. hazards for 1.,2.,3.,4. event */
proc phreg data=angstwlw4 covs(aggregate);
    model time*dc(0 2 3)=bip1 bip2 bip3 bip4;
    strata stratum;
    id id;
    bip: test bip1=bip2=bip3=bip4;
run;
/* Joint model */
proc phreg data=angstwlw4 covs(aggregate);
    model time*dc(0 2 3)=bip;
    strata stratum;
    id id;
run;

Figure 4.23

Code show/hide
theme_general <- theme_bw() +
  theme(legend.position = "bottom", 
        text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) 
# Last observations
cens <- affective %>% group_by(id) %>% slice(c(n()))
# Censoring dist, KM
censdist <- survfit(Surv(stop, status == 3) ~ 1, 
                    data = cens)
# Make data ready for plotting
pdata <- data.frame(time = censdist$time,
                    surv = censdist$surv)
fig4.23 <- ggplot(aes(x = time / 12, y = surv), data = pdata) +
  geom_step(linewidth = 1) +
  xlab("Time since first admission (years)") +
  ylab('Probability of no censoring') +
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)),
                     limits = c(0, 30),
                     breaks = seq(0, 30, 5)) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005)),
                     limits = c(0, 1.0),
                     breaks = seq(0, 1.0, 0.1)) +
  theme_general
fig4.23

Code show/hide
data cens; 
    set affective;
    by id;
    if last.id;
run;
proc phreg data=cens atrisk noprint;
    model stop*status(2)=;
    baseline out=angstcens survival=kmc / method=pl;
run;
data angstcens; 
    set angstcens; 
    years=stop/12; 
run;
proc gplot data=angstcens;
    plot kmc*years/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 30 by 5 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none
    label=(a=90 'Probability of no censoring');
    symbol1  v=none i=stepjl c=black;
run;
quit;

In-text, p. 155: Cox for censoring

Code show/hide
coxph(Surv(stop,status==3)~bip,data=cens)
Call:
coxph(formula = Surv(stop, status == 3) ~ bip, data = cens)

       coef exp(coef) se(coef)      z     p
bip -0.4844    0.6161   0.3738 -1.296 0.195

Likelihood ratio test=1.8  on 1 df, p=0.18
n= 119, number of events= 41 
Code show/hide
coxph(Surv(stop,status==3)~factor(year),data=cens)
Call:
coxph(formula = Surv(stop, status == 3) ~ factor(year), data = cens)

                     coef  exp(coef)   se(coef)      z        p
factor(year)59 -9.0565234  0.0001166  1.5191362 -5.962 2.50e-09
factor(year)60 -7.3882425  0.0006185  1.4565408 -5.072 3.93e-07
factor(year)61 -5.5586608  0.0038539  1.3476207 -4.125 3.71e-05
factor(year)62 -3.1137847  0.0444325  1.1253686 -2.767  0.00566

Likelihood ratio test=69.26  on 4 df, p=3.248e-14
n= 119, number of events= 41 
Code show/hide
proc phreg data=cens;
    model stop*status(2)=bip/rl;
run;
proc phreg data=cens;
  class year;
    model stop*status(2)=year/rl;
run;

LEADER cardiovascular trial in type 2 diabetes

Assume that the LEADER data set is loaded in data set leader_mi.

Figure 4.20

Code show/hide
library(survival)
library(ggplot2)
theme_general <- theme_bw() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_text(size = 20)
  )

ghosh_lin_nonpar_mcf <- function(endpointdat) {
  # Fit NAa
  NAa_fit <- survfit(
    Surv(start, stop, status == 1) ~ treat,
    data = endpointdat,
    id = id,
    ctype = 1
  )
  
  # Fit KM
  KM_fit <- survfit(
    Surv(start, stop, status == 2) ~ treat,
    data = endpointdat,
    id = id,
    ctype = 1
  )
  
  # Adjust hat(mu)
  mu_adj <-
    c(cumsum(KM_fit$surv[1:KM_fit$strata[[1]]] * c(0, diff(NAa_fit$cumhaz[1:NAa_fit$strata[[1]]]))),
      cumsum(KM_fit$surv[(KM_fit$strata[[1]] + 1):(KM_fit$strata[[1]] + KM_fit$strata[[2]])] *
               c(0, diff(NAa_fit$cumhaz[(NAa_fit$strata[[1]] + 1):(NAa_fit$strata[[1]] + NAa_fit$strata[[2]])]))))
  
  dat_adj <- data.frame(
    mu = mu_adj,
    time = NAa_fit$time,
    treat = c(
      rep("Liraglutide", NAa_fit$strata[[1]]),
      rep("Placebo", NAa_fit$strata[[2]])
    ),
    type = rep(
      "Mortality treated as a competing risk (CL)",
      length(NAa_fit$time)
    )
  )
  
  dat_unadj <- data.frame(
    mu = NAa_fit$cumhaz,
    time = NAa_fit$time,
    treat = c(
      rep("Liraglutide", NAa_fit$strata[[1]]),
      rep("Placebo", NAa_fit$strata[[2]])
    ),
    type = rep("Mortality treated as censoring (NA)", length(NAa_fit$time))
  )
  
  dat_adj$both <- with(dat_adj, paste(type, treat, sep = ", "))
  dat_unadj$both <- with(dat_unadj, paste(type, treat, sep = ", "))
  
  pdat <- rbind(dat_adj, dat_unadj)
  
    ggplot(aes(x = time  * 1 / (365.25 / 12), y = mu), data = pdat) +
    geom_step(aes(linetype = both), linewidth = 1.05) +
    xlab("Time since randomization (months)") +
    ylab("Expected number of events per subject") +
    scale_color_discrete("Treatment") +
    scale_linetype_manual("", values = c("dotdash", "dotted", "solid", "dashed")) +
    theme_general +
    theme(
      legend.position = "bottom",
      legend.margin = margin(t = -25),
      legend.direction = "vertical",
      legend.box = "horizontal",
      text = element_text(size = 20),
      legend.text = element_text(size = 18),
      legend.key.width = unit(3, "cm")
    ) +
    scale_x_continuous(
      expand = expansion(mult = c(0.005, 0.05)),
      limits = c(0, 65),
      breaks = seq(0, 65, by = 12)
    ) +
    scale_y_continuous(
      expand = expansion(mult = c(0.005, 0.05)),
      limits = c(0, 0.13),
      breaks = seq(0, 0.13, by = 0.02)
    )
  
}

fig4.20 <- ghosh_lin_nonpar_mcf(endpointdat = leader_mi)
fig4.20

Code show/hide
/* Using "fine-gray model" in PHREG gives an alternative solution to 
  the estimator for CMF using the Breslow type estimator for 
  the baseline mean function (see p. 199 in book). The estimator is not
    exactly the same as Cook-Lawless because of a different procedures 
    for ties of terminating events and censorings. If no ties 
    (or no censorings) it equals Cook & Lawless */

* NELSON-AALEN; 
proc phreg data=leader_mi noprint;
    model stop*status(0 2)=/entry=start;
    id id;
    strata treat;
  baseline out=na_data cumhaz=naa;
run;
data na_est;
    set na_data; 
    type = "Nelson-Aalen";
    cumevent = naa; 
    treat_type = trim(treat) || ", "  || type; 
run; 

* COOK & LAWLESS (GHOSH & LIN);
proc phreg data=leader_mi noprint;
  model (start, stop)*status(0)=/eventcode=1; 
  strata treat;
  baseline out=gl_data cif=cuminc;
run;
data gl_est;
    set gl_data; 
    type = "Ghosh & Lin";
    cumevent = -log(1-cuminc); 
    treat_type = trim(treat) || ", " || type; 
run; 
data comb; 
    set na_est gl_est; 
    time = stop/(365.25/12);
    drop naa cuminc;
run;
proc sgplot data=comb;
    step x=time y=cumevent/group=treat_type justify=left;
    xaxis grid values=(0 to 60 by 12);
    yaxis grid values=(0 to 0.12 by 0.02);
    label time="Time since randomisation (months)";
    label cumevent="Expected number events per subject"; 
run; 


/*** Calc Cook & Lawless or (Ghosh & Lin (GL)) estimator for CMF by hand ***/
/* First create KM data for death */
proc phreg data=leader_mi noprint;
  model stop*status(0 1)= / entry=start; /* status=2=death */
  strata treat;
  baseline out=kmdata survival=km / method=pl ;
run;
/* Second create NAa data */
proc phreg data=leader_mi noprint;
  model stop*status(0 2)= / entry=start; /* status=1=event */
  strata treat;
  baseline out=nadata cumhaz=na;
run;
/* Use NA data to calculate dA(u), i.e., increments in NAa */
data na;
  set nadata;
  dAu=na-lag(na);
  if stop=0 then dAu=0;
  keep treat stop dAu na;
run;
/* merge NAa and KM data */
data merged;
  merge na kmdata;
  by treat stop;
run;
/* multiply S(u-) and dA(u) */
data fill;
   set merged;
   retain _km;
   if not missing(km) then _km=km;
   else km=_km;
   /* S(u-) */
   S_uminus=lag(km);
   if stop=0 then S_uminus=1;

   if dAu=. then dAu=0;
   GLfactor=S_uminus*dAu;
   keep treat stop na dAu S_uminus GLfactor;
run;
data GLdata;
  set fill;
  by treat;
  if first.treat then GL=0;
  else GL+GLfactor;
    time = stop/(365.25/12);
run;
proc sgplot data=GLdata;
  step x=time y=na / group=treat;
  step x=time y=GL / group=treat;
    xaxis grid values=(0 to 60 by 12);
    yaxis grid values=(0 to 0.12 by 0.02);
    label time="Time since randomisation (months)";
    label na="Expected number events per subject"; 
run;

In-text, p. 146: LWYY and Ghosh-Lin models

Code show/hide
library(survival)
fit1 <- coxph(Surv(start, stop, status == 1) ~ treat + cluster(id), 
              data = leader_mi, ties = "breslow")
summary(fit1)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = leader_mi, 
    ties = "breslow", cluster = id)

  n= 10120, number of events= 780 

          coef exp(coef) se(coef) robust se      z Pr(>|z|)  
treat -0.16418   0.84859  0.07184   0.08810 -1.864   0.0624 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
treat    0.8486      1.178     0.714     1.009

Concordance= 0.524  (se = 0.011 )
Likelihood ratio test= 5.24  on 1 df,   p=0.02
Wald test            = 3.47  on 1 df,   p=0.06
Score (logrank) test = 5.23  on 1 df,   p=0.02,   Robust = 3.47  p=0.06

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
# Ghosh-Lin model - Mortality treated as competing risk
library(mets)
fit2 <- recreg(Event(start, stop, status) ~ treat + cluster(id),
               data = leader_mi, cause = 1, cens.code = 0, death.code = 2)
summary(fit2)

     n events
 10120    780

 9340 clusters
coeffients:
       Estimate      S.E.   dU^-1/2 P-value
treat -0.158754  0.087861  0.071839  0.0708

exp(coeffients):
      Estimate    2.5%  97.5%
treat  0.85321 0.71823 1.0135
Code show/hide
title 'LWYY model';
proc phreg data=leader_mi covs(aggregate);
  class treat(ref="0");
  model (start, stop)*status(0 2) = treat / rl; 
  id id;
run;

                              Analysis of Maximum Likelihood Estimates

                 Parameter   Standard StdErr                         Hazard  95% Hazard Ratio
 Parameter   DF   Estimate      Error  Ratio Chi-Square Pr > ChiSq    Ratio Confidence Limits Label

 treat     1  1   -0.16384    0.08810  1.226     3.4587     0.0629    0.849    0.714    1.009 treat 1


title 'Ghosh-Lin model';
proc phreg data=leader_mi covs(aggregate);
  class treat(ref="0");
  model (start, stop)*status(0) = treat / rl
        eventcode=1 convergelike=1E-9; 
  id id;
run;
        
                               Analysis of Maximum Likelihood Estimates

                  Parameter    Standard                            Hazard   95% Hazard Ratio
Parameter    DF    Estimate       Error  Chi-Square  Pr > ChiSq     Ratio   Confidence Limits  Label

treat     1   1    -0.15877     0.08786      3.2658      0.0707     0.853     0.718     1.014  treat 1

In-text, p. 146: Cox for mortality

Code show/hide
coxph(Surv(start,stop,status==2)~factor(treat),data=leader_mi,ties="breslow")
Call:
coxph(formula = Surv(start, stop, status == 2) ~ factor(treat), 
    data = leader_mi, ties = "breslow")

                  coef exp(coef) se(coef)     z      p
factor(treat)0 0.16627   1.18090  0.06973 2.385 0.0171

Likelihood ratio test=5.71  on 1 df, p=0.01692
n= 10120, number of events= 828 
Code show/hide
proc phreg data=leader_mi;
  class treat(ref="1");
  model (start,stop)*status(0,1) = treat / rl;
run;

Bone marrow transplantation in acute leukemia

Read data

Code show/hide
bmt <- read.csv("data/bmt.csv")
#bmt$timedeath <- ifelse(bmt$rel==1 & bmt$timedeath==bmt$timerel,bmt$timedeath+0.01,bmt$timedeath)
bmt$intxsurv<- bmt$timedeath
bmt$dead <- bmt$death
bmt$intxrel <- ifelse(bmt$rel == 1, bmt$timerel, bmt$timedeath)
bmt$trm     <- ifelse(bmt$rel == 0 & bmt$death == 1, 1, 0)
bmt$tgvhd   <- ifelse(bmt$gvhd == 1, bmt$timegvhd, bmt$intxrel)
bmt$tanc500 <- ifelse(bmt$anc500 == 1, bmt$timeanc500, bmt$intxrel)
bmt$state0  <- bmt$rel + 2*bmt$trm
Code show/hide
proc import out=bmt
    datafile="data/bmt.csv" 
    dbms=csv replace;
run;
data bmt; 
  set bmt;
    intxsurv=timedeath;
    dead=death;
    if rel=1 then intxrel=timerel;
    if rel=0 then intxrel=timedeath;
    if gvhd=1 then tgvhd=timegvhd;
    if gvhd=0 then tgvhd=intxrel;
    trm=0;
    if rel=0 and death=1 then trm=1;
    state0=rel+2*trm;
run;

Figure 4.15

Code show/hide
library(ggplot2)
# General theme
theme_general <- theme_bw() +
  theme(legend.position = "bottom", 
        text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) 

library(survival)
# Relapse-free survival 
fit1 <- survfit(Surv(intxrel, state0 != 0) ~ 1, data = bmt)

# relapse
require(mets)
fit2 <- cif(Event(intxrel, state0) ~ 1, data = bmt, cause = 1)

# death in remission
fit3 <- cif(Event(intxrel, state0) ~ 1, data = bmt, cause = 2)

# overall survival
fit4 <- survfit(Surv(intxsurv, dead == 1) ~ 1, data = bmt)

# We need the same time for all probabilities
require(dplyr)
require(tidyr)
m1 <- stepfun(x = fit1$time, y = c(1, fit1$surv)) 
m2 <- stepfun(x = fit2$times, y = c(0, fit2$mu))
m3 <- stepfun(x = fit3$times, y = c(0, fit3$mu))
m4 <- stepfun(x = fit4$time, y = c(0, 1-fit4$surv))

unitimes <- sort(unique(c(fit1$time, fit2$times, fit3$times, fit4$time)))
m <- data.frame(time = unitimes, 
                q0 = m1(unitimes),
                c1 = m2(unitimes), 
                c2 = m3(unitimes), 
                c23 = m4(unitimes))

m$q2 <-m$c2
m$q3 <- m$c23 - m$c2
m$q1 <- m$c1 - m$q3
m$sum <- with(m, q0+q1+q2+q3)
m$prev <- with(m, q1 / (q0 + q1))

# Prepare data for plotting
plotdata <- with(m, 
                 data.frame(time = c(time, time), 
                            prob = c(prev, q1),
                            type = c(rep("Prevalence of relapse", length(time)), 
                                     rep("Probability of being alive with relapse",
                                         length(time)))))

# Create Figure
fig4.15 <- ggplot(aes(x = time, y = prob, linetype = type), data = plotdata) + 
  geom_step(linewidth = 1) + 
  scale_linetype_discrete("Type") + 
  xlab("Time since bone marrow transplantation (months)") + 
  ylab("Probability") + 
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)), 
                     limits = c(0, 156), breaks = seq(0, 156, by = 12)) + 
  scale_y_continuous(expand = expansion(mult = c(0.001, 0.05)), 
                     limits = c(0, 0.05), 
                     breaks = seq(0, 0.05, 0.01)) +
  theme_general + 
  theme(legend.box = "vertical",
        text = element_text(size=21), 
        legend.key.size = unit(1, 'cm'))

fig4.15

Code show/hide
proc phreg data=bmt noprint; /* Relapse-free surv */
    model intxrel*state0(0)=;
    baseline out=surv survival=km;
run;

proc phreg data=bmt noprint; /* Relapse */
    model intxrel*state0(0)=/eventcode=1;
    baseline out=cif1 cif=cif1;
run;

proc phreg data=bmt noprint; /* Death in remission */
    model intxrel*state0(0)=/eventcode=2;
    baseline out=cif2 cif=cif2;
run;

proc phreg data=bmt noprint; /* Overall surv. */
    model intxsurv*dead(0)=/eventcode=1;
    baseline out=dead cif=cif23;
run;

/* We need the same time variable for all probabilities */
data dead; set dead; time=intxsurv; run;
data surv; set surv; time=intxrel; run;
data cif1; set cif1; time=intxrel; run;
data cif2; set cif2; time=intxrel; run;
data all; merge surv cif1 cif2 dead; by time; run;

data allrev; 
    set all;
    by time;
    retain last1 last2 last3 last4;
    if km=. then rfs=last1; if km ne . then rfs=km; 
    if cif1=. then c1=last2; if cif1 ne . then c1=cif1;
    if cif2=. then c2=last3; if cif2 ne . then c2=cif2;
    if cif23=. then c23=last4; if cif23 ne . then c23=cif23;
    output;
    last1=rfs; last2=c1; last3=c2; last4=c23;
run;
data allrev; 
set allrev;
    q0=rfs; q2=c2; q3=c23-c2; q1=c1-q3; sum=q0+q1+q2+q3; prev=q1/(q0+q1); tment=0;
run;
proc gplot data=allrev;
    plot prev*time q1*time/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 150 by 10 minor=none label=('Months');
    axis2 order=0 to 0.05 by 0.01 minor=none label=(a=90 'Relapse prev. and prob.');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

In-text p. 133: Expected time lost

Code show/hide
# Transition matrix
library(mstate)
tmat <- trans.illdeath(names = c("BMT", "Relapse", "Dead"))
# Creating data in long format, i.e. row for every transition

bmtms <- msprep(time = c(NA, "intxrel", "intxsurv"),
                status = c(NA, "rel", "dead"),
                data = bmt,
                trans = tmat)

# function to collect elos to be bootstrapped
epsi <- function(data, tmat, tau) {
  np <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans),data=data)
  msfitnp<-msfit(np,trans=tmat)
  pt<-probtrans(msfitnp,predt=0,variance = F)
  mat<-ELOS(pt,tau=120)
  colnames(mat)<-c("eps0","eps1","timelost")
  return(mat[1,])
}
est<-epsi(bmtms,tmat,120)

suppressWarnings(res <- msboot(theta=epsi, data=bmtms, B=1000, id="id", tmat=tmat, tau=120))

boots<-matrix(c(
        mean(res[1,]),sqrt(var(res[1,])),
        mean(res[2,]),sqrt(var(res[2,])),
        mean(res[3,]),sqrt(var(res[3,]))),
        nrow = 3, byrow=T, 
        dimnames = list(c("eps0","eps1","timelost"), c("Years","SD")))
list("estimate"=est,"bootstrap"=boots)
$estimate
     eps0      eps1  timelost 
75.747056  1.477754 42.775190 

$bootstrap
             Years        SD
eps0     75.666653 1.2132936
eps1      1.441664 0.2568131
timelost 42.891682 1.1978824
Code show/hide
/* Bootstrap */
data bootbmt;
    do sampnum = 1 to 1000; /* nboot=1000*/
    do i = 1 to 2009; /*nobs=2009*/
    x=round(ranuni(0)*2009); /*nobs=2009*/
    set bmt
    point=x;
    output;
    end;
    end;
    stop;
run;

%macro areastepby(data,byvar,beh,grp,tid,y,tau);
    data select;
        set &data;
        where &beh=&grp;
    run;
    data select;
        set select;
        by &byvar;
        retain mu oldt oldy;
        if first.&byvar then do oldt=0; oldy=1; mu=0;  end;
        if &tid>&tau then do;
        &tid=&tau; &y=oldy; end;
        if not first.&byvar then mu+oldy*(&tid-oldt);
        if last.&byvar then do;
        if &tid<&tau then mu+(&tau-&tid)*&y; end;
        oldy=&y; oldt=&tid;
    run;
    data last;
        set select;
        by  &byvar;
        if last.&byvar;
    run;
%mend areastepby;

proc phreg data=bootbmt noprint; /* Relapse-free surv */
by sampnum;
model intxrel*state0(0)=;
baseline out=surv survival=km;
run;

proc phreg data=bootbmt noprint; /* Relapse */
by sampnum;
model intxrel*state0(0)=/eventcode=1;
baseline out=cif1 cif=cif1;
run;

proc phreg data=bootbmt noprint; /* Death in remission */
by sampnum;
model intxrel*state0(0)=/eventcode=2;
baseline out=cif2 cif=cif2;
run;

proc phreg data=bootbmt noprint; /* Overall surv. */
by sampnum;
model intxsurv*dead(0)=/eventcode=1;
baseline out=dead cif=cif23;
run;

data dead; set dead; time=intxsurv; drop intxsurv;  run;  
data surv; set surv; time=intxrel; drop intxrel; run;
data cif1; set cif1; time=intxrel; drop intxrel; run;
data cif2; set cif2; time=intxrel; drop intxrel; run;
data all; merge surv cif1 cif2 dead ; by sampnum time; run;

data allrev; set all;
by sampnum time; 
retain last1 last2 last3 last4;
if km=. then rfs=last1; if km ne . then rfs=km; 
if cif1=. then c1=last2; if cif1 ne . then c1=cif1;
if cif2=. then c2=last3; if cif2 ne . then c2=cif2;
if cif23=. then c23=last4; if cif23 ne . then c23=cif23;
output;
last1=rfs; last2=c1; last3=c2; last4=c23;
run;

data allrev; set allrev;
q0=rfs; q2=c2; q3=c23-c2; q1=c1-q3; sum=q0+q1+q2+q3; prev=q1/(q0+q1); tment=0;
run;

* Alive replase free;
%areastepby(allrev,sampnum,tment,0,time,q0,120);
proc means data=last n mean stddev;
var mu;
run;

            The MEANS Procedure

        Analysis Variable : mu

    N            Mean         Std Dev
 ------------------------------------
 1000      75.7672178       1.2434139
 ------------------------------------

    
/* macro need to be changed for cuminc (start in 0) */

%macro areastepby0(data,byvar,beh,grp,tid,y,tau);
    data select;
        set &data;
        where &beh=&grp;
    run;
    data select;
        set select;
        by &byvar;
        retain mu oldt oldy;
        if first.&byvar then do oldt=0; oldy=0; mu=0;  end;
        if &tid>&tau then do;
        &tid=&tau; &y=oldy; end;
        if not first.&byvar then mu+oldy*(&tid-oldt);
        if last.&byvar then do;
        if &tid<&tau then mu+(&tau-&tid)*&y; end;
        oldy=&y; oldt=&tid;
    run;
    data last;
        set select;
        by  &byvar;
        if last.&byvar;
    run;
%mend areastepby;

* Relapse;
%areastepby0(allrev,sampnum,tment,0,time,q1,120);
proc means data=last n mean stddev;
var mu;
run;
    
          The MEANS Procedure

         Analysis Variable : mu

     N            Mean         Std Dev
  ------------------------------------
  1000       1.6264372       0.2929657
  ------------------------------------

* Death;
%areastepby0(allrev,sampnum,tment,0,time,c23,120);
proc means data=last n mean stddev;
var mu;
run;
     
         The MEANS Procedure

        Analysis Variable : mu

    N            Mean         Std Dev
 ------------------------------------
 1000      42.6118743       1.2241774
 ------------------------------------


* Death without replase;
%areastepby0(allrev,sampnum,tment,0,time,q2,120);
proc means data=last n mean stddev;
var mu;
run;

         The MEANS Procedure

        Analysis Variable : mu

    N            Mean         Std Dev
 ------------------------------------
 1000      29.1476344       1.1006234
 ------------------------------------



* Death with replase;
%areastepby0(allrev,sampnum,tment,0,time,q3,120);
proc means data=last n mean stddev;
var mu;
run;
    
         The MEANS Procedure

        Analysis Variable : mu

    N            Mean         Std Dev
 ------------------------------------
 1000      13.4642399       0.8270791
 ------------------------------------

Table 4.8 and in-text Wald tests

Code show/hide
bmt$age10<-bmt$age/10
summary(coxph(Surv(intxrel, rel == 1) ~ bmonly + all + age, data = bmt, 
      ties = "breslow"))
Call:
coxph(formula = Surv(intxrel, rel == 1) ~ bmonly + all + age, 
    data = bmt, ties = "breslow")

  n= 2009, number of events= 259 

            coef exp(coef)  se(coef)      z Pr(>|z|)    
bmonly -0.107627  0.897962  0.134487 -0.800    0.424    
all     0.548871  1.731297  0.129307  4.245 2.19e-05 ***
age    -0.004487  0.995523  0.004440 -1.011    0.312    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       exp(coef) exp(-coef) lower .95 upper .95
bmonly    0.8980     1.1136    0.6899     1.169
all       1.7313     0.5776    1.3437     2.231
age       0.9955     1.0045    0.9869     1.004

Concordance= 0.576  (se = 0.018 )
Likelihood ratio test= 20.61  on 3 df,   p=1e-04
Wald test            = 21.57  on 3 df,   p=8e-05
Score (logrank) test = 22.12  on 3 df,   p=6e-05
Code show/hide
summary(coxph(Surv(intxrel, rel == 1) ~ bmonly + all + age + cluster(team), 
      data = bmt, 
      ties = "breslow"))
Call:
coxph(formula = Surv(intxrel, rel == 1) ~ bmonly + all + age, 
    data = bmt, ties = "breslow", cluster = team)

  n= 2009, number of events= 259 

            coef exp(coef)  se(coef) robust se      z Pr(>|z|)   
bmonly -0.107627  0.897962  0.134487  0.137880 -0.781   0.4350   
all     0.548871  1.731297  0.129307  0.173916  3.156   0.0016 **
age    -0.004487  0.995523  0.004440  0.007486 -0.599   0.5489   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       exp(coef) exp(-coef) lower .95 upper .95
bmonly    0.8980     1.1136    0.6853     1.177
all       1.7313     0.5776    1.2312     2.434
age       0.9955     1.0045    0.9810     1.010

Concordance= 0.576  (se = 0.022 )
Likelihood ratio test= 20.61  on 3 df,   p=1e-04
Wald test            = 16.34  on 3 df,   p=0.001
Score (logrank) test = 22.12  on 3 df,   p=6e-05,   Robust = 13.19  p=0.004

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
# Relapse-free survival
summary(coxph(Surv(intxrel, state0 != 0) ~ bmonly + all + age, data = bmt, 
      ties = "breslow"))
Call:
coxph(formula = Surv(intxrel, state0 != 0) ~ bmonly + all + age, 
    data = bmt, ties = "breslow")

  n= 2009, number of events= 764 

            coef exp(coef)  se(coef)      z Pr(>|z|)    
bmonly -0.161079  0.851225  0.077445 -2.080   0.0375 *  
all     0.454636  1.575599  0.077733  5.849 4.95e-09 ***
age     0.016917  1.017061  0.002588  6.537 6.28e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       exp(coef) exp(-coef) lower .95 upper .95
bmonly    0.8512     1.1748    0.7313    0.9908
all       1.5756     0.6347    1.3529    1.8349
age       1.0171     0.9832    1.0119    1.0222

Concordance= 0.588  (se = 0.011 )
Likelihood ratio test= 81.95  on 3 df,   p=<2e-16
Wald test            = 81.52  on 3 df,   p=<2e-16
Score (logrank) test = 82.07  on 3 df,   p=<2e-16
Code show/hide
summary(coxph(Surv(intxrel, state0 != 0) ~ bmonly + all + age + cluster(team), 
      data = bmt, 
      ties = "breslow"))
Call:
coxph(formula = Surv(intxrel, state0 != 0) ~ bmonly + all + age, 
    data = bmt, ties = "breslow", cluster = team)

  n= 2009, number of events= 764 

            coef exp(coef)  se(coef) robust se      z Pr(>|z|)    
bmonly -0.161079  0.851225  0.077445  0.077240 -2.085    0.037 *  
all     0.454636  1.575599  0.077733  0.078077  5.823 5.78e-09 ***
age     0.016917  1.017061  0.002588  0.003328  5.083 3.71e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       exp(coef) exp(-coef) lower .95 upper .95
bmonly    0.8512     1.1748    0.7316    0.9904
all       1.5756     0.6347    1.3520    1.8361
age       1.0171     0.9832    1.0104    1.0237

Concordance= 0.588  (se = 0.013 )
Likelihood ratio test= 81.95  on 3 df,   p=<2e-16
Wald test            = 52.58  on 3 df,   p=2e-11
Score (logrank) test = 82.07  on 3 df,   p=<2e-16,   Robust = 47.68  p=2e-10

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
# overall survival
summary(coxph(Surv(intxsurv, dead != 0) ~ bmonly + all + age, data = bmt, 
      ties = "breslow"))
Call:
coxph(formula = Surv(intxsurv, dead != 0) ~ bmonly + all + age, 
    data = bmt, ties = "breslow")

  n= 2009, number of events= 737 

            coef exp(coef)  se(coef)      z Pr(>|z|)    
bmonly -0.160104  0.852055  0.079035 -2.026   0.0428 *  
all     0.405480  1.500022  0.079556  5.097 3.45e-07 ***
age     0.017286  1.017437  0.002636  6.558 5.45e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       exp(coef) exp(-coef) lower .95 upper .95
bmonly    0.8521     1.1736    0.7298    0.9948
all       1.5000     0.6667    1.2835    1.7531
age       1.0174     0.9829    1.0122    1.0227

Concordance= 0.59  (se = 0.011 )
Likelihood ratio test= 76.65  on 3 df,   p=<2e-16
Wald test            = 76.05  on 3 df,   p=<2e-16
Score (logrank) test = 76.57  on 3 df,   p=<2e-16
Code show/hide
summary(coxph(Surv(intxsurv, dead != 0) ~ bmonly + all + age + cluster(team), 
      data = bmt, 
      ties = "breslow"))
Call:
coxph(formula = Surv(intxsurv, dead != 0) ~ bmonly + all + age, 
    data = bmt, ties = "breslow", cluster = team)

  n= 2009, number of events= 737 

            coef exp(coef)  se(coef) robust se      z Pr(>|z|)    
bmonly -0.160104  0.852055  0.079035  0.080788 -1.982   0.0475 *  
all     0.405480  1.500022  0.079556  0.077594  5.226 1.74e-07 ***
age     0.017286  1.017437  0.002636  0.003339  5.177 2.26e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       exp(coef) exp(-coef) lower .95 upper .95
bmonly    0.8521     1.1736    0.7273    0.9982
all       1.5000     0.6667    1.2884    1.7464
age       1.0174     0.9829    1.0108    1.0241

Concordance= 0.59  (se = 0.013 )
Likelihood ratio test= 76.65  on 3 df,   p=<2e-16
Wald test            = 48.78  on 3 df,   p=1e-10
Score (logrank) test = 76.57  on 3 df,   p=<2e-16,   Robust = 45.26  p=8e-10

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
/* Relapse, relapse-free and overall survival
   without and with adjustment for center */
proc phreg data=bmt;
    class bmonly(ref="0") all(ref="0");
    model intxrel*rel(0)=bmonly all age;
run;
proc phreg data=bmt covs(aggregate);
    class bmonly(ref="0") all(ref="0") team;
    model intxrel*rel(0)=bmonly all age;
    id team;
run;
proc phreg data=bmt;
    class bmonly(ref="0") all(ref="0");
    model intxrel*state0(0)=bmonly all age;
run;
proc phreg data=bmt covs(aggregate);
    class bmonly(ref="0") all(ref="0");
    model intxrel*state0(0)=bmonly all age;
    id team;
run;
proc phreg data=bmt;
    class bmonly(ref="0") all(ref="0");
    model intxsurv*dead(0)=bmonly all age;
run;
proc phreg data=bmt covs(aggregate);
    class bmonly(ref="0") all(ref="0");
    model intxsurv*dead(0)=bmonly all age;
    id team;
run;

In-text, p. 148: Oneway anova

Code show/hide
anova(lm(age~factor(team),data=bmt))
Analysis of Variance Table

Response: age
               Df Sum Sq Mean Sq F value    Pr(>F)    
factor(team)  254 215520  848.51  5.6389 < 2.2e-16 ***
Residuals    1754 263929  150.47                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code show/hide
proc glm data=bmt;
    class team;
    model age=team;
run;
quit;

Table 4.11 and in-text, p. 151

Code show/hide
# joint analysis of relapse-free and overall survival

bmt <- read.csv("data/bmt.csv")
bmt$intxsurv<- bmt$timedeath
bmt$dead <- bmt$death
bmt$intxrel <- ifelse(bmt$rel == 1, bmt$timerel, bmt$timedeath)
bmt$trm     <- ifelse(bmt$rel == 0 & bmt$death == 1, 1, 0)
bmt$tgvhd   <- ifelse(bmt$gvhd == 1, bmt$timegvhd, bmt$intxrel)
bmt$tanc500 <- ifelse(bmt$anc500 == 1, bmt$timeanc500, bmt$intxrel)
bmt$state0  <- bmt$rel + 2*bmt$trm

library(dplyr)
rfs<-mutate(bmt,
            version=1,
            dc=state0>0,
            time=intxrel,
            gsource0=bmonly,
            gsource2=0,
            disease0=all,
            disease2=0,
            age0=age/10,
            age2=0)

srv<-mutate(bmt,
            version=2,
            dc=dead,
            time=intxsurv,
            gsource0=0,
            gsource2=bmonly,
            disease0=0,
            disease2=all,
            age0=0,
            age2=age/10)
double02<-rbind(rfs,srv)

# joint analysis of relapse-free, GvHD-free survival and overall survival
(fitd<-coxph(Surv(time, dc) ~
        gsource0 + disease0 + age0+
        gsource2 + disease2 + age2+
        strata(version)+cluster(id), data=double02, ties="breslow"))
Call:
coxph(formula = Surv(time, dc) ~ gsource0 + disease0 + age0 + 
    gsource2 + disease2 + age2 + strata(version), data = double02, 
    ties = "breslow", cluster = id)

             coef exp(coef) se(coef) robust se      z        p
gsource0 -0.16108   0.85122  0.07745   0.07654 -2.105   0.0353
disease0  0.45464   1.57560  0.07773   0.07720  5.889 3.88e-09
age0      0.16917   1.18432  0.02588   0.02593  6.523 6.87e-11
gsource2 -0.16010   0.85206  0.07904   0.07871 -2.034   0.0419
disease2  0.40548   1.50002  0.07956   0.07915  5.123 3.01e-07
age2      0.17286   1.18871  0.02636   0.02646  6.534 6.42e-11

Likelihood ratio test=158.6  on 6 df, p=< 2.2e-16
n= 4018, number of events= 1501 
Code show/hide
# Correlation matrix
cov2cor(vcov(fitd))
            gsource0    disease0      age0    gsource2    disease2      age2
gsource0  1.00000000 -0.03174451 0.3066426  0.97967993 -0.02182335 0.3177509
disease0 -0.03174451  1.00000000 0.2134773 -0.02318512  0.96200549 0.2038750
age0      0.30664257  0.21347726 1.0000000  0.31716041  0.20061246 0.9694844
gsource2  0.97967993 -0.02318512 0.3171604  1.00000000 -0.02206643 0.3292655
disease2 -0.02182335  0.96200549 0.2006125 -0.02206643  1.00000000 0.2125261
age2      0.31775090  0.20387498 0.9694844  0.32926550  0.21252611 1.0000000
Code show/hide
# Wald tests
library(car) # Companion to Applied Regression package
linearHypothesis(fitd, "gsource0=gsource2")
Linear hypothesis test

Hypothesis:
gsource0 - gsource2 = 0

Model 1: restricted model
Model 2: Surv(time, dc) ~ gsource0 + disease0 + age0 + gsource2 + disease2 + 
    age2 + strata(version)

  Res.Df Df  Chisq Pr(>Chisq)
1   4013                     
2   4012  1 0.0038     0.9508
Code show/hide
linearHypothesis(fitd, "disease0=disease2")
Linear hypothesis test

Hypothesis:
disease0 - disease2 = 0

Model 1: restricted model
Model 2: Surv(time, dc) ~ gsource0 + disease0 + age0 + gsource2 + disease2 + 
    age2 + strata(version)

  Res.Df Df  Chisq Pr(>Chisq)  
1   4013                       
2   4012  1 5.1615    0.02309 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code show/hide
linearHypothesis(fitd, "age0=age2")
Linear hypothesis test

Hypothesis:
age0 - age2 = 0

Model 1: restricted model
Model 2: Surv(time, dc) ~ gsource0 + disease0 + age0 + gsource2 + disease2 + 
    age2 + strata(version)

  Res.Df Df  Chisq Pr(>Chisq)
1   4013                     
2   4012  1 0.3233     0.5696
Code show/hide
# Under hypothesis of equal coefficients for gsource and age
(fitdreduced<-coxph(Surv(time, dc) ~
        bmonly + disease0 + disease2 + I(age/10) +
        strata(version)+cluster(id), data=double02, ties="breslow"))
Call:
coxph(formula = Surv(time, dc) ~ bmonly + disease0 + disease2 + 
    I(age/10) + strata(version), data = double02, ties = "breslow", 
    cluster = id)

              coef exp(coef) se(coef) robust se      z        p
bmonly    -0.16062   0.85161  0.05532   0.07721 -2.080   0.0375
disease0   0.45582   1.57747  0.07687   0.07717  5.907 3.49e-09
disease2   0.40426   1.49820  0.07867   0.07903  5.116 3.13e-07
I(age/10)  0.17099   1.18648  0.01847   0.02599  6.579 4.73e-11

Likelihood ratio test=158.6  on 4 df, p=< 2.2e-16
n= 4018, number of events= 1501 
Code show/hide
# NB: both GvHD and death without GvHD count as event here
bmt$gvhdnew<-bmt$gvhd
bmt$newtgvhd <- ifelse(bmt$gvhdnew == 1, bmt$tgvhd, bmt$intxsurv)
bmt$gvhdnew  <- ifelse(bmt$gvhdnew == 0 & bmt$dead==1, 1, bmt$gvhdnew)

rfs<-mutate(bmt,
            version=1,
            dc=state0>0,
            time=intxrel,
            gsource0=bmonly,
            gsource2=0,
            gsourceG=0,
            disease0=all,
            disease2=0,
            diseaseG=0,
            age0=age/10,
            age2=0,
            ageG=0)

srv<-mutate(bmt,
            version=2,
            dc=dead,
            time=intxsurv,
            gsource0=0,
            gsource2=bmonly,
            gsourceG=0,
            disease0=0,
            disease2=all,
            diseaseG=0,
            age0=0,
            age2=age/10,
            ageG=0)

Gfs<-mutate(bmt,
            version=3,
            dc=gvhdnew,
            time=newtgvhd,
            gsource0=0,
            gsource2=0,
            gsourceG=bmonly,
            disease0=0,
            disease2=0,
            diseaseG=all,
            age0=0,
            age2=0,
            ageG=age/10)

trible02G<-rbind(rfs,srv,Gfs)

(fitt<-coxph(Surv(time, dc) ~
        gsource0 + disease0 + age0+
        gsource2 + disease2 + age2+
        gsourceG + diseaseG + ageG+
        strata(version)+cluster(id), data=trible02G, ties="breslow"))
Call:
coxph(formula = Surv(time, dc) ~ gsource0 + disease0 + age0 + 
    gsource2 + disease2 + age2 + gsourceG + diseaseG + ageG + 
    strata(version), data = trible02G, ties = "breslow", cluster = id)

             coef exp(coef) se(coef) robust se      z        p
gsource0 -0.16108   0.85122  0.07745   0.07654 -2.105   0.0353
disease0  0.45464   1.57560  0.07773   0.07720  5.889 3.88e-09
age0      0.16917   1.18432  0.02588   0.02593  6.523 6.87e-11
gsource2 -0.16010   0.85206  0.07904   0.07871 -2.034   0.0419
disease2  0.40548   1.50002  0.07956   0.07915  5.123 3.01e-07
age2      0.17286   1.18871  0.02636   0.02646  6.534 6.42e-11
gsourceG -0.26024   0.77086  0.05930   0.05907 -4.406 1.05e-05
diseaseG  0.29235   1.33958  0.06005   0.05986  4.884 1.04e-06
ageG      0.11699   1.12410  0.01936   0.01919  6.096 1.09e-09

Likelihood ratio test=256  on 9 df, p=< 2.2e-16
n= 6027, number of events= 2825 
Code show/hide
data double02; set bmt; 
    /* joint analysis of relapse-free and overall survival */
    version=1; dc=state0>0; time=intxrel; gsource0=bmonly; gsource2=0;
    disease0=all; disease2=0; age0=age; age2=0; output;
    version=2; dc=dead; time=intxsurv; gsource2=bmonly; gsource0=0;
    disease2=all; disease0=0; age2=age; age0=0; output;
run;
proc phreg data=double02 covs(aggregate);
    /* NB bmonly and all are now binary quantitative */
    model time*dc(0)=gsource0 gsource2 disease0 disease2 age0 age2 / corrb;
    strata version;
    id id;
    gs:  test gsource0=gsource2;
    dis: test disease0=disease2;
    a:   test age0=age2;
run;

                                     The PHREG Procedure

                          Analysis of Maximum Likelihood Estimates

                    Parameter      Standard    StdErr                                  Hazard
Parameter    DF      Estimate         Error     Ratio    Chi-Square    Pr > ChiSq       Ratio

gsource0      1      -0.16108       0.07654     0.988        4.4293        0.0353       0.851
gsource2      1      -0.16011       0.07871     0.996        4.1375        0.0419       0.852
disease0      1       0.45467       0.07720     0.993       34.6864        <.0001       1.576
disease2      1       0.40549       0.07915     0.995       26.2453        <.0001       1.500
age0          1       0.01692       0.00259     1.002       42.5543        <.0001       1.017
age2          1       0.01729       0.00265     1.004       42.6865        <.0001       1.017

                                Estimated Correlation Matrix

 Parameter      gsource0      gsource2      disease0      disease2         age0         age2

 gsource0         1.0000        0.9797       -0.0317       -0.0218       0.3066       0.3177
 gsource2         0.9797        1.0000       -0.0232       -0.0221       0.3172       0.3293
 disease0        -0.0317       -0.0232        1.0000        0.9620       0.2135       0.2039
 disease2        -0.0218       -0.0221        0.9620        1.0000       0.2006       0.2125
 age0             0.3066        0.3172        0.2135        0.2006       1.0000       0.9695
 age2             0.3177        0.3293        0.2039        0.2125       0.9695       1.0000

                              Linear Hypotheses Testing Results

                                          Wald
                           Label    Chi-Square      DF    Pr > ChiSq

                           gs           0.0038       1        0.9507
                           dis          5.1662       1        0.0230
                           a            0.3233       1        0.5696


* Under hypothesis of equal coefficients for gsource and age;
proc phreg data=double02 covs(aggregate);
    model time*dc(0)=bmonly disease0 disease2 age;
    strata version;
    id id;
    dis: test disease0=disease2;
run;
                           
                                     The PHREG Procedure

                          Analysis of Maximum Likelihood Estimates

                    Parameter      Standard    StdErr                                  Hazard
Parameter    DF      Estimate         Error     Ratio    Chi-Square    Pr > ChiSq       Ratio

bmonly        1      -0.16063       0.07721     1.396        4.3283        0.0375       0.852
disease0      1       0.45585       0.07717     1.004       34.8938        <.0001       1.578
disease2      1       0.40428       0.07903     1.005       26.1713        <.0001       1.498
age10         1       0.17099       0.02599     1.407       43.2845        <.0001       1.186

                              Linear Hypotheses Testing Results

                                          Wald
                           Label    Chi-Square      DF    Pr > ChiSq

                           dis          6.3329       1        0.0119


data bmt; set bmt;
    gvhdny=gvhd; 
    if gvhdny=1 then nytgvhd=tgvhd;
    if gvhdny=0 then do; nytgvhd=intxsurv; if dead=1 then gvhdny=1; end; 
    /* NB both GvHD and death without GvHD */
run;
data triple02G; set bmt; 
    /* joint analysis of relapse-free, GvHD-free and overall survival */
    version=1; dc=state0>0; time=intxrel; gsource0=bmonly; 
    gsource2=0; gsourceG=0;
    disease0=all; disease2=0; diseaseG=0; age0=age; age2=0; ageG=0; output;
    version=2; dc=dead; time=intxsurv; gsource2=bmonly; 
    gsource0=0; gsourceG=0;
    disease2=all; disease0=0; diseaseG=0; age2=age; age0=0; ageG=0; output;
    version=3; dc=gvhdny; time=nytgvhd; gsourceG=bmonly; gsource0=0; gsource2=0;
    diseaseG=all; disease0=0; disease2=0; ageG=age; age0=0; age2=0; output;
run;
proc phreg data=triple02G covs(aggregate) covout outest=params3;
    /* bmonly and all binary quatitative*/
    model time*dc(0)=gsource0 gsource2 gsourceG disease0 disease2 diseaseG
    age0 age2 ageG;
    strata version;
    id id;
run;
    
                                     The PHREG Procedure

                          Analysis of Maximum Likelihood Estimates

                    Parameter      Standard    StdErr                                  Hazard
Parameter    DF      Estimate         Error     Ratio    Chi-Square    Pr > ChiSq       Ratio

gsource0      1      -0.16108       0.07654     0.988        4.4293        0.0353       0.851
gsource2      1      -0.16011       0.07871     0.996        4.1375        0.0419       0.852
gsourceG      1      -0.26025       0.05907     0.996       19.4108        <.0001       0.771
disease0      1       0.45467       0.07720     0.993       34.6864        <.0001       1.576
disease2      1       0.40549       0.07915     0.995       26.2453        <.0001       1.500
diseaseG      1       0.29236       0.05986     0.997       23.8552        <.0001       1.340
age0          1       0.01692       0.00259     1.002       42.5543        <.0001       1.017
age2          1       0.01729       0.00265     1.004       42.6865        <.0001       1.017
ageG          1       0.01170       0.00192     0.991       37.1562        <.0001       1.012

Exercises

The SAS solutions are available as single files for download:

Exercise 4.1

Consider the data from the Copenhagen Holter study and estimate the probabilities of stroke-free survival for subjects with or without ESVEA using the Kaplan-Meier estimator.

The data should be loaded as chs_data

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

We then load the relevant packages

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(survminer) #Plots of survival curves
library(survRM2) #RMST 
library(mstate) #probtrans, ELOS

Finally, we will convert the time variables to years and add a time variable and status indicator for the composite end-point stroke-free survival.

Code show/hide
chs_data <- chs_data %>% mutate(timeafib = timeafib/365.25,
                                timestroke = timestroke/365.25,
                                timedeath = timedeath/365.25,
                                timestrokeordeath = ifelse(stroke == 1, timestroke, timedeath),
                                strokeordeath = ifelse(stroke ==1, 1, death))

To estimate the probability of stroke-free survival for subjects with or without ESVEA using the Kaplan-Meier estimator we use the survfit function from the survival package.

Code show/hide
# Kaplan-Meier estimate of the survival functions
km41 <- survfit(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea, data = chs_data)

kmdata41 <- data.frame(time = km41$time,
                        surv = km41$surv, 
                        esvea = c(rep(names(km41$strata)[1], km41$strata[1]),
                               rep(names(km41$strata)[2], km41$strata[2])))

Then, we can plot the Kaplan-Meier estimates of the survival probabilities against time.

Code show/hide
# Plotting the Kaplan-Meier estimate
(fig41 <- ggplot(data = kmdata41) + geom_step(aes(x = time, y = surv, linetype = esvea), size = 1) + 
          scale_linetype_discrete("ESVEA", labels = c("0", "1")) + 
          ylim(c(0,1)) +
          xlab("Time since randomization (years)") + 
          ylab("Probability of stroke-free survival"))

Code show/hide
* We must first load the data;
proc import out = chs_data
    datafile = 'data/cphholter.csv'
    dbms= csv replace;
    getnames=yes;
run;

* We will convert the time variables (timeafib, timestroke, and timedeath) from days to years;
* Furthermore, we add variables for the composite end-point of stroke or death without stroke;
data chs_data;
    set chs_data;
    timeafib = timeafib/365.25;
    timestroke = timestroke/365.25;
    timedeath = timedeath/365.25;
    timestrokeordeath = timedeath;
    if stroke = 1 then timestrokeordeath = timestroke;
    strokeordeath = death;
    if stroke = 1 then strokeordeath = 1;
run;

* We estimate the Kaplan-Meier survival function for subjects with or without ESVEA with the phreg procedure where 'esvea' is added
  in the strata statement. The result is saved as 'survdat'.;

title "4.1: Stroke-free survival probabilities estimated with the Kaplan-Meier estimator";
proc phreg data=chs_data;
    model timestrokeordeath*strokeordeath(0)=;
    strata esvea;
    baseline out=survdat survival=km;
run;

* Then the estimates are plotted using the gplot procedure;

proc gplot data=survdat;
plot km*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 1 by 0.1 label=(a=90 'Survival probability');
    symbol1 i=stepjl c=red;
    symbol2 i=stepjl c=blue;
run;
quit;

Exercise 4.2

Consider the Cox model for stroke-free survival in the Copenhagen Holter study including the covariates ESVEA, sex, age, and systolic blood pressure (Exercise 2.4).

1.

Estimate the survival functions for a female subject aged 65 years and with systolic blood pressure equal to 150 mmHg – either with or without ESVEA.

The Cox model including ESVEA, sex, age, and systolic blood pressure is fitted using the coxph function from the survival package as first done in exercise 2.4.1.

Code show/hide
# Cox model for the composite end-point stroke or death with covariates ESVEA, sex, age, and systolic blood pressure
cox241 <- coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + sex + age + sbp , data = chs_data, method = "breslow")
summary(cox241)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + 
    sex + age + sbp, data = chs_data, method = "breslow")

  n= 675, number of events= 285 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.318284  1.374767 0.152587 2.086   0.0370 *  
sex   0.577585  1.781731 0.126946 4.550 5.37e-06 ***
age   0.076658  1.079673 0.009362 8.189 2.64e-16 ***
sbp   0.005152  1.005165 0.002438 2.113   0.0346 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.375     0.7274     1.019     1.854
sex       1.782     0.5613     1.389     2.285
age       1.080     0.9262     1.060     1.100
sbp       1.005     0.9949     1.000     1.010

Concordance= 0.672  (se = 0.016 )
Likelihood ratio test= 99.45  on 4 df,   p=<2e-16
Wald test            = 104.1  on 4 df,   p=<2e-16
Score (logrank) test = 110  on 4 df,   p=<2e-16

We will now estimate the survival functions for a 65-year-old female (sex = 0) with a systolic blood pressure of 150mmHg with or without ESVEA. The values of the covariates are stored in the data frame covar. The survival function is then found using the survfit function with the formula argument given by the Cox model and the newdata argument given by the data frame covar.

Code show/hide
# Defining the covariates
covar <- data.frame(esvea = c(0,1), sex = 0, age = 65, sbp = 150)

# Estimate of the survival function given the covariate values
surv421 <- survfit(cox241, newdata = covar)

Finally, the survival functions are plotted

Code show/hide
# Plotting the predicted survival probabilities.
(plot421 <- ggplot() + geom_step(aes(x = surv421$time, y = surv421$surv[,1], color = "ESVEA=0")) +
  geom_step(aes(x = surv421$time, y = surv421$surv[,2], color = "ESVEA=1")) +
  theme(legend.title=element_blank()) +
  ylab("Probability") + xlab("Time since randomization (years)") + 
  ggtitle("Stroke-free survival for a 65-year-old woman with a sbp of 150mmHg")  + ylim(c(0,1)))

Code show/hide
* To estimate the stroke-free survival functions for a 65-year old woman with a systolic blood pressure of 150mmHg with or without
  ESVEA we will first create a data frame 'cov' with the desired values of the covariate.;

data cov;
    esvea = 0; sex = 0; age = 65; sbp = 150; output;
    esvea = 1; sex = 0; age = 65; sbp = 150; output;
run;

* Then, a Cox model including ESVEA, sex, age, and systolic blood pressure is fitted with the phreg procedure and the stroke-free
  survival functions for subjects with values according to 'cov' are saved as 'survdata'.;

title "4.2: Stroke-free survival for a 65-year old woman with sbp = 150mmHg";
proc phreg data=chs_data;
    model timestrokeordeath*strokeordeath(0)=esvea sex age sbp;
    baseline out=survdata survival=surv covariates = cov;
run;

* Finally, the survival functions are plotted using the gplot procedure;

proc gplot data=survdata;
    plot surv*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 1 by 0.1 label=(a=90 'Stroke-free survival probability');
    symbol1  i=stepjl c=blue;
    symbol2  i=stepjl c=red;
run;
quit;

2.

Estimate the survival functions for patients with or without ESVEA using the g-formula.

To estimate the survival functions using the g-formula, two predictions are made for each subject, i: one setting ESVEA \((Z_1)\) to 0, and one setting ESVEA to 1, while keeping the observed values of sex, age, and systolic blood pressure \((Z_2, Z_3, Z_4)\). The g-formula estimate is then

\[ \hat{S}_j(t) = \frac{1}{n}\sum_{i}\hat{S}(t|Z_1 = j, Z_{2i}, Z_{3i}, Z_{4i}), j = 0,1 \] Thus, we will make two new data frames corresponding to the two settings, i.e. ESVEA = 0 (chs_covar0) or ESVEA = 1 (chs_covar1) and all other covariates equal to the observed values. Then, the two predictions of the survival functions for each subject are found using the survfit function, and the average of these predictions with or without ESVEA are taken to obtaian the g-formula estimate.

Code show/hide
# Creating data sets with or without ESVEA while keeping the observed values of sex, age, and sbp for all subjects
chs_covar0 <- data.frame(esvea = 0, sex = chs_data$sex, age = chs_data$age, sbp = chs_data$sbp)
chs_covar1 <- data.frame(esvea = 1, sex = chs_data$sex, age = chs_data$age, sbp = chs_data$sbp)
                           
# Predicting the survival functions for all rows in chs_covar0 and chs_covar1
pred0_422 <- survfit(cox241, newdata = chs_covar0)
pred1_422 <- survfit(cox241, newdata = chs_covar1)

# Taking the average prediction at each transition time
surv0_422 <- rowMeans(pred0_422$surv, na.rm = TRUE)
surv1_422 <- rowMeans(pred1_422$surv, na.rm = TRUE)

The survival functions estimated using the g-formula are then plotted against time.

Code show/hide
# Plotting the predicted survival probabilities (g-formula).
(plot422 <- ggplot() + geom_step(aes(x = pred0_422$time, y = surv0_422, color = "ESVEA=0")) +
  geom_step(aes(x =pred1_422$time, y = surv1_422, color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Probability") + xlab("Time since randomization (years)") + 
  ggtitle("Survival function for stroke-free survival (g-formula)")) + ylim(c(0,1))

Code show/hide
* A Cox model including ESVEA, sex, age, and systolic blood pressure is fitted and 'diradj group = esvea' is added to obtain the
  predicted survival functions for patients with or without ESVEA using the g-formula. The data is saved as 'gsurv'.;

title "4.2: Cox model for the outcome stroke-free survival including ESVEA, sex, age, and systolic blood pressure";
proc phreg data=chs_data;
    class esvea (ref = '0');
    model timestrokeordeath*strokeordeath(0)=esvea sex age sbp;
    baseline out=gsurv survival=surv / diradj group=esvea;
run;

* The survival functions are then plotted using the gplot procedure;

title "4.2: Stroke-free survival probabilities estimated using the G-formula";
proc gplot data=gsurv;
    plot surv*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Estimated survival function (g-formula)');
    symbol1  v=none i=stepjl c=blue;
    symbol2  v=none i=stepjl c=red;
run;
quit;

Exercise 4.3

Consider the data from the Copenhagen Holter study and fit a linear model for the 3-year restricted mean time to the composite end-point stroke or death including ESVEA, sex, age, and systolic blood pressure.

We will use the rmst2 function from the survRM2 package to fit a linear model for the 3-year restricted mean time to the composite end-point stroke or death including ESVEA, sex, age, and systolic blood pressure. We must include the arguments time, status, arm, tau and covariates which in this case are timestrokeordeath, strokeordeath, esvea, 3 and sex,age, and systolic blood pressure respectively.

Code show/hide
# 3-year restricted mean time to the composite end-point stroke or death

# Attention must be restricted to subjects with complete covariate data (sbp).

newchs <- subset(chs_data,!is.na(sbp))

rmst <- rmst2(time = newchs$timestrokeordeath, status = newchs$strokeordeath, arm = newchs$esvea, tau = 3,  covariates = newchs[, c(8,9,17)] )
rmst$RMST.difference.adjusted
                   coef     se(coef)          z          p     lower .95
intercept  3.3551746975 0.2078355430 16.1434115 0.00000000  2.9478245185
arm       -0.0273741987 0.0527081358 -0.5193543 0.60351367 -0.1306802466
sex       -0.0540324232 0.0352191476 -1.5341775 0.12498600 -0.1230606841
age       -0.0073221791 0.0028486997 -2.5703583 0.01015934 -0.0129055280
sbp        0.0005144224 0.0006748332  0.7622955 0.44588365 -0.0008082265
             upper .95
intercept  3.762524877
arm        0.075931849
sex        0.014995838
age       -0.001738830
sbp        0.001837071

Thus, we obtain the following model for the 3-year restricted mean time to the composite end-point stroke or death

\[\varepsilon(3|Z) = 3.3552 -0.0274\cdot Z_1 -0.054\cdot Z_2 -0.0073\cdot Z_3 + 0.0005\cdot Z_4,\]

where \((Z_1,Z_2,Z_3,Z_4)\) are ESVEA, sex, age, and systolic blood pressure.

We will finally estimate the 3-year resticted mean time to the composite end-point stroke or death for subjects with or without ESVEA non-parametrically using the area under the Kaplan-Meier curve. We use the object from Exercise 4.1.

Code show/hide
print(km41,rmean=3)
Call: survfit(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea, 
    data = chs_data)

          n events rmean* se(rmean) median 0.95LCL 0.95UCL
esvea=0 579    230   2.93    0.0151     NA      NA      NA
esvea=1  99     57   2.87    0.0487     11    8.93      NA
    * restricted mean with upper limit =  3 
Code show/hide
* We will estimate the 3-year restricted mean time survival to the composite end-point strokke or death including ESVEA, sex, age, 
  and systolic blood pressure using the rmstreg procedure. We specify 'tau = 3' in the rmstreg statement to obtain a 3 year time 
  limit and 'link = linear' in the model statement to get a linear model. NB: requires SAS STAT 15.1;

title "4.3";
proc rmstreg data=chs_data tau=3;
   model timestrokeordeath*strokeordeath(0)=esvea sex age sbp / link=linear;
run;

* Thus, we obtain the following model for the 3-year restricted mean time to the composite end-point stroke or death
  epsilon(3|Z) = 3.3552 - 0.0274*Z1 - 0.0540*Z2 - 0.0073*Z3 + 0.0005*Z4, where (Z1,Z2,Z3,Z4) are ESVEA, age, sex, and systolic blood 
  pressure;

* We will also present the non-parametric estimates. We restrict the data set at tau=3.  NB: requires SAS STAT 15.1;

proc lifetest data=chs_data rmst(tau=3);
time timestrokeordeath*strokeordeath(0);
strata esvea;
run;

Exercise 4.4

Consider the Cox models for the cause-specific hazards for the outcomes stroke and death without stroke in the Copenhagen Holter study including ESVEA, sex, age, and systolic blood pressure (Exercise 2.7). Estimate (using plug-in) the cumulative incidences for both end-points for a female subject aged 65 years and with systolic blood pressure equal to 150 mmHg – either with or without ESVEA.

The cumulative incidence functions for cause \(h\) for a subject with covariates \(Z\) is calculated using the formula \(F_{h}(t|Z) = \int_0^t S(u|Z)\alpha_{h}(u|Z)du\), where \(S(u|Z) = \prod\limits_{j} \exp(-\int_0^u \alpha_j(x|Z) dx)\).

We will first fit Cox models for the cause-specific outcomes. For the outcome stroke we will use timestrokeordeath as time variable and stroke as status indicator in the Surv object. For the outcome death without stroke we will also use timestrokeordeath as time variable, but we must create a new status indicator, which will be named death_wo_stroke.

Code show/hide
# Cox model with stroke as outcome
cox44_stroke <- coxph(formula = Surv(timestrokeordeath, stroke) ~ esvea + sex + age + sbp , data = chs_data)
summary(cox44_stroke)
Call:
coxph(formula = Surv(timestrokeordeath, stroke) ~ esvea + sex + 
    age + sbp, data = chs_data)

  n= 675, number of events= 72 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.702407  2.018606 0.269968 2.602  0.00927 ** 
sex   0.491881  1.635389 0.248634 1.978  0.04789 *  
age   0.078980  1.082183 0.019054 4.145  3.4e-05 ***
sbp   0.011340  1.011404 0.004651 2.438  0.01477 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     2.019     0.4954     1.189     3.426
sex       1.635     0.6115     1.005     2.662
age       1.082     0.9241     1.043     1.123
sbp       1.011     0.9887     1.002     1.021

Concordance= 0.728  (se = 0.028 )
Likelihood ratio test= 41.22  on 4 df,   p=2e-08
Wald test            = 43.26  on 4 df,   p=9e-09
Score (logrank) test = 47.2  on 4 df,   p=1e-09
Code show/hide
# Status indicator for death without stroke
chs_data$death_wo_stroke <- ifelse(chs_data$stroke == 1, 0, chs_data$death)

# Cox model with death without stroke as outcome
cox44_death <- coxph(formula = Surv(timestrokeordeath, death_wo_stroke) ~ esvea + sex + age + sbp , data = chs_data)
summary(cox44_death)
Call:
coxph(formula = Surv(timestrokeordeath, death_wo_stroke) ~ esvea + 
    sex + age + sbp, data = chs_data)

  n= 675, number of events= 213 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.160110  1.173640 0.186795 0.857    0.391    
sex   0.605186  1.831592 0.147665 4.098 4.16e-05 ***
age   0.076075  1.079043 0.010758 7.071 1.54e-12 ***
sbp   0.002955  1.002960 0.002867 1.031    0.303    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.174     0.8521    0.8138     1.693
sex       1.832     0.5460    1.3713     2.446
age       1.079     0.9267    1.0565     1.102
sbp       1.003     0.9970    0.9973     1.009

Concordance= 0.657  (se = 0.019 )
Likelihood ratio test= 64.38  on 4 df,   p=3e-13
Wald test            = 67.44  on 4 df,   p=8e-14
Score (logrank) test = 70.93  on 4 df,   p=1e-14

Then, the hazard, \(\alpha_h(t|Z)\) and \(\exp(-\int_0^ta_{h}(t|Z))\) are extracted for a 65-year-old female with systolic blood pressure of 150mmHg with or without ESVEA for each cause-specific Cox model using the survfit function.

Code show/hide
# Survfit for the cause-specific hazard stroke given covariates Z
survfit_stroke44 <- survfit(cox44_stroke, newdata = covar)
# Estimate of exp(-A_02(t|Z))
S0_stroke44 <- survfit_stroke44$surv[,1]
S1_stroke44 <- survfit_stroke44$surv[,2]
#Estimate of the hazard for stroke, alpha_02(t|Z)
haz0_stroke44 <- c(0, diff(survfit_stroke44$cumhaz[,1]))
haz1_stroke44 <- c(0, diff(survfit_stroke44$cumhaz[,2]))

# Survfit for the cause specific hazard death without stroke given covariates Z
survfit_death44 <- survfit(cox44_death, newdata = covar)
# Estimate of exp(-A_03(t|Z))
S0_death44 <- survfit_death44$surv[,1]
S1_death44 <- survfit_death44$surv[,2]
# Estimate of the hazard for death without stroke, alpha_03(t|Z)
haz0_death44 <- c(0, diff(survfit_death44$cumhaz[,1]))
haz1_death44 <- c(0, diff(survfit_death44$cumhaz[,2]))
Code show/hide
# Estimate of cumulative incidence functions for stroke
cif0_stroke44 <- cumsum(S0_stroke44*S0_death44*haz0_stroke44)
cif1_stroke44 <- cumsum(S1_stroke44*S1_death44*haz1_stroke44)

#Plotting the cumulative incidence function with stroke as outcome for a 65-year-old female with sbp of 150mmHg
(plot44_stroke <- ggplot() + geom_step(aes(x = survfit_stroke44$time, y = cif0_stroke44, color = "ESVEA=0")) +
  geom_step(aes(x =survfit_stroke44$time, y = cif1_stroke44, color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Cumulative incidence") + xlab("Time since randomization (years)") + ggtitle("Cox: CIF for stroke for a 65-year-old female with sbp 150mmHg"))

Likewise, we can estimate and plot the cumulative incidence functions for the outcome death without stroke.

Code show/hide
# Estimate of cumulative incidence functions for death without stroke
cif0_death44 <- cumsum(S0_stroke44*S0_death44*haz0_death44)
cif1_death44 <- cumsum(S1_stroke44*S1_death44*haz1_death44)

#Plotting the cumulative incidence function with stroke as outcome for a 65-year-old female with sbp of 150mmHg
(plot44_death <- ggplot() + geom_step(aes(x = survfit_death44$time, y = cif0_death44, color = "ESVEA=0")) +
  geom_step(aes(x =survfit_death44$time, y = cif1_death44, color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Cumulative incidence") + xlab("Time since randomization (years)") + 
  ggtitle("Cox: CIF for death for a 65-year-old female with sbp 150mmHg"))

Code show/hide
* We must first create a variable for the competing risks which we will call 'event'. 0 is censored, 1 is stroke, and 2 is death 
  without stroke;

data chs_data;
    set chs_data;
    death_wo_stroke = death;
    if stroke = 1 then death_wo_stroke = 0;
    event = 0;
    if stroke = 1 then event = 1;
    if death_wo_stroke = 1 then event = 2;
run;

* Then we will fit a Cox model returning the predicted cumulative incidence functions with the specified covariates. 
  This is done by adding the argument eventcode(cox) to the model statement and adding the 'cif' argument in the baseline 
  statement.  NB: requires SAS STAT 15.1;

proc phreg data = chs_data noprint; 
    model timestrokeordeath*event(0) =esvea sex age sbp / eventcode(cox) = 1;
    baseline covariates = cov out=cif44_stroke cif = cif;
run;

* Finally, the cumulative incidence functions are plotted using the gplot procedure;

title '4.4: CIF for the outcome stroke (based on Cox model)';
proc gplot data=cif44_stroke;
    plot cif*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 0.2 by 0.02 label=(a=90 'CIF for stroke');
    symbol1  i=stepjl c=blue;
    symbol2  i=stepjl c=red;
run;

* Then, we repeat the procedure for the outcome death without stroke;

proc phreg data = chs_data noprint; 
    model timestrokeordeath*event(0) =esvea sex age sbp / eventcode(cox) = 2;
    baseline covariates = cov out=cif44_death cif = cif;
run;

* We can now plot the cumulative incidence functions gpt death without stroke using the gplot procedure;

title '4.4: CIF for the outcome death without stroke (based on Cox model)';
proc gplot data=cif44_death;
    plot cif*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 0.3 by 0.03 label=(a=90 'CIF for death w/o stroke');
    symbol1  i=stepjl c=blue;
    symbol2  i=stepjl c=red;
run;
quit;

Exercise 4.5

1.

Repeat the previous question using instead Fine-Gray models.

We will fit the Fine-Gray models using the finegray function from the survival package. We must make a new status indicator with one level for each possible outcome, which we will call fg_event. The value 0 must indicate stroke-free survival and we let 1 indicate stroke and 2 indicate death without stroke.

Code show/hide
#Creating event variable, 0 = censored, 1 = stroke, 2 = death w/o stroke
fg_event <- with(chs_data, ifelse(death_wo_stroke == 0, stroke, death_wo_stroke*2))

The finegray function takes a formula argument with a Surv object on the left of ‘~’ and ‘.’ on the right. The cause of interest is specified by the etype argument.

Afterwards the model is fitted using the coxph function with the data frame created by the finegray function. The formula argument should have Surv(fgstart,fgstop,fgstatus) on the left side of ‘~’ and as usual the covariates of interest (ESVEA, age, sex, and sbp) on the right. The argument weigth = fgwt must also be included.

Code show/hide
#Fitting Fine-Gray model for stroke
fgdata_stroke <- finegray(Surv(timestrokeordeath, factor(fg_event)) ~ ., etype = 1, data =chs_data)
fg45_stroke <- coxph(Surv(fgstart, fgstop, fgstatus) ~ esvea + sex + age + sbp, weight = fgwt, data = fgdata_stroke)
summary(fg45_stroke)
Call:
coxph(formula = Surv(fgstart, fgstop, fgstatus) ~ esvea + sex + 
    age + sbp, data = fgdata_stroke, weights = fgwt)

  n= 888, number of events= 72 
   (4 observations deleted due to missingness)

          coef exp(coef) se(coef) robust se     z Pr(>|z|)    
esvea 0.593921  1.811075 0.271675  0.275526 2.156 0.031116 *  
sex   0.379189  1.461099 0.248427  0.243020 1.560 0.118684    
age   0.063347  1.065397 0.019072  0.018469 3.430 0.000604 ***
sbp   0.010629  1.010686 0.004608  0.004196 2.533 0.011305 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.811     0.5522    1.0554     3.108
sex       1.461     0.6844    0.9074     2.353
age       1.065     0.9386    1.0275     1.105
sbp       1.011     0.9894    1.0024     1.019

Concordance= 0.699  (se = 0.029 )
Likelihood ratio test= 30.7  on 4 df,   p=4e-06
Wald test            = 37.72  on 4 df,   p=1e-07
Score (logrank) test = 34.45  on 4 df,   p=6e-07,   Robust = 24.82  p=5e-05

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
Code show/hide
#Fitting Fine-Gray for death without stroke
fgdata_death <- finegray(Surv(timestrokeordeath, factor(fg_event)) ~ ., etype = 2, data =chs_data)
fg45_death <- coxph(Surv(fgstart, fgstop, fgstatus) ~ esvea + sex + age + sbp, weight = fgwt, data = fgdata_death)
summary(fg45_death)
Call:
coxph(formula = Surv(fgstart, fgstop, fgstatus) ~ esvea + sex + 
    age + sbp, data = fgdata_death, weights = fgwt)

  n= 1172, number of events= 213 
   (10 observations deleted due to missingness)

           coef exp(coef)  se(coef) robust se      z Pr(>|z|)    
esvea -0.006269  0.993751  0.188059  0.193559 -0.032 0.974164    
sex    0.530219  1.699304  0.148362  0.146047  3.630 0.000283 ***
age    0.066495  1.068756  0.010812  0.010673  6.230 4.65e-10 ***
sbp    0.001601  1.001602  0.002927  0.002917  0.549 0.583197    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea    0.9938     1.0063    0.6800     1.452
sex      1.6993     0.5885    1.2763     2.262
age      1.0688     0.9357    1.0466     1.091
sbp      1.0016     0.9984    0.9959     1.007

Concordance= 0.636  (se = 0.019 )
Likelihood ratio test= 46.38  on 4 df,   p=2e-09
Wald test            = 50.97  on 4 df,   p=2e-10
Score (logrank) test = 49.87  on 4 df,   p=4e-10,   Robust = 41  p=3e-08

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).

As described in Section 4.2.2, the Fine-Gray model has a simple expression for the cumulative incidence function for cause \(h\) since

\[ \log(-\log(1-F_h(t|Z))) = \log(\tilde{A}_{0h}(t)) + LP_h \\ \iff F_h(t|Z) = 1-\exp(-\tilde{A}_{0h}(t)\exp(LP_h)) \]

Thus, we can use the survfit function to obtain the estimate for \(\exp(-\tilde{A}_{0h}(t)\exp(\beta Z))\), given \(Z_1 = 0,1\) and \((Z_2,Z_3,Z_4) = (0,65,150)\) for the outcome stroke.

Code show/hide
# Cumulative incidence functions for the outcome stroke given covariates Z
cif_stroke451 <- 1- survfit(fg45_stroke, newdata = covar)$surv

The Fine-Gray estimate of the cumulative incidence function for stroke can then be plotted.

Code show/hide
# Plotting the predicted cumulative incidence functions.
(plot451_stroke <- ggplot() + geom_step(aes(x = survfit(fg45_stroke, newdata = covar)$time, y = cif_stroke451[,1], color = "ESVEA=0")) +
  geom_step(aes(x =survfit(fg45_stroke, newdata = covar)$time, y = cif_stroke451[,2], color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Cumulative incidence") + xlab("Time since randomization (years)") + 
  ggtitle("Fine-Gray: CIF for stroke for a 65-year-old woman with sbp 150 mmHg"))

We repeat the procedure above for the outcome death without stroke

Code show/hide
# Cumulative incidence functions for the outcome death without stroke given covariates Z
cif_death451 <- 1- survfit(fg45_death, newdata = covar)$surv

# Plotting the predicted cumulative incidence functions.
(plot451_death <- ggplot() + geom_step(aes(x = survfit(fg45_death, newdata = covar)$time, y = cif_death451[,1], color = "ESVEA=0")) +
  geom_step(aes(x =survfit(fg45_death, newdata = covar)$time, y = cif_death451[,2], color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Cumulative incidence") + xlab("Time since randomization (years)") 
  + ggtitle("Fine-Gray: CIF for death w/o stroke for a 65-year-old woman with sbp 150 mmHg"))

Code show/hide
* To obtain the CIF for the outcomes stroke and death without stroke using the Fine-Gray model, 'eventcode' is added instead of
  'eventcode(cox)' in the model statement of the phreg procedure;

* We will first estimate the CIF for the outcome stroke;

proc phreg data = chs_data; 
    model timestrokeordeath*event(0) =esvea sex age sbp / eventcode = 1;
    baseline covariates = cov out=cif451_stroke cif = cif;
run;

title '4.5.1: CIF for the outcome stroke (based on Fine-Gray model)';
proc gplot data=cif451_stroke;
    plot cif*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 0.125 by 0.0125 label=(a=90 'CIF for stroke');
    symbol1  i=stepjl c=blue;
    symbol2  i=stepjl c=red;
run;

* Then we will estimate the CIF for the outcome death without stroke;

proc phreg data = chs_data; 
    model timestrokeordeath*event(0) =esvea sex age sbp / eventcode = 2;
    baseline covariates = cov out=cif451_death cif = cif;
run;

title '4.5.1: CIF for the outcome stroke (based on Fine-Gray model)';
proc gplot data=cif451_death;
    plot cif*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 0.25 by 0.025 label=(a=90 'CIF for death w/o stroke');
    symbol1  i=stepjl c=blue;
    symbol2  i=stepjl c=red;
run;
quit;

2.

Estimate the cumulative incidence functions for patients with or without ESVEA using the g-formula.

To obtain an estimate of the cumulative incidence functions with or without ESVEA using the g-formula we will exploit the simple expression of the CIF in the Fine-Gray model and simply use survfit and the data frames chs_covar0 and chs_covar1 from exercise 4.2.2 to make two predictions of \(1 -\exp(-\tilde{A}_{0h}(t)\exp(LP_h))\) for each subject, one with ESVEA and one without ESVEA. Then, the g-formula estimate is the average of the predictions with or without ESVEA.

We will find the g-formula estimate of the cumulative incidence functions for the outcome stroke.

Code show/hide
# Calculating the CIFs for stroke for all subjects with ESVEA = 0 or ESVEA = 1
cifs0_stroke452 <- 1- survfit(fg45_stroke, newdata = chs_covar0)$surv
cifs1_stroke452 <- 1- survfit(fg45_stroke, newdata = chs_covar1)$surv

# Taking the average of the CIFs
cif0_stroke452 <- rowMeans(cifs0_stroke452)
cif1_stroke452 <- rowMeans(cifs1_stroke452)

# Plotting the predicted cumulative incidence functions for death without stroke.
(plot452_stroke <- ggplot() + geom_step(aes(x = survfit(fg45_stroke, newdata = covar)$time, y = cif0_stroke452, color = "ESVEA=0")) +
  geom_step(aes(x =survfit(fg45_stroke, newdata = covar)$time, y = cif1_stroke452, color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Cumulative incidence") + xlab("Time since randomization (years)") + 
  ggtitle("Fine-Gray: CIF for stroke (g-formula)"))

Then, we repeat the procedure for the outcome death without stroke.

Code show/hide
# Calculating the CIFs for death without stroke for all subjects with ESVEA = 0 or ESVEA = 1
cifs0_death452 <- 1- survfit(fg45_death, newdata = chs_covar0)$surv
cifs1_death452 <- 1- survfit(fg45_death, newdata = chs_covar1)$surv

# Taking the average of the CIFs
cif0_death452 <- rowMeans(cifs0_death452)
cif1_death452 <- rowMeans(cifs1_death452)

# Plotting the predicted cumulative incidence functions for death without stroke.
(plot452_death <- ggplot() + geom_step(aes(x = survfit(fg45_death, newdata = covar)$time, y = cif0_death452, color = "ESVEA=0")) +
  geom_step(aes(x =survfit(fg45_death, newdata = covar)$time, y = cif1_death452, color = "ESVEA=1")) +  theme(legend.title=element_blank()) +
  ylab("Cumulative incidence") + xlab("Time since randomization (years)") 
  + ggtitle("Fine-Gray: CIF for death w/o stroke (g-formula)"))

We will finally compare the estimates obtained using the g-formula with the non-parametric estimates obtained using the Aalen-Johansen estimator. The latter are obtained using survfit.

Code show/hide
aj <- survfit(Surv(timestrokeordeath,factor(fg_event),type='mstate')~esvea,data=chs_data,influence = TRUE)
plot(aj,col=c(1,2,1,2))
legend("topleft",legend=c("esvea=0", "esvea=1"),col=1:2,lty=c(1,1))

Code show/hide
* We will then make a macro function taking the cause of interest (stroke or death without stroke) and value of ESVEA as arguments;

%macro cif452(cause, esvea);
* Creating modified covariates. Observed values for sex, age, and sbp, while ESVEA is either 0 or 1 for all subjects;
data covar_temp;
    set chs_data;
    esvea = &esvea;
    keep id esvea sex age sbp;
run;
* Fine-Gray model returning the predicted cumulative incidence functions for the modified covariates for all patients;
proc phreg data = chs_data noprint; 
    model timestrokeordeath*event(0) =esvea sex age sbp / eventcode = &cause;
    baseline covariates = covar_temp out=ciftest cif = cif;
run;
* Splitting the data set ciftest and returning one data set per patient containing the predicted cumulative incidence function;
%do i = 1 %to 678;
    data cif&i;
        set ciftest;
        if id = &i;
        cif&i = cif;
        keep id timestrokeordeath cif&i;  
    run;    
%end;
* Calculating the average of the cumulative incidence functions;
data res&esvea;
    set cif1-cif678 ;
    merge cif1-cif678;
    by timestrokeordeath;
    ESVEA&esvea = mean(of cif1-cif678);
    keep timestrokeordeath ESVEA&esvea;
run;
%mend;

*We call the function for the outcome stroke and ESVEA = 0,1;

%cif452(1,1);
%cif452(1,0);

* Then we create a data set containing the cumulative incidence function for both ESVEA = 0 and ESVEA = 1 and plot the result.;

data stroke452;
    set res0 res1;
    merge res0 res1;
    by timestrokeordeath;
run;

proc sgplot data=stroke452;
title1'4.5.2 - Cumulative incidence function for stroke predicted with Fine-Gray models using the g-formula';
   step y=ESVEA0 x=timestrokeordeath;
   step y=ESVEA1 x=timestrokeordeath;
   xaxis label= "Time (Years since randomization)";
   yaxis label= "Cumulative incidence";
run;

* We repeat for the outcome death without stroke. First we call our macro function;

%cif452(2,1);
%cif452(2,0);

*Then the data is collected in one data set and afterwards we plot the result;

data death452;
    set res0 res1;
    merge res0 res1;
    by timestrokeordeath;
run;

proc sgplot data=death452;
title1'4.5.2 - Cumulative incidence function for death predicted with Fine-Gray models using the g-formula';
   step y=ESVEA0 x=timestrokeordeath;
   step y=ESVEA1 x=timestrokeordeath;
   xaxis label= "Time (Years since randomization)";
   yaxis label= "Cumulative incidence";
run;


/* For comparison, we also estimate the cumulative incidences non-parametrically. */

proc lifetest data=chs_data plots=(cif);
time timestrokeordeath*event(0)/eventcode=1;
strata esvea;
run;


proc lifetest data=chs_data plots=(cif);
time timestrokeordeath*event(0)/eventcode=2;
strata esvea;
run;

Exercise 4.6

Consider the data from the Copenhagen Holter study and fit linear models for the expected time lost (numbers of years) before 3 years due to either stroke or death without stroke including ESVEA, sex, age, and systolic blood pressure.

There are currently no packages in R where a function estimating linear models for the time lost due to cause-specific outcomes are implemented.

However, Conner and Trinquart (2021) have made their code available online. Thus, the following code chunk which implements such a function is adapted from their code on github

Code show/hide
rmtl.ipcw <- function(times, event, eoi=1, tau, cov=NULL, strata=FALSE, group=NULL){
  
  if(is.null(group) & strata==TRUE){stop('Please specify a factor variable to statify weights.')}
  if(is.null(cov)){print('Warning: Fitting intercept-only model.')}
  
  # Round event times to avoid issues with survival() package rounding differently
  y <- round(times,4)
  id <- 1:length(y)
  
  # Recode so delta1 reflects event of interest, delta2 reflects all competing events. Assumes 0=censoring.
  delta1 <- ifelse(event==eoi, 1, 0)
  delta2 <- ifelse(event!=0 & event!=eoi, 1, 0)
  
  # Overall quantities
  x <- cbind(int=rep(1, length(y)), cov)
  p <- length(x[1,])
  if(is.null(group)){group <- as.factor(rep(1, length(y)))}
  
  # Recode event indicators to reflect status at chosen tau
  delta1[y>tau] <- 0
  delta2[y>tau] <- 0
  
  y <- pmin(y, tau)
  y1 <- y*delta1
  
  d0 <- 1 - (delta1 + delta2) # censoring indicator
  d0[y==tau] <- 0  # If follow-up lasts til tau, the event will not count as 'censored' in IPCW weights
  weights <- NULL
  
  ## Calculate IPCW weights (option to stratify by group) ## 
  
  if(strata==TRUE){
    for(aa in 1:length(unique(group))){
      # Subset the group
      a <- unique(group)[aa]
      d0.a <- d0[group==a]
      delta1.a <- delta1[group==a]
      y.a <- y[group==a]
      x.a <- x[group==a,]
      n.a <- length(d0.a)
      orig.id.a0 <- orig.id.a <- id[group==a]
      
      # Order the event times
      id.a <- order(y.a)
      y.a <- y.a[id.a]
      d0.a <- d0.a[id.a]
      delta1.a <- delta1.a[id.a]
      x.a <- x.a[id.a,]
      orig.id.a <- orig.id.a[id.a]
      
      # Derive IPCW
      fit <- survfit(Surv(y.a, d0.a) ~ 1)
      weights.a <- (1-d0.a)/rep(fit$surv, table(y.a))
      
      # Need to assign weights accordig to original ID, not ordered by event time
      linked.weights.a <- cbind(orig.id.a, weights.a, delta1.a, d0.a, y.a)
      weights <- rbind(weights, linked.weights.a)
    }
  } else {
    
    # Order the event times
    id.a <- order(y)
    y.a <- y[id.a]
    d0.a <- d0[id.a]
    delta1.a <- delta1[id.a]
    x.a <- x[id.a,]
    orig.id.a <- id[id.a]
    
    # Derive IPCW
    fit <- survfit(Surv(y.a, d0.a) ~ 1)
    weights.a <- (1-d0.a)/rep(fit$surv, table(y.a))
    
    # Need to assign weights accordig to original ID, not ordered by event time
    linked.weights.a <- cbind(orig.id.a, weights.a, delta1.a, d0.a, y.a)
    weights <- rbind(weights, linked.weights.a)
  }
  
  
  ## Fit linear model ## 
  
  # Link weights to original data frame
  #colnames(weights) <- c('id', 'weights')
  #data <- merge(data0, weights, by='id')
  #summary(lm(tau-y ~ x-1, weights=weights, data=data))
  
  # Or, sort weights and use vectors
  w <- weights[order(weights[, 1]),2]
  lm.fit <- lm(delta1*(tau-y) ~ x-1, weights=w)
  
  
  ## Derive SE ##
  
  beta0 <- lm.fit$coef
  error <- tau - y - as.vector(x %*% beta0)
  score <- x * w * error
  
  # Kappa (sandwich variance components) stratified by group
  kappa <- NULL
  
  for(aa in 1:length(unique(group))){
    
    # Subset the group
    a <- unique(group)[aa]
    d0.a <- d0[group==a]
    delta1.a <- delta1[group==a]
    y.a <- y[group==a]
    x.a <- x[group==a,]
    n.a <- length(d0.a)
    orig.id.a0 <- orig.id.a <- id[group==a]
    score.a <- score[group==a,]
    
    # Kappa calculations for sandwich variance
    kappa.a <- matrix(0, n.a, p)
    
    for(i in 1:n.a){
      kappa1 <- score.a[i,]
    
      kappa2 <- apply(score.a[y.a>=y.a[i],,drop=F], 2, sum)*(d0.a[i])/sum(y.a>=y.a[i])
    
      kappa3 <- rep(0, p)
    
      for(k in 1:n.a){
        if(y.a[k]<=y.a[i]){
          kappa3 <- kappa3+apply(score.a[y.a>=y.a[k],,drop=F], 2, sum)*(d0.a[k])/(sum(y.a>=y.a[k]))^2
        }
      }
  
      kappa.a[i,] <- kappa1+kappa2-kappa3
    }
    kappa <- rbind(kappa, kappa.a)
  }
  
  # Transpose the kappas rbinded from each group gives pxp matrix
  gamma <- t(kappa) %*% kappa
  
  A <- t(x) %*% x
  varbeta <- solve(A) %*% gamma %*% solve(A)
  se <- sqrt(diag(varbeta))
  
  
  #--- Return results ---
  
  res <- cbind(beta=lm.fit$coef, se=se, cil=lm.fit$coef-(1.96*se), ciu=lm.fit$coef+(1.96*se), 
               z=lm.fit$coef/se, p=2*(1-pnorm(abs(lm.fit$coef/se))))
  #rownames(res) <- c("Intercept", colnames(x[,-1])): REMOVED!
  
  allres <- list(res=res, varbeta=varbeta)
  print(round(res, 3))
  invisible(allres)
return(res[,1]) # ADDED
}  

The argument times is the time of event or censoring for all subjects. The argument event is the type of transition happening at the time times for all subjects. The arguments eio and tau are the event of interest and the threshold \(\tau\). The argument cov gives the covariates to be included in the model.

We will first extract the values of our covariates of interest and store them as covar46.

Code show/hide
# Extracting the values of the covariates ESVEA, sex, age, and sbp
covar46 <- cbind(newchs$esvea, newchs$sex, newchs$age, newchs$sbp)
colnames(covar46) <- c("esvea", "sex", "age", "sbp")

Then, we will fit a linear model for the time lost due to stroke using the function rmtl.ipcw. The arguments times, event, eoi, tau and cov are set to chs_data$timestrokeordeath, fg_event, 1, 3 and **covar46, respectively.

Code show/hide
# Fitting a linear model for the time lost due to stroke before 3 ýears

newchs$death_wo_stroke <- ifelse(newchs$stroke == 1, 0, newchs$death)
newchs$fg_event <- ifelse(newchs$death_wo_stroke == 0, newchs$stroke, newchs$death_wo_stroke*2)

(rmtl_stroke <- rmtl.ipcw(times = newchs$timestrokeordeath,event = newchs$fg_event, eoi = 1, tau = 3, cov = covar46))
         beta    se    cil   ciu      z     p
xint   -0.277 0.211 -0.691 0.137 -1.313 0.189
xesvea  0.005 0.053 -0.099 0.110  0.095 0.924
xsex    0.024 0.036 -0.046 0.094  0.666 0.506
xage    0.004 0.003 -0.001 0.010  1.532 0.126
xsbp    0.000 0.001 -0.001 0.001 -0.004 0.997
         xint        xesvea          xsex          xage          xsbp 
-2.774303e-01  5.072473e-03  2.377510e-02  4.439468e-03 -3.000091e-06 

Thus, we obtain the following model for the time lost before 3 years due to stroke

\[\varepsilon(3|Z) = -0.277 + 0.005 \cdot Z_1 + 0.024 \cdot Z_2 + 0.004 \cdot Z_3 - 3 \cdot 10^{-6} \cdot Z_4,\]

where \((Z_1,Z_2,Z_3,Z_4)\) are ESVEA, sex, age, and systolic blood pressure.

We will fit a linear model to the time lost due to death without stroke by changing the value of the eio argument to 2.

Code show/hide
# Fitting a linear model for the time lost due to death without stroke before 3 years
(rmtl_death <- rmtl.ipcw(newchs$timestrokeordeath,newchs$fg_event, eoi = 2, tau = 3, covar46))
         beta    se    cil   ciu      z     p
xint   -0.078 0.210 -0.489 0.333 -0.371 0.711
xesvea  0.022 0.053 -0.081 0.126  0.422 0.673
xsex    0.030 0.036 -0.039 0.100  0.852 0.394
xage    0.003 0.003 -0.003 0.009  1.001 0.317
xsbp   -0.001 0.001 -0.002 0.001 -0.752 0.452
         xint        xesvea          xsex          xage          xsbp 
-0.0777484888  0.0223008572  0.0302582070  0.0028827687 -0.0005114223 

Thus, we obtain the following model for the time lost before 3 years due to death without stroke

\[\varepsilon(3|Z) = -0.078 + 0.022 \cdot Z_1 + 0.03 \cdot Z_2 + 0.003 \cdot Z_3 - 0.001 \cdot Z_4,\] where \((Z_1,Z_2,Z_3,Z_4)\) are ESVEA, sex, age, and systolic blood pressure.

We will finally estimate the cause-specific time lost for subjects with or without ESVEA non-parametrically as the area under the Aalen-Johansen estimates. We use the aj object from the previous exercise (where the option influence=TRUE provides SD estimates).

Code show/hide
print(aj,rmean=3)
Call: survfit(formula = Surv(timestrokeordeath, factor(fg_event), type = "mstate") ~ 
    esvea, data = chs_data, influence = TRUE)

                n nevent      rmean  se(rmean)*
esvea=0, (s0) 579      0 2.93362840 0.015098727
esvea=1, (s0)  99      0 2.86719350 0.048735634
esvea=0, 1    579     52 0.01954915 0.007978075
esvea=1, 1     99     21 0.05767462 0.029900214
esvea=0, 2    579    178 0.04682245 0.012941548
esvea=1, 2     99     36 0.07513188 0.039606548
   *restricted mean time in state (max time = 3 )

There is currently not an implementation of a procedure for linear models of the number of years lost due to a specific cause in SAS.

Exercise 4.7

Consider an illness-death model for the Copenhagen Holter study with states ‘0: Alive without AF or stroke’, ‘1: Alive with AF and no stroke’, ‘2: Dead or stroke’, see Figures 1.3 and 1.7.

1.

Estimate the prevalence of AF.

The prevalence of AF at time \(t\) can be estimated as the conditional probability of having AF at time \(t\) given alive at time \(t\), i.e. \(\frac{Q_1(t)}{Q_0(t) + Q_1(t)}\). Here, \(Q_0\) and \(Q_1\) can be estimated the mstate package. We must first define a transition matrix, tmat for the ilness-death model and turn the data into long format using the msprep function as we did in exercise 1.2. Then, we extract the hazards for each transition using the msfit function. Finally, \(Q_0\) and \(Q_1\) are estimated with the probtrans function where the prediction time predt is set to 0.

Code show/hide
# Transition matrix for the three-state illness-death model.
tmat <- trans.illdeath()

# Putting the data on long format, i.e. 1 row per possible transition
chs_data <- chs_data %>% mutate(timestroke = ifelse(stroke == 1, timestroke, timedeath),
                                timeafib = ifelse(afib == 1, timeafib, timestroke))
long_format <- msprep(time = c(NA, "timeafib", "timestrokeordeath"), status = c(NA,"afib","strokeordeath"), data = chs_data, trans = tmat)

# Fitting a model for each of the three transitions, 0 -> 1, 0 -> 2 and 1 -> 2.
cox471 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = long_format, method = "breslow")

# Extracting the hazards for each transition
msfit <- msfit(cox471, trans = tmat)

# Calculating the state occupation probabilities; pstate1 is $Q_0$, pstate2 is $Q_1$, and pstate3 is $Q_2$.
probtrans <- probtrans(msfit, predt = 0)
AaJ471 <- probtrans[[1]]

# Calculating the prevalence of AF
AaJ471$prevalence <- AaJ471$pstate2 / (AaJ471$pstate1 + AaJ471$pstate2)

# Plotting the prevalence of AF against time
(fig471 <- ggplot(aes(x = time, y = prevalence), data = AaJ471) +
          geom_step())

Code show/hide
* The prevalence of AF is given by Q_1 / (Q_0 + Q_1). Thus, we must first obtain estimates of Q_0 and Q_1.

* We will first fill in the empty spots of timeafib and replace paths where AF happens after stroke;
* We will also add an censoring variable for leaving state 0 called 'outof0'.;

title '4.7.1';
data chs_data;
    set chs_data;
    if afib = 1 and timeafib > timestrokeordeath then afib = 0;
    if afib = 0 then timeafib = timestrokeordeath;
    outof0 = 0;
    if afib = 1 or strokeordeath = 1 then outof0 = 1;
run;


* Then, we will fit a model for being in state 0 by using 'timeafib' as our time variable and 'outof0' as our censoring variable;

proc phreg data=chs_data; /* Q0(t) */
    model timeafib*outof0(0)=;
    baseline out=q0 survival=km0;
run;

* We will also fit a model for being in state 0 or state 1 by using 'timestrokeordeath' as our timevariable and 'strokeordeath' 
  as our censoring variable.;

proc phreg data=chs_data; /* Q0(t) + Q1(t) */
    model timestrokeordeath*strokeordeath(0)=;
    baseline out=q0andq1 survival = km01;
run;


* We will now estimate the probability of being in state 1 as the difference between the two models specified above. We must 
  first merge the data frames 'q0' and 'q0andq1' by the joint 'time' variable and fill the empty cells for the survival 
  probabilities with the last observed value. Then, 'q1' and the prevalence 'prev' is added to the dataframe 'allrev'.;

data q0; set q0; time=timeafib; run;
data q0andq1; set q0andq1; time=timestrokeordeath; run;
data all; merge q0 q0andq1; by time; run;

data allrev; 
set all;
    by time;
    retain last1 last2;
    if km0=. then q0=last1; if km0 ne . then q0=km0; *AF-free survival, Q0; 
    if km01=. then q01=last2; if km01 ne . then q01=km01; *Q0 + Q1;
    q1 = q01 - q0;
    prev = q1/q01;
    output;
    last1=q0; last2=q01; 
run;

*Finally, we plot the result;

proc gplot data=allrev;
    title'4.7.1: Prevalence of AF';
    plot prev*time / haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 5 minor=none label=('Years');
    axis2 order=0 to 0.12 by 0.02 minor=none label=(a=90 'Prevalence of AF');
    symbol  v=none i=stepjl c=blue;
run;

2.

Estimate the expected lengths of stay in states 0 or 1 up to 3 years.

The expected lengths of stay in states 0 or 1 up till 3 years can be estimated using the ELOS function from the mstate package which take a probtrans object and threshold tau as input.

Code show/hide
# Estimating the expected length of stay in each state from state 0 before 3 years
(ELOS472 <- ELOS(probtrans, tau = 3))
           in1         in2        in3
from1 2.915131 0.008797062 0.07607227
from2 0.000000 3.000000000 0.00000000
from3 0.000000 0.000000000 3.00000000

Thus, the expected length of stay up till 3 years is \(2.9151\) in state 0 and \(0.0088\) in state 1.

Code show/hide
* To estimate the expected length of stay in state 0 and state 1 up till 3 years we will integrate the functions for being in state 
  0 or 1 from 0 t0 3 years. 

* We must first add the time point for 3 years to the data set 'allrev'.;

data end_point; time = 3; run;
data allrev; merge allrev end_point; by time; run;

* Then we will calculate the product of the length of each time period and the value of q0 or q1 and then sum these products to 
  obtain estimates of the expected length of state in state 0 or 1.;

data allrev;
    set allrev;
    retain elos0 elos1;
    dq0 = dif(time)*lag(q0);
    dq1 = dif(time)*lag(q1);
    elos0 + dq0;
    elos1 + dq1;
run;

* Finally, we will print the result;

title'4.7.2';
proc print data = allrev;
    var time elos0 elos1;
    where time = 3;
run;

3.

Evaluate the SD of the expected lengths of stay using the bootstrap.

We will use bootstrapping to estimate the SD of the expected lengths of stay in states 0 and 1. This can be done using the msboot function from the mstate package.

We must first create a function which, given a data set data, calculates the statistics of interest, in this case the expected lengths of stay in state 0 and 1. Then, the msboot function takes this function as well as the data in long format, the desired number of bootstrap replications B and the name of the id variable in the data frame as arguments.

Code show/hide
# Setting a seed for reproducibility
set.seed(1234)

# Function of data returning the value of the statistic to be bootstrapped
boot_fct473 <- function(data){
  cox <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = data, method = "breslow")
  msfit <-  msfit(cox, trans = tmat)
  pt <- probtrans(msfit, predt = 0, direction = "forward", method = "aalen")
  elos <- ELOS(pt, 3)
  return(elos[1,1:2])}

# Bootstrapping with B = 200
elos_boot473 <- msboot(theta = boot_fct473, data = long_format, B = 200, id = "id")

Finally, the mean and standard deviation of the statistics based on the bootstrap samples can be calculated

Code show/hide
# Mean and SD obtained using bootstrapping for expected length of stay in state 0 and 1
(c(mean(elos_boot473[1,]), sqrt(var(elos_boot473[1,]))))
[1] 2.91297302 0.01642631
Code show/hide
(c(mean(elos_boot473[2,]), sqrt(var(elos_boot473[2,]))))
[1] 0.009175036 0.004859470

We obtain an estimate for the standard deviation of 0.0160782 for the expected length in state 0 and 0.004976452 for the expected length in state 1 using a bootstrap with 200 replications.

Code show/hide
* We will first make our bootstrap data frames. We will make 200 bootstrap samples and each bootstrap sample contains 678 rows which
  are sampled with replacement from our originqal data.;

title'4.7.3';
data boot_chs;
    do sampnum = 1 to 200; /* nboot=200*/
    do i = 1 to 678; /*nobs=678*/
    x=round(ranuni(0)*678); /*nobs=678*/
    set chs_data
    point=x;
    output;
    end;
    end;
    stop;
run;

* Then we will estimate the expected length of stay in state 0 before 3 years based on each of the 200 bootstrap samples;

* We will first estimate the probability of being in state 0 and the probability of being in either state 0 or state 1.;

proc phreg data=boot_chs noprint; /* Q0(t) */
    by sampnum;
    model timeafib*outof0(0)=;
    baseline out=q0_boot survival=km0;
run;

proc phreg data=boot_chs noprint; /* Q0(t) + Q1(t) */
    by sampnum;
    model timestrokeordeath*strokeordeath(0)=;
    baseline out=q0andq1_boot survival = km01;
run;

* Then, we will merge the two datasets by time and add the time point for 3 years to all bootstrap samples.;

data q0_boot; set q0_boot; time=timeafib; run;
data q0andq1_boot; set q0andq1_boot; time=timestrokeordeath; run;

data end_point; do sampnum = 1 to 200; time = 3; output; end; run;
data all_boot; merge q0_boot q0andq1_boot end_point; by sampnum time; run;


* Then, we will estimate q1 as the difference between the probability of being in either state 0 or state 1 and the 
  probability of being in state 0.;

data allrev_boot; 
set all_boot;
    by sampnum time;
    retain last1 last2;
    if km0=. then q0=last1; if km0 ne . then q0=km0; *AF-free survival, Q0; 
    if km01=. then q01=last2; if km01 ne . then q01=km01; *Q0 + Q1;
    q1 = q01 - q0;
    output;
    last1=q0; last2=q01; 
run;

* The expected length of stay in state 0 or 1 is then estimated as the sum of the products of the length of the time intervals 
  and the value of q0 or q1;

data allrev_boot;
    set allrev_boot;
    if time > 3 then delete;
    retain elos0 elos1;
    dq0 = dif(time)*lag(q0);
    dq1 = dif(time)*lag(q1);
    if time = 0 then do; elos0 = 0; elos1 = 0; dq0 = .; dq1 = .; end;
    elos0 + dq0;
    elos1 + dq1;
run;

* We will only keep the result for the value of the expected lengths of stay up at 3 years in the data frame 'elos_est';

data elos_est;
    set allrev_boot;
    keep time elos0 elos1;
    where time = 3;
run;

* Finally, the mean and standard deviation is calculated;

proc means data=elos_est stddev mean;
    var elos0 elos1;
run;

Exercise 4.8

Consider the data on mortality in relation to childhood vaccinations in Guinea-Bissau, Example 1.1.2.

1.

Fit a marginal hazard model for the mortality rate, adjusting for cluster ‘(village)’ and including binary variables for BCG and DTP vaccination and adjusting for age at recruitment (i.e., using time since recruitment as time-variable).

The data should be loaded as bissau_data

Code show/hide
bissau_data <- read.csv("data/bissau.csv")
bissau_data$agem<-bissau_data$age/30.4

We then load the relevant packages

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines

We use the coxph function to fit the marginal hazard model for the mortality rate adjusting for cluster and the variables BCG, DTP and age. We adjust for cluster by adding cluster(cluster) in the formula argument.

Code show/hide
# Fitting a marginal hazard model
fit481 <- coxph(Surv(fuptime,dead)~ bcg + dtp + agem + cluster(cluster), data=bissau_data)
summary(fit481)
Call:
coxph(formula = Surv(fuptime, dead) ~ bcg + dtp + agem, data = bissau_data, 
    cluster = cluster)

  n= 5274, number of events= 222 

         coef exp(coef) se(coef) robust se      z Pr(>|z|)   
bcg  -0.40978   0.66380  0.16685   0.15011 -2.730  0.00634 **
dtp   0.07331   1.07606  0.10366   0.09033  0.812  0.41706   
agem  0.04006   1.04088  0.04337   0.04236  0.946  0.34422   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     exp(coef) exp(-coef) lower .95 upper .95
bcg     0.6638     1.5065    0.4946    0.8909
dtp     1.0761     0.9293    0.9015    1.2845
agem    1.0409     0.9607    0.9580    1.1310

Concordance= 0.549  (se = 0.017 )
Likelihood ratio test= 6.7  on 3 df,   p=0.08
Wald test            = 8.68  on 3 df,   p=0.03
Score (logrank) test = 6.69  on 3 df,   p=0.08,   Robust = 7.5  p=0.06

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).

We obtain a coefficient of -0.41 for BCG, 0.073 for DTP and 0.04 for age.

Code show/hide
* We first load the data;

proc import out=bissau_data
  datafile='data/bissau.csv' 
    dbms=csv replace;
    getnames=yes;
run;

* To make our results comparable to table 2.12 we must first convert the age variable from days to months.;

data bissau_data;
    set bissau_data;
    agem = age/30.4;
run;

/* We fit a marginal hazard model for the mortality rate adjusting for cluster and the variables BCG, DTP and age using the 'phreg'  procedure where we include 'covs(aggregate)' in the phreg statement to obtain robust SD estimates and 'cluster' in the id statement */

title '4.8.1';
proc phreg data=bissau_data covs(aggregate);
    class cluster;
    model fuptime*dead(0)= bcg dtp agem;
    id cluster;
run;

2.

Compare the results with those from the gamma frailty model (Exercise 3.12).

The estimates of the coefficients are almost identical with the ones obtained with the gamma frailty model. The standard deviations of the marginal hazards model are, however, a little bit smaller than the standard errors from the gamma frailty model.

Exercise 4.9

Consider the data on recurrent episodes in affective disorder, Example 1.1.5.

The data should be loaded as affective_data

Code show/hide
affective_data <- read.csv("data/affective.csv")

We then load the relevant packages

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(mets)
Code show/hide
proc import out=affective_data
    datafile='data/affective.csv' 
    dbms=csv replace;
    getnames=yes;
run;

1.

Estimate the mean number of episodes, \(\mu(t)\), in \([0; t]\) for unipolar and bipolar patients, taking the mortality into account.

We will estimate the mean number of episodes, \(\mu(t)\), in \([0; t]\) for unipolar and bipolar patients, taking the mortality into account by

\[\hat{\mu}(t) = \sum_{x \leq t} \hat{S}(x-)\frac{dN(x)}{Y(x)}\] We must first make a data set corresponding to the setting with cycles depicted in Figure 1.5, i.e., the interval in hospital is included in the time between events.

Code show/hide
# Creating data corresponding to set up depicted in Figure 1.5
data491 <- affective_data %>% group_by(id) %>% 
                mutate(prev1 = lag(start, n = 1, default = 0),  # moves start one line down
                       prev2 = lag(stop, n = 1, default = 0), # moves stop one line down
                       prev = ifelse(state == 1, prev2, prev1)) %>% # picks the displaced value of stop if hospitalized, otherwise the displaced value of start.
                filter(state == 0 | status %in% c(2,3))

We can estimate \(\hat{S}(t)\) with the survfit function from the survival package where we use the data set data491 and death (status = 2) as the event variable.

Code show/hide
#S(t)
kmfit491 <- survfit(Surv(prev, stop, status == 2) ~ strata(bip) + cluster(id), data = data491)
  
#S(t) for unipolar patients
S0 <- dplyr::lag(kmfit491$surv[1:(kmfit491$strata[1])], default = 1)

#S(t) for bipolar patients
S1 <- dplyr::lag(kmfit491$surv[(kmfit491$strata[1]+1):(kmfit491$strata[1] + kmfit491$strata[2])], default = 1)

We can also calculate \(\frac{dN(x)}{Y(x)}\) using the survfit function where we have admission to hospital (status = 1) as event variable.

Code show/hide
#A(t)
naafit491 <- survfit(Surv(prev, stop, status == 1) ~ strata(bip) + cluster(id), data = data491)

#dA(t) for unipolar patients
dA0 <- diff(naafit491$cumhaz[1:naafit491$strata[1]])

#dA(t) for bipolar patients
dA1 <- diff(naafit491$cumhaz[(kmfit491$strata[1]+1):(kmfit491$strata[1] + kmfit491$strata[2])])

Then we can calculate the estimate of m(t) for both patient groups and plot the result

Code show/hide
#m(t) for unipolar patients
mu0 <- cumsum(S0 * c(0, dA0))

#m(t) for bipolar patients
mu1 <- cumsum(S1 * c(0, dA1))

# Collecting the data
plotdata491 <- data.frame(time = kmfit491$time/12, 
                       mu = c(mu0, mu1), 
                       disorder = c(rep("Unipolar", length(mu0)), 
                               rep("Bipolar", length(mu1))))

# Plotting the result
fig491 <- ggplot(data = plotdata491) + geom_step(aes(x = time, y = mu, color = disorder)) + 
  xlab("Time since first admission (years)") + 
  ylab("Expected number of episodes") + 
  theme_bw()
fig491

The expected number of episodes is larger for bipolar patients at all times compared to unipolar patients.

Code show/hide
* We can estimate the mean number of episodes in [0,t] for unipolar and bipolar patients, taking the mortality into account 
  by equation (4.13).

* We make a data set correpsonding to the setting with cycles depicted in figure 1.5, i.e. the interval in hospital is 
  included in the time between events.;

title '4.9.1';
data data491; 
    set affective_data;
    by id;
    retain prev;
    if first.id then prev=0; 
    output; 
    if state=1 then prev=start; if state=0 then prev=stop;
run;

data data491;
    set data491;
    if state = 0 or status = 2 or status = 3;
run;

* Thus the entry and exit time are now 'prev' and 'stop';

* We can estimate S(X-) using the phreg procedure. Since the event of interest is death (status = 2) we include 'status(0 1 3)' as 
  censoring variables in the model statement. The result is saved as 'kmdata491';

proc phreg data=data491;
    class bip;
    model (prev,stop)*status(0 1 3)=;
    id id;
    strata bip;
    baseline out=kmdata491 survival=km;
run;

* Likewise, we can estimate dN(X)/Y(X) using the phreg procedure with censoring variables 'status(0 2 3)' in the model statement. The 
  result is saved as 'naadata491';

proc phreg data=data491;
    class bip;
    model (prev,stop)*status(0 2 3)=;
    id id;
    strata bip;
    baseline out=naadata491 cumhaz=naa;
run;

* We then create a data set for the unipolar patients and one for the bipolar patients containing the estimates of S(X-) and
  dN(X)/Y(X);

data naa_uni;
    set naadata491;
    if bip = 0;
run;

data km_uni; 
    set kmdata491;
    if bip = 0;
run;

data uni;
    merge naa_uni km_uni;
    by stop;
run;

data naa_bip;
    set naadata491;
    if bip = 1;
run;

data km_bip;
    set kmdata491;
    if bip = 1;
run;

data bip;
    merge naa_bip km_bip;
    by stop;
run;

* We then fill the empty cells in the data set with the previous value of S(X-) and dN(X)/Y(X);


data uni;
    set uni;
    retain _km _naa;
    if km ne . then _km = km;
    if naa ne . then _naa = naa;
    years = stop/12;
run;

data bip;
    set bip;
    retain _km _naa;
    if km ne . then _km = km;
    if naa ne . then _naa = naa;
    years = stop/12;
run;

* Finally, we estimate m(t) for unipolar and bipolar patients respectively;

data uni; 
    set uni;
    dA = dif(_naa); 
    if years = 0 then dA = 0;
    dmu = _km*dA;
    retain mu;
    mu + dmu;
    bip = 0;
    keep years _naa mu bip;
run;


data bip; 
    set bip;
    dA = dif(_naa); 
    if years = 0 then dA = 0;
    dmu = _km*dA;
    retain mu;
    mu + dmu;
    bip = 1;
    keep years _naa mu bip;
run;

* We merge the data sets for unipolar and bipolar patients and plot the data using the gplot procedure;

data plotdata491;
    set uni bip;
run;

title '4.9.1';
proc gplot data=plotdata491;
    plot mu*years=bip/ haxis=axis1 vaxis=axis2;
    axis1 order=0 to 30 by 5 minor=none label=('Years');
    axis2 order=0 to 10 by 2 minor=none label=(a=90 'Expected number of episodes');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;
quit;
  
* The expected number of episodes is larger for bipolar patients at all times compared to unipolar patients.;

* Less transparently, but in a way a lot easier, we can 'cheat' proc phreg to do the computations by
  fitting an empty Fine-Gray model and transform the cumulative sub-distribution hazard!;

proc phreg data=data491;
    model stop*status(0 3)=/entry=prev eventcode=1;
    strata bip;
    baseline out=mcfdata1 cif=naa1;
run;

data mcfdata1; set mcfdata1;
    cmf=-log(1-naa1);
    years=stop/12;
run;

proc gplot data=mcfdata1;
plot cmf*years=bip/haxis=axis1 vaxis=axis2;
axis1 order=0 to 30 by 5 minor=none label=('Years');
axis2 order=0 to 8 by 0.5 minor=none label=(a=90 'Expected number of episodes');
symbol1  v=none i=stepjl c=red;
symbol2  v=none i=stepjl c=blue;
run;
quit;

2.

Estimate, incorrectly, the same mean curves by treating death as censoring and compare with the correct curves from the first question, thereby, re-constructing the cover figure from this book (unipolar patients).

We will estimate the mean number of episodes in \([0,t]\) for unipolar and bipolar patients neglecting mortality with the Nelson-Aalen estimator

\[\hat{m}(t) = \sum_{x \leq t} \frac{dN(x)}{Y(x)},\] where \(x\) is event time.

This is exactly the Nelson-Aalen estimate obtained in naafit491 in the previous exercise. We will save this data as plotdata492

Code show/hide
# Collecting the data for plotting
plotdata492 <- data.frame(time = naafit491$time/12, 
                       mu = naafit491$cumhaz, 
                       disorder = c(rep("Unipolar", naafit491$strata[[1]]), 
                               rep("Bipolar", naafit491$strata[[2]])))

We will now plot the curves for the two estimates of the mean number of episodes for unipolar patients

Code show/hide
# Plotting the estimate of the expected number of episodes for patients with unipolar disorder
fig492_uni <- ggplot()  + geom_step(data = subset(plotdata492, disorder == "Unipolar"), aes(x = time, y = mu, color = "Not accounting for mortality")) + geom_step(data = subset(plotdata491, disorder == "Unipolar"), aes(x = time, y = mu, color = "Accounting for mortality")) +  ylab("Expected number of episodes") +
  xlab("Time since first admission (years)") +
  theme(legend.title=element_blank()) + ggtitle("Unipolar disorder")
fig492_uni

We repeat for the bipolar patients

Code show/hide
fig492_bip <- ggplot() + geom_step(data = subset(plotdata491, disorder == "Bipolar"), aes(x = time, y = mu, color = "Accounting for mortality")) + geom_step(data = subset(plotdata492, disorder == "Bipolar"), aes(x = time, y = mu, color = "Not accounting for mortality")) +   ylab("Expected number of episodes") +
  xlab("Time since first admission (years)") +
  theme(legend.title=element_blank()) + ggtitle("Bipolar disorder")
fig492_bip

For both unipolar and bipolar patients we get larger estimates of the mean number of episodes when we do not account for mortality.

Code show/hide
* We can estimate the expected number of episodes neglecting mortality by equation (4.11). This is 
  in fact the Nelson-Aalen estimate we saved in the data set 'naadata491';

* We use the gplot procedure to plot the two estimates of the expected number of episodes;

legend1 label = ('Accounting for mortality') value = ('Yes' 'No');

title '4.9.2 - unipolar';
proc gplot data=uni;
    plot mu*years _naa*years/ overlay haxis=axis1 vaxis=axis2 legend =legend1;
    axis1 order=0 to 30 by 5 minor=none label=('Years');
    axis2 order=0 to 6 by 2 minor=none label=(a=90 'Expected number of episodes');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;
quit;

title '4.9.2 - bipolar';
proc gplot data=bip;
    plot mu*years _naa*years/ overlay haxis=axis1 vaxis=axis2 legend =legend1;
    axis1 order=0 to 30 by 5 minor=none label=('Years');
    axis2 order=0 to 10 by 2 minor=none label=(a=90 'Expected number of episodes');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;
quit;

* For both unipolar and bipolar patients we get larger estimates of the mean number of episodes when we do not account for mortality.;

Exercise 4.10

Consider the data from the Copenhagen Holter study.

1.

Estimate the distribution, \(G(t)\) of censoring.

We will estimate the censoring distribution \(G(t)\) with the Kaplan-Meier estimator where censoring is the event and death acts as censoring.

Code show/hide
km4101 <- survfit(Surv(timedeath, 1 - death) ~ 1, data = chs_data)


# Plotting the Kaplan-Meier estimate
(fig41 <- ggplot() + geom_step(aes(x = km4101$time, y = km4101$surv), size = 1)  + 
          ylim(c(0,1)) +
          xlab("Time (years)") + 
          ylab("Probability of censoring")) 

Code show/hide
*We will estimate the censoring distribution G(t) with the Kaplan-Meier estimator where 'censoring' is the event and 'death' acts 
 as censoring.;


title "4.10.1";
proc phreg data=chs_data;
    model timedeath*death(1)=;
    baseline out=survdat survival=km;
run;

* Then the estimates are plotted using the gplot procedure;

proc gplot data=survdat;
plot km*timedeath/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 16 by 2 label=('Years');
    axis2 order=0 to 1 by 0.1 label=(a=90 'Censoring probability');
run;
quit;

2.

Examine to what extent this distribution depends on the variables ESVEA, sex, age, and systolic blood pressure.

To examine to what extent the censoring distribution depends on the variables ESVEA, sex, age, and systolic blood pressure, we will fit a Cox model including these covariates and where censoring is the event and death acts as censoring.

Code show/hide
cox4102 <- coxph(Surv(timedeath, 1- death) ~ esvea + sex + age + sbp, data = chs_data, ties = "breslow")
summary(cox4102)
Call:
coxph(formula = Surv(timedeath, 1 - death) ~ esvea + sex + age + 
    sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 416 
   (3 observations deleted due to missingness)

           coef exp(coef)  se(coef)      z Pr(>|z|)   
esvea  0.439265  1.551567  0.164580  2.669  0.00761 **
sex   -0.285701  0.751487  0.120830 -2.364  0.01805 * 
age   -0.019642  0.980550  0.009230 -2.128  0.03334 * 
sbp   -0.001516  0.998485  0.002169 -0.699  0.48466   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea    1.5516     0.6445    1.1238    2.1422
sex      0.7515     1.3307    0.5930    0.9523
age      0.9806     1.0198    0.9630    0.9985
sbp      0.9985     1.0015    0.9942    1.0027

Concordance= 0.562  (se = 0.017 )
Likelihood ratio test= 11.54  on 4 df,   p=0.02
Wald test            = 11.75  on 4 df,   p=0.02
Score (logrank) test = 11.8  on 4 df,   p=0.02

The \(p\)-values indicate that censoring depends on ESVEA, sex, and age but not on systolic blood pressure.

Code show/hide
*To examine to what extent the censoring distribution depends on the variables ESVEA, sex, age, and systolic blood pressure we will
 fit a Cox model including these covariates and where 'censoring' is the event and 'death' acts as censoring.;

title "4.10.2";
proc phreg data=chs_data;
    model timedeath*death(1)=esvea sex age sbp;
run;

*The p-values indicate that censoring depends strongly on ESVEA, sex, and age but not on systolic blood pressure.;