2  Intuition for intensity models

PBC3 trial in liver cirrhosis

Read data

Code show/hide
pbc3 <- read.csv("data/pbc3.csv")
pbc3$fail <- ifelse(pbc3$status != 0, 1, 0) # event/failure indicator
pbc3$tment_char <- ifelse(pbc3$tment == 0, "Placebo", "CyA")

# Add transformations of covariates 
pbc3$albnorm <- with(pbc3, (alb-35)*(alb>35))
pbc3$alb10 <- with(pbc3, alb/10)
pbc3$alb2 <- with(pbc3, alb10*alb10)

pbc3$bilihigh <- with(pbc3, (bili-17.1)*(bili>17.1))
pbc3$bilitoohigh <- with(pbc3, (bili-34.2)*(bili>34.2))
pbc3$bilimuchtoohigh <- with(pbc3, (bili-51.3)*(bili>51.3))
pbc3$bili100 <- with(pbc3, bili/100)
pbc3$bili2 <- with(pbc3, bili100*bili100)

pbc3$log2bili <- with(pbc3, log2(bili))
pbc3$log2bili2 <- with(pbc3, log2bili*log2bili)

pbc3$logbilihigh <- with(pbc3, (log2bili-log2(17.1))*(bili>17.1))
pbc3$logbilitoohigh <- with(pbc3, (log2bili-log2(34.2))*(bili>34.2))
pbc3$logbilimuchtoohigh <- with(pbc3, (log2bili-log2(51.3))*(bili>51.3))
Code show/hide
proc import out=pbc3
    datafile="data/pbc3.csv"
    dbms=csv replace;
run;
data pbc3; 
    set pbc3;
    albnorm=(alb-35)*(alb>35);
    alb10=alb/10;
    alb2=alb*alb;
    bilihigh=(bili-17.1)*(bili>17.1);
    bilitoohigh=(bili-34.2)*(bili>34.2);
    bilimuchtoohigh=(bili-51.3)*(bili>51.3);
    bili100=bili/100;
    bili2=bili*bili;
    log2bili=log2(bili);
    logbilihigh=(log2bili-log2(17.1))*(bili>17.1);
    logbilitoohigh=(log2bili-log2(34.2))*(bili>34.2);
    logbilimuchtoohigh=(log2bili-log2(51.3))*(bili>51.3);
    log2bili2=log2bili*log2bili;
run;

Figure 2.1

Code show/hide
# General plotting style 
library(ggplot2)
theme_general <- theme_bw() +
  theme(text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20),
        legend.position = "bottom", 
        legend.title=element_blank(),
        legend.text = element_text(size = 20),
        legend.key.size = unit(2,"line"))

# Nelson Aalen estimate by treatment
library(survival)
nafit <- survfit(Surv(days, status != 0) ~ tment, data = pbc3)
# Collect data for plot
nadata <- data.frame(cumhaz = nafit$cumhaz, time = nafit$time, 
                     sd = nafit$std.err,
                     tment = c(rep(names(nafit$strata)[1], nafit$strata[1]), 
                               rep(names(nafit$strata)[2], nafit$strata[2])))

# Create Figure 2.1
library(ggplot2)
fig2.1 <- ggplot(aes(x = time / 365.25, y = cumhaz, linetype = tment),
                 data = nadata) + 
  geom_step(size = 1) + 
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA")) + 
  xlab("Time since randomization (years)") + 
  ylab("Cumulative hazard") + 
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6), breaks = seq(0, 6, 1))  + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05))) +
  theme_general 

fig2.1

Code show/hide
proc phreg data=pbc3; 
    model days*status(0)=;
    strata tment;
    baseline out=hazdat cumhaz=naa stdcumhaz=sdnaa;
run;
data hazdat; 
    set hazdat; 
    daysyears = days/365.25; 
run; 
proc gplot data=hazdat;
    plot naa*daysyears=tment/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none
    label=('Years');
    axis2 order=0 to 0.7 by 0.1 minor=none
    label=(a=90 'Cumulative hazard');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;

In-text, p. 37: Nelson-Aalen estimates at 2 years

Code show/hide
tail(subset(nadata, nadata$time <= 2*365.25 & nadata$tment == "tment=0"),1)
     cumhaz time         sd   tment
60 0.183065  724 0.03560398 tment=0
Code show/hide
tail(subset(nadata, nadata$time <= 2*365.25 & nadata$tment == "tment=1"),1)
       cumhaz time         sd   tment
209 0.1668539  725 0.03449431 tment=1
Code show/hide
proc print data=hazdat;
    where 1.5<daysyears<=2; 
run;

Figure 2.2

Code show/hide
# Rates + cuts from Table 2.1
rateCyA <- c(8.1,13.1,9.6)
ratePbo <- c(9.4,12.5,8.5)
pcwtime <- c(0,2,4,5)
# Collect data
plotdata <- data.frame(rates = c(rateCyA, ratePbo),
                       tment = c(rep("CyA", length(rateCyA)), 
                                 rep("Placebo", length(ratePbo))),
                       times_s = rep(pcwtime[-4], 2),
                       times = rep(pcwtime[-1], 2))

# Create Figure 2.2
fig2.2 <- ggplot(aes(x = time, y = rates, linetype = tment), data = plotdata) +
  geom_segment(aes(x = times_s, y = rates, xend = times, yend = rates),size=1) +
  scale_linetype_discrete("Treatment", labels = c("Placebo", "CyA"))  +
  xlab("Time since randomization (years)") +
  ylab("Estimated hazard function (per 100 years)") +
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 5),
                     breaks = seq(0, 5, 1))  +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     limits = c(0,14), breaks = seq(0, 14, 2)) +
  theme_general

fig2.2

Table 2.1

Code show/hide
# Data for Table 2.1
cuts <- c(0, 2, 4) * 365.25
# Make the data ready using survSplit from survival package
library(survival)
pbc3mult <- as.data.frame(survSplit(Surv(days, fail) ~ ., pbc3,
                      cut = cuts[-1], 
                      episode = "timegroup"))
pbc3mult$risktime <- pbc3mult$days - cuts[pbc3mult$timegroup] 
pbc3mult$timegroup <- as.factor(pbc3mult$timegroup)

library(dplyr)
pcwdata <- pbc3mult %>% 
  group_by(tment, timegroup) %>% 
  summarise(fail = sum(fail), risktime = sum(days - cuts[timegroup]))
pcwdata$risktimeyrs <- pcwdata$risktime/365.25
pcwdata$hazard <- 36525*pcwdata$fail/pcwdata$risktime
pcwdata$SD <- 36525*sqrt(pcwdata$fail)/pcwdata$risktime
data.frame(pcwdata)
  tment timegroup fail risktime risktimeyrs    hazard       SD
1     0         1   27 104856.0   287.08008  9.405041 1.810001
2     0         2   17  49673.0   135.99726 12.500252 3.031756
3     0         3    2   8642.0    23.66051  8.452904 5.977106
4     1         1   24 107931.5   295.50034  8.121818 1.657859
5     1         2   18  50284.5   137.67146 13.074605 3.081714
6     1         3    2   7599.0    20.80493  9.613107 6.797493
Code show/hide
/* prepare for poisson model */
data pbc3mult; 
    set pbc3;
    fail=(days<=2 * 365.25)*(status ne 0);
    risktime=min(2 * 365.25, days);
    logrisk=log(risktime); interv=1; output;  
    if days>2 * 365.25 then do;
    fail=(days<=4 * 365.25)*(status ne 0);
    risktime=min(2 * 365.25 ,days-2 * 365.25);
    logrisk=log(risktime); interv=2; output; end;
    if days>4 * 365.25  then do;
    fail=status ne 0; 
    risktime=days-4 * 365.25;
    logrisk=log(risktime); interv=3; output; end;
run;
proc means sum  data=pbc3mult;
    class tment interv;
    var fail risktime;
run;

proc genmod data=pbc3mult;
    where tment=1;
    class interv;
    model fail=interv/dist=poi offset=logrisk;
    estimate '0-2 years' intercept 1  interv 1 0 0/exp;
    estimate '2-4 years' intercept 1  interv 0 1 0/exp;
    estimate '4-6 years' intercept 1  interv 0 0 1/exp;
run;
proc genmod data=pbc3mult;
    where tment=0;
    class interv;
    model fail=interv/dist=poi offset=logrisk;
    estimate '0-2 years' intercept 1  interv 1 0 0/exp;
    estimate '2-4 years' intercept 1  interv 0 1 0/exp;
    estimate '4-6 years' intercept 1  interv 0 0 1/exp;
run;

In-text, p. 38: LRT test piece-wise exponential model

Code show/hide
library(stats)
poisfull <- glm(fail ~ tment + timegroup + tment*timegroup +
                  offset(log(risktime/36525)), data = pbc3mult, family=poisson)
poisred <- glm(fail ~  timegroup +
                 offset(log(risktime/36525)), data = pbc3mult, family=poisson)
anova(poisred,poisfull,test="LRT")
Analysis of Deviance Table

Model 1: fail ~ timegroup + offset(log(risktime/36525))
Model 2: fail ~ tment + timegroup + tment * timegroup + offset(log(risktime/36525))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1       620     459.66                     
2       617     459.35  3  0.30813   0.9585
Code show/hide
# or
library(lmtest)
lrtest(poisred,poisfull)
Likelihood ratio test

Model 1: fail ~ timegroup + offset(log(risktime/36525))
Model 2: fail ~ tment + timegroup + tment * timegroup + offset(log(risktime/36525))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   3 -319.83                     
2   6 -319.68  3 0.3081     0.9585
Code show/hide
ods output modelfit=full;
proc genmod data=pbc3mult;
    class interv tment;
    model fail=interv|tment /dist=poi offset=logrisk;
run;
data full;
  set full;
     where Criterion="Deviance";
     full_like=value;
     full_df=df;
     keep full_like full_df;
run;
proc print;run;
ods output modelfit=reduced;
proc genmod data=pbc3mult;
    class interv tment;
    model fail=interv /dist=poi offset=logrisk;
run;
data reduced;
  set reduced;
     where Criterion="Deviance";
     reduced_like=value;
     reduced_df=df;
     keep reduced_like reduced_df;
run;
data lrt_pval;
 merge full reduced;
  lrt = reduced_like - full_like;
    df  = reduced_df - full_df;
  p_value = 1 - probchi(lrt,df);
run;
proc print data=lrt_pval;
  title "LR test statistic and p-value";
run;

In-text, p. 40: Logrank test and p-value

Code show/hide
library(survival)
lr<-survdiff(Surv(days, status != 0) ~ tment, data = pbc3)
print(c(lr$obs[1],lr$exp[1],lr$var[1,1],lr$chisq,lr$pvalue), d=4)
[1] 46.00000 44.68367 22.47967  0.07708  0.78129
Code show/hide
proc lifetest data=pbc3 notable; 
    time days*status(0);
    strata tment;
run;

Figure 2.3

Code show/hide
# NA data from figure 2.1 model fit
tment1 <- subset(nadata, tment == "tment=1")
# Estimated hazard per time group
pcwdata$hazard_timegroup <- pcwdata$fail / pcwdata$risktime
# Add a numeric version of the treatment to the NA estimates
nadata$tmentnum <- ifelse(nadata$tment == "tment=1", 1, 0)
# Add piecewise constant hazard to data
nadata$pwch <- NULL
# Between time 0 and 2
nadata$pwch[nadata$time <= 2 * 365.25] <- nadata$time[nadata$time <= 2 * 365.25] *
  (pcwdata$hazard_timegroup[1] * (1-nadata$tmentnum[nadata$time <= 2 * 365.25]) +
     pcwdata$hazard_timegroup[4] * (nadata$tmentnum[nadata$time <= 2 * 365.25]))
# Between time 2 and 4
nadata$pwch[nadata$time > 2 * 365.25 & nadata$time <= 4* 365.25 ] <- 2 * 365.25 * 
  (pcwdata$hazard_timegroup[1] * (1-nadata$tmentnum[nadata$time > 2 * 365.25 & nadata$time <= 4 * 365.25]) + 
     pcwdata$hazard_timegroup[4] * (nadata$tmentnum[nadata$time > 2 * 365.25 & nadata$time <= 4 * 365.25])) + 
  (nadata$time[nadata$time > 2 * 365.25 & nadata$time <= 4 * 365.25] - 2 * 365.25) * 
  (pcwdata$hazard_timegroup[2] * (1-nadata$tmentnum[nadata$time > 2 * 365.25 & nadata$time <= 4 * 365.25]) + 
     pcwdata$hazard_timegroup[5] * (nadata$tmentnum[nadata$time > 2 * 365.25 & nadata$time <= 4 * 365.25]))
# After time 4
nadata$pwch[nadata$time > 4 * 365.25] <- 2 * 365.25 * 
  (pcwdata$hazard_timegroup[1] * (1-nadata$tmentnum[nadata$time > 4 * 365.25]) + 
     pcwdata$hazard_timegroup[4] * (nadata$tmentnum[nadata$time > 4 * 365.25])) + 
   2 * 365.25 * 
  (pcwdata$hazard_timegroup[2] * (1-nadata$tmentnum[nadata$time > 4 * 365.25]) + 
     pcwdata$hazard_timegroup[5] * (nadata$tmentnum[nadata$time > 4 * 365.25])) + 
  (nadata$time[nadata$time > 4 * 365.25] - 4 * 365.25) * 
  (pcwdata$hazard_timegroup[3] * (1-nadata$tmentnum[nadata$time > 4 * 365.25]) + 
     pcwdata$hazard_timegroup[6] * (nadata$tmentnum[nadata$time > 4 * 365.25]))
# Reformat for plot
piecepdata <- data.frame(cumhaz = c(nadata$cumhaz, nadata$pwch), 
                         time = rep(nadata$time, 2),
                         tmentnum = rep(nadata$tmentnum, 2),
                         type = c(rep("Nelson-Aalen", length(nadata$time)), 
                                  rep("Piece-wise exponential", length(nadata$time))))
# Only for treatment 1
piecepdata1 <- subset(piecepdata, tmentnum == 1)

# Create Figure 2.3
fig2.3 <- ggplot(aes(x = time / 365.25, y = cumhaz, linetype = type), 
                data = subset(piecepdata1, type == "Nelson-Aalen")) + 
  geom_step(size = 1) + 
  geom_line(aes(x = time / 365.25, y = cumhaz, linetype = type), size = 1,
            data = subset(piecepdata1, type == "Piece-wise exponential")) + 
  labs(linetype = "Type") + 
  xlab("Time since randomization (years)") + 
  ylab("Cumulative hazard") + 
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1))  + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05))) +
  theme_general+
  theme(legend.title=element_blank(),
        legend.text = element_text(size = 20),
        legend.key.size = unit(2,"line"))

fig2.3

Code show/hide
data hazdat; 
    set hazdat;
    if days<=2 * 365.25 then 
        pwch=days*(27.0000000/104856*(1-tment)+24.0000000/107931.5*tment);

    if 2 * 365.25 <days<=4 * 365.25 then
        pwch=2* 365.25*(27.0000000/104856*(1-tment)+24.0000000/107931.5*tment)
        +(days-2* 365.25)*(17.0000000/49673*(1-tment)+18.0000000/50284*tment);

    if 4 * 365.25 <days then
        pwch=2* 365.25 *(27.0000000/104856*(1-tment)+24.0000000/107931.5*tment)
        +(2* 365.25)*(17.0000000/49673*(1-tment)+18.0000000/50284*tment)
        +(days-4* 365.25)*(2.0000000/8642*(1-tment)+2.0000000/7599*tment);
    daysyears = days/365.25; 
run; 

proc gplot data=hazdat; where tment=1;
    plot (naa pwch)*daysyears/overlay haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 0.7 by 0.1 minor=none
      label=(a=90 'Cumulative hazard');
    symbol1  v=none i=stepjl r=1 c=red;
    symbol2 v=none i=join r=1 c=blue;
run;

In-text, p. 43: Cox model with treatment only

Code show/hide
# Fit a Cox model using the pbc3 data with treatment as a covariate
library(survival)
coxfit <- coxph(Surv(days, status != 0) ~ tment, data = pbc3, method = "breslow")
summary(coxfit)
Call:
coxph(formula = Surv(days, status != 0) ~ tment, data = pbc3, 
    method = "breslow")

  n= 349, number of events= 90 

          coef exp(coef) se(coef)      z Pr(>|z|)
tment -0.05854   0.94314  0.21092 -0.278    0.781

      exp(coef) exp(-coef) lower .95 upper .95
tment    0.9431       1.06    0.6238     1.426

Concordance= 0.517  (se = 0.029 )
Likelihood ratio test= 0.08  on 1 df,   p=0.8
Wald test            = 0.08  on 1 df,   p=0.8
Score (logrank) test = 0.08  on 1 df,   p=0.8
Code show/hide
proc phreg data=pbc3;
    model days*status(0)=tment/rl;
run;

Figure 2.5

Code show/hide
# Extract cumulative baseline hazard
coxcumhaz <- basehaz(coxfit, centered = FALSE)
# Collect data for plot (Breslow estimate)
coxdata <- data.frame(cumhaz = coxcumhaz$hazard, 
                      time = coxcumhaz$time, 
                      tment = rep("0", nrow(coxcumhaz)), 
                      type = rep("Breslow estimate", nrow(coxcumhaz)))

fig2.5 <- ggplot(aes(x = time / 365.25, y = cumhaz, linetype = tment),
                 data = coxdata) + 
  geom_step(size = 1) + 
  xlab("Time since randomization (years)") + 
  ylab("Cumulative baseline hazard") + 
  scale_linetype_discrete("Treatment", labels = c("Placebo")) + 
  scale_x_continuous(expand = expansion(mult = c(0.001, 0.05)),
                     limits = c(0, 6),
                     breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05))) +
  theme_general+theme(legend.position = "none")

fig2.5

Code show/hide
data cov;
    tment=0;
run;
proc phreg data=pbc3;
    model days*status(0)=tment/rl;
    baseline out=breslow cumhaz=breslow covariates=cov;
run;
data breslow; 
    set breslow; 
    daysyears = days/365.25; 
run; 
proc gplot data=breslow;
    plot breslow*daysyears=tment/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 6 by 1 minor=none label=('Years');
    axis2 order=0 to 0.7 by 0.1 minor=none label=(a=90 'Cumulative baseline hazard');
    symbol1  v=none i=stepjl c=blue;
run;

Table 2.3

Code show/hide
# Average covariate values of albumin and bilirubin per treatment 
library(dplyr)
tabledata <- pbc3 %>% 
  group_by(tment, tment_char) %>% 
  summarise(n = sum(id != 0),  
            average_albumin = mean(alb, na.rm = TRUE), 
            # NOTE: Removes missing observations from mean computation
            average_bilirubin = mean(bili, na.rm = TRUE), 
            )
data.frame(tabledata)
  tment tment_char   n average_albumin average_bilirubin
1     0    Placebo 173        39.26101          42.33674
2     1        CyA 176        37.50731          48.55540
Code show/hide
proc means data=pbc3 mean;
  class tment;
    var alb bili;
run;

Table 2.4

Code show/hide
# Cox model with treatment, albumin and bilirubin as covariates 
library(survival)
coxfit <- coxph(Surv(days, status != 0) ~ tment + alb + bili,
                data = pbc3)
coxfit
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + bili, 
    data = pbc3)

            coef  exp(coef)   se(coef)      z        p
tment -0.4965639  0.6086184  0.2256162 -2.201   0.0277
alb   -0.1156596  0.8907784  0.0212810 -5.435 5.48e-08
bili   0.0089494  1.0089895  0.0009801  9.131  < 2e-16

Likelihood ratio test=99.06  on 3 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
proc phreg data=pbc3;
    model days*status(0)=tment alb bili / rl;
run;

In-text, p. 45: Poisson model with treatment

Code show/hide
library(stats)
library(jtools) # For using summ() function to get exp(beta)
options(contrasts=c("contr.treatment", "contr.poly"))
poism <- glm(fail ~ -1 +tment + timegroup +
               offset(log(risktime/365.25)), data = pbc3mult,family = poisson)
summ(poism, exp = T,digits = 3, model.info = F, model.fit = F)
exp(Est.) 2.5% 97.5% z val. p
tment 0.942 0.623 1.424 -0.284 0.776
timegroup1 0.090 0.064 0.127 -13.801 0.000
timegroup2 0.132 0.089 0.194 -10.242 0.000
timegroup3 0.092 0.034 0.251 -4.677 0.000
Standard errors: MLE
Code show/hide
proc genmod data=pbc3mult;
  class tment(ref="0") interv ;
  model fail = tment interv / dist=poi offset=logrisk;
    estimate 'CyA  vs placebo' tment 1 -1 / exp;
  estimate '0-2 years' intercept 1 tment 0 1 interv 1 0 0 / exp;
  estimate '2-4 years' intercept 1 tment 0 1 interv 0 1 0 / exp;
  estimate '4+ years'  intercept 1 tment 0 1 interv 0 0 1 / exp;
run;

Table 2.5

Code show/hide
# Poisson model with treatment, albumin and bilirubin as covariates
poismod_t25 <- glm(fail ~ tment + alb + bili+ timegroup-1 +
                     offset(log(risktime/365.25)),data=pbc3mult,family=poisson)
summary(poismod_t25)

Call:
glm(formula = fail ~ tment + alb + bili + timegroup - 1 + offset(log(risktime/365.25)), 
    family = poisson, data = pbc3mult)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
tment      -0.4753406  0.2240970  -2.121  0.03391 *  
alb        -0.1123359  0.0212829  -5.278  1.3e-07 ***
bili        0.0084593  0.0009392   9.007  < 2e-16 ***
timegroup1  1.2879891  0.8058179   1.598  0.10996    
timegroup2  2.1457404  0.8328628   2.576  0.00999 ** 
timegroup3  1.6076330  1.0120683   1.588  0.11218    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1637.87  on 611  degrees of freedom
Residual deviance:  350.73  on 605  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 538.73

Number of Fisher Scoring iterations: 6
Code show/hide
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= tment alb  bili interv / dist=poi offset=logrisk;
    estimate "alpha1" intercept 1 tment 0 1 alb 0 bili 0 interv 1 0 0 ;
    estimate "alpha2" intercept 1 tment 0 1 alb 0 bili 0 interv 0 1 0 ;
    estimate "alpha3" intercept 1 tment 0 1 alb 0 bili 0 interv 0 0 1 ;
run;

* proc icphreg can fit the same model with less code and data manipulations;
data pbc3; 
  set pbc3;
    years=days/365.25;
run;
proc icphreg data=pbc3;
  class tment(ref="0");
    model years*status(0) = tment alb bili / 
        basehaz=pch(intervals=(2 4));
run;

Table 2.6

Code show/hide
#------------------------------------------------------------------#
# Table 2.6 Cox models 
#------------------------------------------------------------------#

# Models for LR tests
library(lmtest)
base_coxmod <- coxph(Surv(days, status != 0) ~ tment + alb + bili, 
                     eps = 1e-8, method = "breslow", data = pbc3)
base_coxmod_log <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili,
                         eps = 1e-8, method = "breslow", data = pbc3)

# Splines Cox 1
coxmod_t26_l1 <- coxph(Surv(days, status != 0) ~ tment + alb + albnorm + bili,
                       eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t26_l1
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + albnorm + 
    bili, data = pbc3, method = "breslow", eps = 1e-08)

              coef  exp(coef)   se(coef)      z      p
tment   -0.4750722  0.6218402  0.2261462 -2.101 0.0357
alb     -0.0854244  0.9181225  0.0453150 -1.885 0.0594
albnorm -0.0557001  0.9458228  0.0726447 -0.767 0.4432
bili     0.0089810  1.0090214  0.0009843  9.124 <2e-16

Likelihood ratio test=99.66  on 4 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t26_l1, base_coxmod)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + albnorm + bili
Model 2: Surv(days, status != 0) ~ tment + alb + bili
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   4 -413.12                     
2   3 -413.42 -1 0.6001     0.4386
Code show/hide
# Quadratic Cox 1
coxmod_t26_q1 <- coxph(Surv(days, status != 0) ~ tment + alb + I(alb*alb) + bili,
                       eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t26_q1
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + I(alb * 
    alb) + bili, data = pbc3, method = "breslow", eps = 1e-08)

                   coef  exp(coef)   se(coef)      z      p
tment        -0.4983093  0.6075570  0.2274284 -2.191 0.0284
alb          -0.1295361  0.8785029  0.2130539 -0.608 0.5432
I(alb * alb)  0.0001944  1.0001945  0.0029754  0.065 0.9479
bili          0.0089499  1.0089901  0.0009801  9.132 <2e-16

Likelihood ratio test=99.07  on 4 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t26_q1, base_coxmod)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + I(alb * alb) + bili
Model 2: Surv(days, status != 0) ~ tment + alb + bili
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   4 -413.41                     
2   3 -413.42 -1 0.0042      0.948
Code show/hide
# Splines Cox 2
coxmod_t26_l2 <- coxph(Surv(days, status != 0) ~ tment + alb + 
                         bili + bilihigh + bilitoohigh + bilimuchtoohigh,
                       eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t26_l2
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + bili + 
    bilihigh + bilitoohigh + bilimuchtoohigh, data = pbc3, method = "breslow", 
    eps = 1e-08)

                     coef exp(coef)  se(coef)      z        p
tment           -0.598494  0.549639  0.229637 -2.606  0.00915
alb             -0.090970  0.913045  0.021894 -4.155 3.25e-05
bili             0.062425  1.064414  0.062296  1.002  0.31631
bilihigh        -0.014631  0.985476  0.085327 -0.171  0.86386
bilitoohigh     -0.002584  0.997419  0.052961 -0.049  0.96109
bilimuchtoohigh -0.040017  0.960773  0.026486 -1.511  0.13082

Likelihood ratio test=123.5  on 6 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t26_l2, base_coxmod)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + bili + bilihigh + bilitoohigh + 
    bilimuchtoohigh
Model 2: Surv(days, status != 0) ~ tment + alb + bili
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   6 -401.22                         
2   3 -413.42 -3 24.396  2.064e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code show/hide
# Quadratic Cox 2
coxmod_t26_q2 <- coxph(Surv(days, status != 0) ~ tment+alb+bili + I(bili*bili),
                       eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t26_q2
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + bili + 
    I(bili * bili), data = pbc3, method = "breslow", eps = 1e-08)

                     coef  exp(coef)   se(coef)      z        p
tment          -5.210e-01  5.939e-01  2.233e-01 -2.333 0.019640
alb            -1.019e-01  9.031e-01  2.168e-02 -4.703 2.56e-06
bili            1.998e-02  1.020e+00  3.263e-03  6.122 9.21e-10
I(bili * bili) -3.051e-05  1.000e+00  9.098e-06 -3.354 0.000798

Likelihood ratio test=111.4  on 4 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t26_q2, base_coxmod)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + bili + I(bili * bili)
Model 2: Surv(days, status != 0) ~ tment + alb + bili
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   4 -407.24                         
2   3 -413.42 -1 12.351  0.0004408 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code show/hide
# Splines Cox 3
coxmod_t26_l3 <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili +
                         logbilihigh + logbilitoohigh + logbilimuchtoohigh,
                       eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t26_l3
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + log2bili + 
    logbilihigh + logbilitoohigh + logbilimuchtoohigh, data = pbc3, 
    method = "breslow", eps = 1e-08)

                       coef exp(coef) se(coef)      z        p
tment              -0.58019   0.55979  0.22957 -2.527   0.0115
alb                -0.08852   0.91528  0.02182 -4.056 4.99e-05
log2bili            0.20135   1.22306  0.46511  0.433   0.6651
logbilihigh         0.93567   2.54893  0.91503  1.023   0.3065
logbilitoohigh     -0.38647   0.67945  1.29333 -0.299   0.7651
logbilimuchtoohigh -0.18140   0.83410  0.98801 -0.184   0.8543

Likelihood ratio test=121.6  on 6 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t26_l3, base_coxmod_log)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + log2bili + logbilihigh + 
    logbilitoohigh + logbilimuchtoohigh
Model 2: Surv(days, status != 0) ~ tment + alb + log2bili
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   6 -402.13                     
2   3 -402.94 -3 1.6144     0.6561
Code show/hide
# Quadratic Cox 3
coxmod_t26_q3 <- coxph(Surv(days, status != 0) ~ tment+alb+log2bili + log2bili2,
                       eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t26_q3
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + log2bili + 
    log2bili2, data = pbc3, method = "breslow", eps = 1e-08)

              coef exp(coef) se(coef)      z        p
tment     -0.57408   0.56323  0.22454 -2.557   0.0106
alb       -0.09150   0.91256  0.02192 -4.175 2.98e-05
log2bili   0.58231   1.79017  0.49992  1.165   0.2441
log2bili2  0.00715   1.00717  0.04277  0.167   0.8672

Likelihood ratio test=120  on 4 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t26_q3, base_coxmod_log)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + log2bili + log2bili2
Model 2: Surv(days, status != 0) ~ tment + alb + log2bili
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   4 -402.93                     
2   3 -402.94 -1 0.0277     0.8677
Code show/hide
#------------------------------------------------------------------#
# Table 2.6 Poisson models 
#------------------------------------------------------------------#

# Make the data ready for Poisson models using survSplit from survival package
library(survival)
pbc3mult <- as.data.frame(survSplit(Surv(days, fail) ~ ., pbc3,
                                    cut = cuts[-1], 
                                    episode = "timegroup"))
pbc3mult$risktime <- pbc3mult$days - cuts[pbc3mult$timegroup] 
pbc3mult$timegroup <- as.factor(pbc3mult$timegroup)

# Models for LR test
base_poismod <- glm(fail ~ timegroup + tment + alb + bili +
                      offset(log(risktime)), data = pbc3mult, family = poisson)
base_poismod_log <- glm(fail ~ timegroup + tment + alb + log2bili +
                          offset(log(risktime)), data=pbc3mult,family=poisson)

# Splines Poisson 1
poismod_t26_l1 <- glm(fail ~ timegroup + tment + alb + albnorm + bili +
                        offset(log(risktime)), data=pbc3mult, family=poisson)
summary(poismod_t26_l1)

Call:
glm(formula = fail ~ timegroup + tment + alb + albnorm + bili + 
    offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) -5.1177409  1.6273358  -3.145  0.00166 ** 
timegroup1  -0.3106216  0.6042733  -0.514  0.60722    
timegroup2   0.5433694  0.6022062   0.902  0.36690    
tment       -0.4576984  0.2247281  -2.037  0.04168 *  
alb         -0.0864388  0.0451742  -1.913  0.05569 .  
albnorm     -0.0473832  0.0720301  -0.658  0.51065    
bili         0.0084777  0.0009419   9.001  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 350.28  on 604  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 540.28

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t26_l1, base_poismod)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + albnorm + bili + offset(log(risktime))
Model 2: fail ~ timegroup + tment + alb + bili + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   7 -263.14                     
2   6 -263.36 -1 0.4404     0.5069
Code show/hide
# Quadratic Poisson 1
poismod_t26_q1 <- glm(fail ~ timegroup + tment + alb + I(alb*alb) + bili +
                        offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t26_q1)

Call:
glm(formula = fail ~ timegroup + tment + alb + I(alb * alb) + 
    bili + offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -3.8279840  3.8141837  -1.004   0.3156    
timegroup1   -0.3227765  0.6048377  -0.534   0.5936    
timegroup2    0.5362133  0.6024251   0.890   0.3734    
tment        -0.4786804  0.2258499  -2.119   0.0341 *  
alb          -0.1387716  0.2101463  -0.660   0.5090    
I(alb * alb)  0.0003705  0.0029295   0.126   0.8993    
bili          0.0084614  0.0009391   9.010   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 350.71  on 604  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 540.71

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t26_q1, base_poismod)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + I(alb * alb) + bili + offset(log(risktime))
Model 2: fail ~ timegroup + tment + alb + bili + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   7 -263.36                     
2   6 -263.36 -1 0.0158     0.8999
Code show/hide
# Splines Poisson 2
poismod_t26_l2 <- glm(fail ~ timegroup + tment + alb + 
                        bili + bilihigh + bilitoohigh + bilimuchtoohigh +
                        offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t26_l2)

Call:
glm(formula = fail ~ timegroup + tment + alb + bili + bilihigh + 
    bilitoohigh + bilimuchtoohigh + offset(log(risktime)), family = poisson, 
    data = pbc3mult)

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -6.453990   1.368380  -4.717  2.4e-06 ***
timegroup1      -0.427435   0.604700  -0.707   0.4797    
timegroup2       0.425435   0.603554   0.705   0.4809    
tment           -0.569111   0.226892  -2.508   0.0121 *  
alb             -0.086487   0.021876  -3.954  7.7e-05 ***
bili             0.061654   0.062308   0.989   0.3224    
bilihigh        -0.016757   0.085194  -0.197   0.8441    
bilitoohigh      0.002652   0.052566   0.050   0.9598    
bilimuchtoohigh -0.042801   0.026193  -1.634   0.1022    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 326.18  on 602  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 520.18

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t26_l2, base_poismod)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + bili + bilihigh + bilitoohigh + 
    bilimuchtoohigh + offset(log(risktime))
Model 2: fail ~ timegroup + tment + alb + bili + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   9 -251.09                         
2   6 -263.36 -3 24.545  1.922e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code show/hide
# Quadratic Poisson 2
poismod_t26_q2 <- glm(fail ~ timegroup + tment + alb + bili + I(bili*bili) +
                        offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t26_q2)

Call:
glm(formula = fail ~ timegroup + tment + alb + bili + I(bili * 
    bili) + offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)    -5.163e+00  1.053e+00  -4.902 9.47e-07 ***
timegroup1     -3.819e-01  6.047e-01  -0.631  0.52774    
timegroup2      4.555e-01  6.033e-01   0.755  0.45030    
tment          -4.987e-01  2.217e-01  -2.250  0.02447 *  
alb            -9.774e-02  2.163e-02  -4.518 6.24e-06 ***
bili            1.998e-02  3.266e-03   6.118 9.49e-10 ***
I(bili * bili) -3.152e-05  9.069e-06  -3.475  0.00051 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 337.38  on 604  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 527.38

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t26_q2, base_poismod)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + bili + I(bili * bili) + offset(log(risktime))
Model 2: fail ~ timegroup + tment + alb + bili + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   7 -256.69                         
2   6 -263.36 -1 13.345  0.0002591 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code show/hide
# Splines Poisson 3
poismod_t26_l3 <- glm(fail ~ timegroup + tment + alb + 
                        log2bili + logbilihigh + logbilitoohigh + logbilimuchtoohigh +
                        offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t26_l3)

Call:
glm(formula = fail ~ timegroup + tment + alb + log2bili + logbilihigh + 
    logbilitoohigh + logbilimuchtoohigh + offset(log(risktime)), 
    family = poisson, data = pbc3mult)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -6.52003    1.99572  -3.267 0.001087 ** 
timegroup1         -0.40679    0.60392  -0.674 0.500579    
timegroup2          0.41834    0.60355   0.693 0.488223    
tment              -0.55563    0.22703  -2.447 0.014390 *  
alb                -0.08440    0.02181  -3.870 0.000109 ***
log2bili            0.19800    0.46549   0.425 0.670583    
logbilihigh         0.88154    0.91202   0.967 0.333756    
logbilitoohigh     -0.23363    1.27745  -0.183 0.854884    
logbilimuchtoohigh -0.31390    0.97076  -0.323 0.746425    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 327.54  on 602  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 521.54

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t26_l3, base_poismod_log)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + log2bili + logbilihigh + logbilitoohigh + 
    logbilimuchtoohigh + offset(log(risktime))
Model 2: fail ~ timegroup + tment + alb + log2bili + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   9 -251.77                     
2   6 -252.62 -3 1.7135     0.6339
Code show/hide
# Quadratic Poisson 3
poismod_t26_q3 <- glm(fail ~ timegroup + tment + alb + log2bili + log2bili2 +
                        offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t26_q3)

Call:
glm(formula = fail ~ timegroup + tment + alb + log2bili + log2bili2 + 
    offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -7.860086   1.851557  -4.245 2.18e-05 ***
timegroup1  -0.406020   0.604625  -0.672   0.5019    
timegroup2   0.439074   0.603467   0.728   0.4669    
tment       -0.545495   0.222667  -2.450   0.0143 *  
alb         -0.087106   0.021892  -3.979 6.92e-05 ***
log2bili     0.627828   0.497489   1.262   0.2070    
log2bili2    0.001641   0.042356   0.039   0.9691    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 329.25  on 604  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 519.25

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t26_q3, base_poismod_log)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + log2bili + log2bili2 + offset(log(risktime))
Model 2: fail ~ timegroup + tment + alb + log2bili + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   7 -252.62                     
2   6 -252.62 -1 0.0015     0.9691
Code show/hide
** Cox models **;

* Base models for LR tests; 
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb bili/rl ties=breslow CONVERGELIKE=1E-8 type3(lr);
run;
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb log2bili/rl ties=breslow CONVERGELIKE=1E-8 type3(lr);
run;

* Splines Cox 1; 
* LRT can here be read of type3 test result; 
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb albnorm bili/rl ties=breslow CONVERGELIKE=1E-8 type3(lr);
run;

* Quadratic Cox 1; 
* LRT can here be read of type3 test result; 
proc phreg data=pbc3;
  alb2=alb*alb;
    class tment (ref='0');
    model days*status(0)=tment alb10 alb2 bili100/rl type3(LR);
run;

* Splines Cox 2;
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb bili bilihigh bilitoohigh 
                             bilimuchtoohigh/rl type3(lr) ties=breslow CONVERGELIKE=1E-8;
  Wald_same_as_LRT: test  bilihigh=bilitoohigh=bilimuchtoohigh=0;
run;
* LRT; 
data p;
    chi2=826.830-802.434;
    p=1-probchi(chi2,3);
proc print;
run;

* Quadratic Cox 2; 
* LRT can here be read of type3 test result; 
proc phreg data=pbc3;
  bili2=bili*bili;
    class tment (ref='0');
    model days*status(0)=tment alb10 bili bili2 / rl type3(LR);
run;

* Splines Cox 3;
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb log2bili logbilihigh 
                             logbilitoohigh logbilimuchtoohigh/rl ties=breslow CONVERGELIKE=1E-8;
  Wald_same_as_LRT: test  logbilihigh=logbilitoohigh=logbilimuchtoohigh=0;
run;
* LRT; 
data p;
    chi2=805.881-804.267;
    p=1-probchi(chi2,3);
proc print;
run;

* Quadratic Cox 3; 
* LRT can here be read of type3 test result; 
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb10 log2bili log2bili2/rl type3(LR);
run;

* Split data into timegroups for Poisson models; 
data pbc3mult; 
    set pbc3;
    fail=(days<=2 * 365.25)*(status ne 0);
    risktime=min(2 * 365.25,days);
    logrisk=log(risktime); interv=1; output;  
    if days>2* 365.25 then do;
    fail=(days<=4* 365.25)*(status ne 0);
    risktime=min(2* 365.25,days-2* 365.25);
    logrisk=log(risktime); interv=2; output; end;
    if days>4* 365.25 then do;
    fail=status ne 0; 
    risktime=days-4* 365.25;
    logrisk=log(risktime); interv=3; output; end;
run;


** Possion models **;

* Models to compare with for the LR tests; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb bili/ dist=poi offset=logrisk type3;
run;

proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb log2bili/ dist=poi offset=logrisk type3;
run;

* Splines Poisson 1; 
* LRT can here be read of type3 test result; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb albnorm bili/ dist=poi offset=logrisk type3;
run;

* Quadratic Poisson 1;
* LRT can here be read of type3 test result; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb10 alb2 bili100/ dist=poi offset=logrisk type3;
run;

* Splines Poisson 2; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb bili bilihigh bilitoohigh  
                 bilimuchtoohigh/ dist=poi offset=logrisk type3;
run;
* LRT; 
data p;
    chi2=350.7253-326.1805;
    p=1-probchi(chi2,3);
proc print;
run;

* Quadratic Poisson 2;
* LRT can here be read of type3 test result; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb10 bili100 bili2/ dist=poi offset=logrisk type3;
run;

* Splines Poisson 3;
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb log2bili logbilihigh logbilitoohigh 
                 logbilimuchtoohigh / dist=poi offset=logrisk type3;
run;
* LRT; 
data p;
    chi2=329.2509-327.5375;
    p=1-probchi(chi2,3);
proc print;
run;

* Quadratic Poisson 3;
* LRT can here be read of type3 test result; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb10 log2bili log2bili2/ dist=poi offset=logrisk type3;
run;

Figure 2.6

Code show/hide
# The below linear predictors include estimates from the following models
pbc3mult$timegroup <- relevel(as.factor(pbc3mult$timegroup), ref = "3")
pbc3mult$tment_char <- as.factor(pbc3mult$tment_char)
pbc3mult$tment_char <- relevel(pbc3mult$tment_char, ref = "Placebo")
base_poismod <- glm(fail ~ offset(log(risktime)) + as.factor(timegroup) + 
                      tment_char + alb + bili,
                    data = pbc3mult, family = poisson)
poismod_t26_l1 <- glm(fail ~ offset(log(risktime)) + as.factor(timegroup) + 
                        tment_char + alb + bili + albnorm,
                      data = pbc3mult, family = poisson)
poismod_t26_l3 <- glm(fail ~ offset(log(risktime)) + as.factor(timegroup) + 
                        tment_char + alb + 
                        bili + bilihigh + bilitoohigh + bilimuchtoohigh,
                      data = pbc3mult,family = poisson)
# Make a dataset with linear predictor information
lin2 <- pbc3
lin2$lp1 <- with(pbc3, 1.6076-0.1123*alb)
lin2$lp2 <- with(pbc3, 0.7828-0.0864*alb-0.0474*albnorm)
lin2$lp3 <- with(pbc3, 1.6076+0.0085*bili-38.7*0.1123)
lin2$lp4 <- with(pbc3, -0.5534+0.0617*bili-0.0168*bilihigh+
                   0.0027*bilitoohigh-0.0428*bilimuchtoohigh-0.0865*38.7)
lin2row <- data.frame(effect = c(rep("Linear effect", nrow(lin2)),
                                 rep("Effect as linear spline", nrow(lin2))), 
                      lp = c(lin2$lp1, lin2$lp2), alb = c(lin2$alb, lin2$alb))
library(ggplot2)
fig2.6 <- ggplot(aes(x = alb, y = lp, linetype = effect), data = lin2row) +
  geom_line(size = 1) + 
  xlab("Albumin") + 
  ylab("Linear predictor") + 
  scale_linetype_discrete("Effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05))) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.6

Code show/hide
* Below is estimates from the following models collected; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb bili/ dist=poi offset=logrisk type3;
run;
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb albnorm bili/ dist=poi offset=logrisk type3;
run;
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb bili bilihigh bilitoohigh 
             logbilimuchtoohigh / dist=poi offset=logrisk type3;
run;

* Collect linear predictor information; 
data lin2; set pbc3;
    lp1=1.6076-0.1123*alb; * bilirubin = 0; 
    lp2=0.7828-0.0864*alb-0.0474*albnorm;
    lp3=1.6076+0.0085*bili-38.7*0.1123; *also an effect of albumin at 38.7; 
    lp4=-0.5534+0.0617*bili-0.0168*bilihigh+0.0027*bilitoohigh
        -0.0428*bilimuchtoohigh-0.0865*38.7;
run;
proc sort data=lin2; by alb; run;
legend1 label=none;
proc gplot data=lin2;
    plot (lp2 lp1)*alb/overlay haxis=axis1 vaxis=axis2 legend=legend1;
    axis1 order=20 to 60 by 10 minor=none label=('Se-albumin');
    axis2 order=-6 to 0 by 1 minor=none label=(a=90 'Linear predictor');
    symbol1 v=none i=join r=1 c=red;
    symbol2 v=none i=join r=1 c=blue;
    label lp2="Effect as linear spline";
    label lp1="Linear effect";
run; 
quit;

Figure 2.7

Code show/hide
# bilirubin and the two last linear predictors
lin2row2 <- data.frame(effect = c(rep("Linear effect", nrow(lin2)),
                                  rep("Effect as linear spline", nrow(lin2))), 
                      lp = c(lin2$lp3, lin2$lp4),
                      bili = c(lin2$bili, lin2$bili))

fig2.7 <- ggplot(aes(x = bili, y = lp, linetype = effect), data = lin2row2) + 
  geom_line(size = 1) + 
  xlab("Bilirubin") + 
  ylab("Linear predictor") + 
  scale_linetype_discrete("Effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05))) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.7

Code show/hide
proc sort data=lin2; by bili; run;
proc gplot data=lin2;
    plot (lp4 lp3)*bili/overlay haxis=axis1 vaxis=axis2 legend=legend1;
    axis1 order=0 to 500 by 100 minor=none label=('Se-bilirubin');
    axis2 order=-5 to 2 by 1 minor=none label=(a=90 'Linear predictor');
    symbol1 v=none i=join r=1 c=red;
    symbol2 v=none i=join r=1 c=blue;
    label lp4="Effect as linear spline";
    label lp3="Linear effect";
run;quit;

Figure 2.8

Code show/hide
# The below linear predictors include estimates from the following models
base_poismod2_log <- glm(fail ~ offset(log(risktime)) + as.factor(timegroup) + 
                           tment_char + alb + log2bili,
                         data = pbc3mult, family = poisson)
poismod_t26_l3 <- glm(fail ~ offset(log(risktime)) + as.factor(timegroup) + 
                        tment_char + alb + log2bili + 
                        logbilihigh + logbilitoohigh + logbilimuchtoohigh,
                        data = pbc3mult, family = poisson)
# Make a dataset with linear predictor information
log2 <- pbc3
log2$lp3 <- with(pbc3, -2.0162+0.6469*log2bili-38.7*0.087)
log2$lp4 <- with(pbc3, -0.6194+0.198*log2bili
                 +0.8815*logbilihigh-0.2336*logbilitoohigh
                 -0.3139*logbilimuchtoohigh-0.0844*38.7)
log2row <- data.frame(effect = c(rep("Linear effect", nrow(log2)),
                                 rep("Effect as linear spline", nrow(log2))), 
                      lp = c(log2$lp3, log2$lp4),
                      log2bili = c(log2$log2bili, log2$log2bili)
                      )

fig2.8 <- ggplot(aes(x = log2bili, y = lp, linetype = effect), data = log2row) + 
  geom_line(size = 1) + 
  xlab(expression(log[2] * "(bilirubin)")) +
  ylab("Linear predictor") + 
  scale_linetype_discrete("Effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05))) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.8

Code show/hide
* Linear predictors from the following models; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb log2bili / dist=poi offset=logrisk type3;
run;
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb log2bili logbilihigh logbilitoohigh 
                 logbilimuchtoohigh / dist=poi offset=logrisk type3;
run;

* Create data with linear predictors as a function of bilirubin values; 
data log2; 
    set pbc3;
    lp3=-2.0162+0.6469*log2bili-38.7*0.087;
    lp4=-0.6194+0.198*log2bili+0.8815*logbilihigh-0.2336*logbilitoohigh
        -0.3139*logbilimuchtoohigh-0.0844*38.7;
run; 
proc sort data=log2;by bili; run;
proc gplot data=log2;
    plot (lp4 lp3)*log2bili/overlay haxis=axis1 vaxis=axis2 legend=legend1;
    axis1 order=1 to 9 by 1 minor=none label=('log2(Se-bilirubin)');
    axis2 order=-5 to 1 by 1 minor=none label=(a=90 'Linear predictor');
    symbol1 v=none i=join r=1 c=red;
    symbol2 v=none i=join r=1 c=blue;
    label lp4="Effect as linear spline";
    label lp3="Linear effect";
run;quit;

Table 2.7

Code show/hide
# Cox model
coxmod_t27 <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili,
                    eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t27
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + log2bili, 
    data = pbc3, method = "breslow", eps = 1e-08)

             coef exp(coef) se(coef)      z        p
tment    -0.57406   0.56323  0.22447 -2.557   0.0105
alb      -0.09093   0.91308  0.02164 -4.201 2.65e-05
log2bili  0.66500   1.94449  0.07443  8.935  < 2e-16

Likelihood ratio test=120  on 3 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
# Poisson model
poismod_t27 <- glm(fail ~ tment + alb + log2bili + timegroup + 
                     offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t27) 

Call:
glm(formula = fail ~ tment + alb + log2bili + timegroup + offset(log(risktime)), 
    family = poisson, data = pbc3mult)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -7.47780    0.97577  -7.663 1.81e-14 ***
tment       -0.54549    0.22265  -2.450 0.014288 *  
alb         -0.08697    0.02162  -4.024 5.73e-05 ***
log2bili     0.64689    0.07326   8.830  < 2e-16 ***
timegroup3  -0.43894    0.60346  -0.727 0.466999    
timegroup1  -0.84401    0.22867  -3.691 0.000223 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 329.25  on 605  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 517.25

Number of Fisher Scoring iterations: 6
Code show/hide
* Cox model; 
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb log2bili / rl;
run;
* Poisson model;
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb log2bili / dist=poi offset=logrisk;
run;

Table 2.8

Code show/hide
# Cox models
# Model for LR comparison
coxmod_t28_base <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili,
                      eps = 1e-8, method = "breslow", data = pbc3)
# Cox model 1
pbc3$alb0 <- (pbc3$tment==0)*pbc3$alb
pbc3$alb1 <- (pbc3$tment==1)*pbc3$alb
coxmod_t28_1 <- coxph(Surv(days, status != 0) ~ tment + alb0 + alb1 + log2bili,
                      eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t28_1
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb0 + alb1 + 
    log2bili, data = pbc3, method = "breslow", eps = 1e-08)

              coef exp(coef)  se(coef)      z        p
tment    -0.009736  0.990312  1.559670 -0.006 0.995020
alb0     -0.081319  0.921900  0.034031 -2.390 0.016869
alb1     -0.097043  0.907517  0.027344 -3.549 0.000387
log2bili  0.664413  1.943350  0.074494  8.919  < 2e-16

Likelihood ratio test=120.2  on 4 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t28_1, coxmod_t28_base)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb0 + alb1 + log2bili
Model 2: Surv(days, status != 0) ~ tment + alb + log2bili
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   4 -402.87                     
2   3 -402.94 -1 0.1338     0.7145
Code show/hide
# Cox model 2
pbc3$bili0 <- (pbc3$tment==0)*pbc3$log2bili
pbc3$bili1 <- (pbc3$tment==1)*pbc3$log2bili
coxmod_t28_2 <- coxph(Surv(days, status != 0) ~ tment + alb + bili0 + bili1,
                      eps = 1e-8, method = "breslow", data = pbc3)
coxmod_t28_2
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + bili0 + 
    bili1, data = pbc3, method = "breslow", eps = 1e-08)

          coef exp(coef) se(coef)      z        p
tment  0.19880   1.21994  0.85653  0.232    0.816
alb   -0.09331   0.91091  0.02191 -4.260 2.05e-05
bili0  0.72558   2.06593  0.09865  7.355 1.91e-13
bili1  0.59345   1.81022  0.10604  5.596 2.19e-08

Likelihood ratio test=120.9  on 4 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
lrtest(coxmod_t28_2, coxmod_t28_base)
Likelihood ratio test

Model 1: Surv(days, status != 0) ~ tment + alb + bili0 + bili1
Model 2: Surv(days, status != 0) ~ tment + alb + log2bili
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   4 -402.51                     
2   3 -402.94 -1 0.8644     0.3525
Code show/hide
# Poisson models
# Model for LR comparison - no interaction
poismod_t28_base <- glm(fail ~ timegroup + tment + alb + log2bili+
                          offset(log(risktime)), data=pbc3mult, family=poisson)

# Poisson model 1
pbc3mult$alb0 <- (pbc3mult$tment==0)*pbc3mult$alb
pbc3mult$alb1 <- (pbc3mult$tment==1)*pbc3mult$alb
poismod_t28_1 <- glm(fail ~ tment + alb0 + alb1 + log2bili + timegroup +
                       offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t28_1) 

Call:
glm(formula = fail ~ tment + alb0 + alb1 + log2bili + timegroup + 
    offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -7.86906    1.36181  -5.778 7.54e-09 ***
tment        0.09200    1.55226   0.059 0.952738    
alb0        -0.07628    0.03359  -2.271 0.023162 *  
alb1        -0.09406    0.02755  -3.414 0.000639 ***
log2bili     0.64624    0.07334   8.812  < 2e-16 ***
timegroup3  -0.43125    0.60379  -0.714 0.475080    
timegroup1  -0.84426    0.22851  -3.695 0.000220 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 329.08  on 604  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 519.08

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t28_base, poismod_t28_1)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + log2bili + offset(log(risktime))
Model 2: fail ~ tment + alb0 + alb1 + log2bili + timegroup + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   6 -252.62                     
2   7 -252.54  1 0.1723      0.678
Code show/hide
# Poisson model 2
pbc3mult$bili0 <- (pbc3mult$tment==0)*pbc3mult$log2bili
pbc3mult$bili1 <- (pbc3mult$tment==1)*pbc3mult$log2bili
poismod_t28_2 <- glm(fail ~ tment + alb + bili0 + bili1 + timegroup +
                       offset(log(risktime)), data = pbc3mult, family = poisson)
summary(poismod_t28_2) 

Call:
glm(formula = fail ~ tment + alb + bili0 + bili1 + timegroup + 
    offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -7.71515    1.01592  -7.594 3.10e-14 ***
tment        0.18058    0.84828   0.213 0.831422    
alb         -0.08909    0.02185  -4.078 4.54e-05 ***
bili0        0.70368    0.09717   7.242 4.42e-13 ***
bili1        0.57982    0.10472   5.537 3.08e-08 ***
timegroup3  -0.41713    0.60400  -0.691 0.489808    
timegroup1  -0.85694    0.22932  -3.737 0.000186 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 328.47  on 604  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 518.47

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t28_base, poismod_t28_2)
Likelihood ratio test

Model 1: fail ~ timegroup + tment + alb + log2bili + offset(log(risktime))
Model 2: fail ~ tment + alb + bili0 + bili1 + timegroup + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   6 -252.62                     
2   7 -252.24  1 0.7787     0.3775
Code show/hide
* Cox model 1; 
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb tment*alb log2bili /rl type3 (LR);
    estimate 'alb, tment=0' alb 1;
    estimate 'alb, tment=1' alb 1 tment*alb 1;
run;
* Cox model 2; 
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb log2bili tment*log2bili/rl  type3 (LR);
    estimate 'log2bili, tment=0' log2bili 1;
    estimate 'log2bili, tment=1' log2bili 1 tment*log2bili 1;
run;

* Poisson model 1; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment log2bili alb(tment) / dist=poi offset=logrisk type3;
    contrast 'LRT' alb(tment) 1 -1;
run;
* Poisson model 2; 
proc genmod data=pbc3mult;
    class tment (ref='0') interv;
    model fail= interv tment alb log2bili(tment)/ dist=poi offset=logrisk type3;
    contrast 'LRT' log2bili(tment) 1 -1;
run;

Table 2.9

Code show/hide
pbc3mult$tment1 <- (pbc3mult$timegroup==1)*pbc3mult$tment
pbc3mult$tment2 <- (pbc3mult$timegroup==2)*pbc3mult$tment
pbc3mult$tment3 <- (pbc3mult$timegroup==3)*pbc3mult$tment
pbc3mult$alb1 <- (pbc3mult$timegroup==1)*pbc3mult$alb
pbc3mult$alb2 <- (pbc3mult$timegroup==2)*pbc3mult$alb
pbc3mult$alb3 <- (pbc3mult$timegroup==3)*pbc3mult$alb
pbc3mult$bili1 <- (pbc3mult$timegroup==1)*pbc3mult$log2bili
pbc3mult$bili2 <- (pbc3mult$timegroup==2)*pbc3mult$log2bili
pbc3mult$bili3 <- (pbc3mult$timegroup==3)*pbc3mult$log2bili

options(contrasts=c("contr.treatment", "contr.poly"))
poismod_t29_base <- glm(fail~tment+alb+log2bili+timegroup+offset(log(risktime)),
                     data = pbc3mult, family = poisson)

# Treatment
poismod_t29_tment <- glm(fail ~ tment1 + tment2 + tment3 + alb + log2bili+
                       timegroup + offset(log(risktime)),
                     data = pbc3mult, family = poisson)
summary(poismod_t29_tment) 

Call:
glm(formula = fail ~ tment1 + tment2 + tment3 + alb + log2bili + 
    timegroup + offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -7.64593    1.17030  -6.533 6.43e-11 ***
tment1      -0.56166    0.29091  -1.931   0.0535 .  
tment2      -0.46193    0.34460  -1.340   0.1801    
tment3      -1.26627    1.22939  -1.030   0.3030    
alb         -0.08640    0.02166  -3.988 6.65e-05 ***
log2bili     0.64871    0.07367   8.806  < 2e-16 ***
timegroup1  -0.69911    0.74501  -0.938   0.3480    
timegroup2   0.09537    0.75000   0.127   0.8988    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 328.82  on 603  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 520.82

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t29_base, poismod_t29_tment)
Likelihood ratio test

Model 1: fail ~ tment + alb + log2bili + timegroup + offset(log(risktime))
Model 2: fail ~ tment1 + tment2 + tment3 + alb + log2bili + timegroup + 
    offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   6 -252.62                     
2   8 -252.41  2 0.4312     0.8061
Code show/hide
# Albumin
poismod_t29_alb <- glm(fail ~ tment + alb1 + alb2 + alb3 + log2bili+
                       timegroup + offset(log(risktime)),
                     data = pbc3mult, family = poisson)
summary(poismod_t29_alb) 

Call:
glm(formula = fail ~ tment + alb1 + alb2 + alb3 + log2bili + 
    timegroup + offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -8.73232    5.94065  -1.470   0.1416    
tment       -0.56130    0.22300  -2.517   0.0118 *  
alb1        -0.11026    0.02791  -3.951 7.79e-05 ***
alb2        -0.05231    0.03472  -1.507   0.1319    
alb3        -0.06542    0.15336  -0.427   0.6697    
log2bili     0.64608    0.07361   8.777  < 2e-16 ***
timegroup1   1.22609    6.02995   0.203   0.8389    
timegroup2  -0.01433    6.09060  -0.002   0.9981    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 327.45  on 603  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 519.45

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t29_base, poismod_t29_alb)
Likelihood ratio test

Model 1: fail ~ tment + alb + log2bili + timegroup + offset(log(risktime))
Model 2: fail ~ tment + alb1 + alb2 + alb3 + log2bili + timegroup + offset(log(risktime))
  #Df  LogLik Df Chisq Pr(>Chisq)
1   6 -252.62                    
2   8 -251.72  2 1.803      0.406
Code show/hide
# Bilirubin
poismod_t29_bili <- glm(fail ~ tment + alb + bili1 + bili2 + bili3 +
                       timegroup + offset(log(risktime)),
                     data = pbc3mult, family = poisson)
summary(poismod_t29_bili) 

Call:
glm(formula = fail ~ tment + alb + bili1 + bili2 + bili3 + timegroup + 
    offset(log(risktime)), family = poisson, data = pbc3mult)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -6.36112    2.36067  -2.695  0.00705 ** 
tment       -0.51454    0.22373  -2.300  0.02146 *  
alb         -0.08570    0.02174  -3.942 8.10e-05 ***
bili1        0.71044    0.09260   7.672 1.69e-14 ***
bili2        0.55747    0.12113   4.602 4.18e-06 ***
bili3        0.30512    0.48171   0.633  0.52646    
timegroup1  -2.41998    2.30859  -1.048  0.29452    
timegroup2  -0.69600    2.32124  -0.300  0.76430    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 449.14  on 610  degrees of freedom
Residual deviance: 327.67  on 603  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 519.67

Number of Fisher Scoring iterations: 6
Code show/hide
lrtest(poismod_t29_base, poismod_t29_bili)
Likelihood ratio test

Model 1: fail ~ tment + alb + log2bili + timegroup + offset(log(risktime))
Model 2: fail ~ tment + alb + bili1 + bili2 + bili3 + timegroup + offset(log(risktime))
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   6 -252.62                     
2   8 -251.84  2 1.5777     0.4544
Code show/hide
* Treatment; 
* Estimates ;
proc genmod data=pbc3mult;
    class tment (ref='0') interv(ref='1');
    model fail= interv tment(interv) alb log2bili/ dist=poi offset=logrisk type3;
run;
* LRT ;
proc genmod data=pbc3mult;
    class tment (ref='0') interv(ref='1');
    model fail= interv tment interv*tment alb log2bili/ dist=poi offset=logrisk type3;
run;

* Albumin;
* Estimates ;
proc genmod data=pbc3mult;
    class tment (ref='0') interv(ref='1');
    model fail= interv tment log2bili alb(interv) / dist=poi offset=logrisk type3;
run;
* LRT ;
proc genmod data=pbc3mult;
    class tment (ref='0') interv(ref='1');
    model fail= interv tment alb log2bili alb*interv/ dist=poi offset=logrisk type3;
run;

* Bilirubin;
* Estimates ;
proc genmod data=pbc3mult;
    class tment (ref='0') interv(ref='1');
    model fail= interv tment alb log2bili(interv)/ dist=poi offset=logrisk type3;
run;
* LRT ;
proc genmod data=pbc3mult;
    class tment (ref='0') interv(ref='1');
    model fail= interv tment alb log2bili log2bili*interv/ dist=poi offset=logrisk type3;
run;

In-text, p. 51: stratified Cox model

Code show/hide
coxmod_f29 <- coxph(Surv(days, status != 0) ~ strata(tment) + alb + log2bili,
                      eps = 1e-8, method = "breslow", data = pbc3)
coxmod_f29
Call:
coxph(formula = Surv(days, status != 0) ~ strata(tment) + alb + 
    log2bili, data = pbc3, method = "breslow", eps = 1e-08)

             coef exp(coef) se(coef)      z        p
alb      -0.09002   0.91391  0.02168 -4.153 3.28e-05
log2bili  0.66328   1.94114  0.07531  8.807  < 2e-16

Likelihood ratio test=119.3  on 2 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
proc phreg data=pbc3;
    model days*status(0)= alb log2bili / rl;
    strata tment;
run;

Figure 2.10

Code show/hide
# Extracting cumulative baseline hazards per treatment
cumhaz_treat <- basehaz(coxmod_f29, centered = FALSE)
cumhaz_treat <- as.data.frame(cumhaz_treat)
# Per treatment
hazard_t0 <- cumhaz_treat[cumhaz_treat$strata == "tment=0",]
hazard_t0[1,] <- c(0, 0, "tment=0")
hazard_t0$time<- as.numeric(hazard_t0$time)
hazard_t0$hazard <- as.numeric(hazard_t0$hazard)
hazard_t1 <- cumhaz_treat[cumhaz_treat$strata == "tment=1",]
# Match times
alltimes <- sort(unique(cumhaz_treat$time))
hazard_t0_allt <- as.numeric(sapply(1:length(alltimes),function(k) 
  tail(hazard_t0$hazard[hazard_t0$time <= alltimes[k]], 1)))
hazard_t1_allt <- as.numeric(sapply(1:length(alltimes), function(k)
  tail(hazard_t1$hazard[hazard_t1$time <= alltimes[k]], 1))) 
hazards <- data.frame(hazard_t0_allt, hazard_t1_allt)
# Extract coefficient
coxmod_f29_t <- coxph(Surv(days, status != 0) ~ tment + alb + log2bili,
                      eps = 1e-8, method = "breslow", data = pbc3)
# Make plot
library(ggplot2)
fig2.10 <- ggplot(aes(x = hazard_t0_allt, y = hazard_t1_allt), data = hazards) + 
  geom_step(linewidth = 1) + 
  geom_abline(intercept = 0, slope = exp(coef(coxmod_f29_t)[["tment"]]), 
              linetype = "dashed", size = 1) + 
  xlab("Cumulative baseline hazard: placebo") +
  ylab("Cumulative baseline hazard: CyA") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05))) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.10

Code show/hide
data covstr;
    alb=0; log2bili=0;
run;
proc phreg data=pbc3;
    model days*status(0)= alb log2bili/rl;
    strata tment;
    baseline out=breslowstr cumhaz=breslow covariates=covstr;
run;
data breslow0;
    set breslowstr; 
    if tment=0; 
  a00=breslow; 
run;
data breslow1; 
    set breslowstr; 
    if tment=1;     
    a01=breslow; 
run;
data breslow01; 
    merge breslow0 breslow1; 
    by days; 
run;
data breslowrev; 
  set breslow01;
    by days;
    retain last1 last2;
    if a00=. then cumhaz0=last1; if a00 ne . then cumhaz0=a00; 
    if a01=. then cumhaz1=last2; if a01 ne . then cumhaz1=a01;
    output;
    last1=cumhaz0; last2=cumhaz1; 
run;
data breslowrev; 
    set breslowrev;
    line=exp(-0.574)*cumhaz0;
run;
proc gplot data=breslowrev;
    plot cumhaz1*cumhaz0 line*cumhaz0/haxis=axis1 vaxis=axis2 overlay;
    axis1 order=0 to 1.5 by 0.5 minor=none label=('Cumulative baseline hazard: placebo');
    axis2 order=0 to 1 by 0.5 minor=none label=(a=90 'Cumulative baseline hazard: CyA');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=rl c=blue;
run;
quit;

Figure 2.11

Code show/hide
# Additive Aalen models - available with timereg
library(timereg)
nonparmod <- aalen(Surv(days, status != 0) ~ tment, data = pbc3)
summary(nonparmod)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          5.96               0.000
tment                                1.60               0.714

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                         0.107                       0.565
tment                               0.136                       0.648
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                          7.62                       0.444
tment                                4.54                       0.730

   
   
  Call: 
aalen(formula = Surv(days, status != 0) ~ tment, data = pbc3)
Code show/hide
cumhazdata <- data.frame(eventtimes = nonparmod$cum[,1],
                         basecumhaz = nonparmod$cum[,2], 
                         cumhaztreat = nonparmod$cum[,3], 
                         cumhaztreat_ll = nonparmod$cum[,3]
                                          -1.96*sqrt(nonparmod$var.cum[,3]),
                         cumhaztreat_ul = nonparmod$cum[,3]
                                          +1.96*sqrt(nonparmod$var.cum[,3]))
# Extend lines to last observed time
cumhazdata[nrow(cumhazdata)+1,] <- c(max(pbc3$days), tail(cumhazdata, 1)[-1])

# Left figure
fig2.11.left <- ggplot(aes(x = eventtimes / 365.25, y = basecumhaz), 
                       data = cumhazdata) + 
  geom_step(size = 1) + 
  xlab("Time since randomization (years)") +
  ylab("Cumulative baseline hazard") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05)), 
                     limits = c(0, 6),breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.11.left

Code show/hide
# Right figure
fig2.11.right <- ggplot(aes(x = eventtimes / 365.25, y = cumhaztreat), 
                        data = cumhazdata) + 
  geom_step(size = 1) + 
  geom_step(size = 1, aes(x = eventtimes / 365.25, y = cumhaztreat_ll), 
            linetype = "dashed") +  
  geom_step(size = 1, aes(x = eventtimes / 365.25, y = cumhaztreat_ul), 
            linetype = "dashed") +
  xlab("Time since randomization (years)") +
  ylab("Cumulative treatment effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05)), 
                     limits = c(0, 6), breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.11.right

In-text, p. 55: simple Aalen model

Code show/hide
# Additive model treatment only
# p-values not exactly as in book because seed changes
nonparmod0 <- aalen(Surv(days, status != 0) ~ tment, data = pbc3) 
summary(nonparmod0)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          5.96               0.000
tment                                1.60               0.738

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                         0.107                       0.563
tment                               0.136                       0.608
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                          7.62                       0.426
tment                                4.54                       0.721

   
   
  Call: 
aalen(formula = Surv(days, status != 0) ~ tment, data = pbc3)
Code show/hide
# Constant effect of treatment per year
nonparmod01 <- aalen(Surv(days/365.25, status != 0) ~ const(tment), data = pbc3) 
summary(nonparmod01)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          6.62                   0

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                        0.0914                         0.4
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                        0.0136                        0.28

Parametric terms :     
                Coef.    SE Robust SE     z P-val lower2.5% upper97.5%
const(tment) -0.00587 0.021     0.021 -0.28 0.779    -0.047     0.0353
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment), 
    data = pbc3)

Figure 2.12

Code show/hide
# Make Aalen model fit
library(timereg)
nonparmod2 <- aalen(Surv(days, status != 0) ~ tment + alb + bili, data = pbc3) 
cumhazdata <- data.frame(eventtimes = nonparmod2$cum[,1],
                         basecumhaz = nonparmod2$cum[,2], 
                         cumhaztreat = nonparmod2$cum[,3], 
                         cumhazalb = nonparmod2$cum[,4], 
                         cumhazbili= nonparmod2$cum[,5])
# Extend lines to last observed time
cumhazdata[nrow(cumhazdata)+1,] <- c(max(pbc3$days), tail(cumhazdata, 1)[-1])

# Figure treatment
fig2.12.1 <- ggplot(aes(x = eventtimes / 365.25, y = cumhaztreat), 
                    data = cumhazdata) + 
  geom_step(size = 1) + 
  xlab("Time since randomization (years)") +
  ylab("Cumulative treatment effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05)), 
                     limits = c(0, 6), breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

# Figure albumin
fig2.12.2 <- ggplot(aes(x = eventtimes / 365.25, y = cumhazalb), 
                    data = cumhazdata) + 
  geom_step(size = 1) + 
  xlab("Time since randomization (years)") +
  ylab("Cumulative albumin effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05)), 
                     limits = c(0, 6), breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

# Figure bilirubin
fig2.12.3 <- ggplot(aes(x = eventtimes / 365.25, y = cumhazbili), 
                    data = cumhazdata) + 
  geom_step(size = 1) + 
  xlab("Time since randomization (years)") +
  ylab("Cumulative bilirubin effect") + 
  scale_x_continuous(expand = expansion(mult = c(0.03, 0.05)),
                     limits = c(0, 6), breaks = seq(0, 6, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.03, 0.05))) +
  theme_general

fig2.12.1

Code show/hide
fig2.12.2

Code show/hide
fig2.12.3

Table 2.10

Code show/hide
# Additive model with treatment, albumin, bilirubin
# Table 2.10, first two columns
# p-values not exactly as in book because seed changes
nonparmod1 <- aalen(Surv(days, status != 0) ~ tment + alb + bili, data = pbc3) 
summary(nonparmod1)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          3.62               0.003
tment                                2.66               0.120
alb                                  3.82               0.002
bili                                 4.83               0.000

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                       0.30300                       0.931
tment                             0.12100                       0.666
alb                               0.00666                       0.966
bili                              0.00300                       0.201
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                      14.20000                       0.992
tment                             3.29000                       0.769
alb                               0.00826                       0.993
bili                              0.00203                       0.344

   
   
  Call: 
aalen(formula = Surv(days, status != 0) ~ tment + alb + bili, 
    data = pbc3)
Code show/hide
# Constant effects per year
# Table 2.10, last two columns
nonparmod2 <- aalen(Surv(days/365.25, status != 0) ~ 
                      const(tment) + const(alb) + const(bili), data = pbc3) 
summary(nonparmod2)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          4.03                   0

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                         0.119                       0.194
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                        0.0263                       0.123

Parametric terms :     
                Coef.       SE Robust SE     z    P-val lower2.5% upper97.5%
const(tment) -0.04130 0.021600  0.020100 -2.05 4.02e-02  -0.08360    0.00104
const(alb)   -0.00842 0.002290  0.002230 -3.77 1.63e-04  -0.01290   -0.00393
const(bili)   0.00230 0.000483  0.000384  5.98 2.17e-09   0.00135    0.00325
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment) + 
    const(alb) + const(bili), data = pbc3)

In-text, p. 57: Aalen models

Code show/hide
# In-text
# Constant effect of treatment, adjusted for albumin and bilirubin
nonparmod3 <- aalen(Surv(days/365.25, status != 0) ~ 
                      const(tment) + alb + bili, data = pbc3) 
summary(nonparmod3)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          3.60               0.005
alb                                  3.80               0.001
bili                                 4.83               0.000

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                       0.28500                       0.950
alb                               0.00954                       0.805
bili                              0.00315                       0.130
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                      3.16e-02                       0.996
alb                              5.26e-05                       0.913
bili                             6.52e-06                       0.257

Parametric terms :     
               Coef.     SE Robust SE     z P-val lower2.5% upper97.5%
const(tment) -0.0401 0.0216    0.0204 -1.97 0.049   -0.0824    0.00224
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment) + 
    alb + bili, data = pbc3)
Code show/hide
# Quadratic effect for albumin; p-values not exactly as in book because seed changes
nonparmod44 <- aalen(Surv(days/365.25, status != 0) ~ const(tment)
                       + I(alb/10) + I(bili/100) + I((alb/10)^2), data = pbc3) 
summary(nonparmod44)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
              Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                            3.23               0.018
I(alb/10)                              3.00               0.045
I(bili/100)                            4.88               0.000
I((alb/10)^2)                          2.77               0.080

Test for time invariant effects 
                    Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                           9.420                       0.241
I(alb/10)                             4.950                       0.240
I(bili/100)                           0.365                       0.064
I((alb/10)^2)                         0.635                       0.249
                      Cramer von Mises test p-value H_0:constant effect
(Intercept)                         83.6000                       0.347
I(alb/10)                           24.4000                       0.341
I(bili/100)                          0.0949                       0.173
I((alb/10)^2)                        0.4160                       0.336

Parametric terms :     
               Coef.     SE Robust SE     z  P-val lower2.5% upper97.5%
const(tment) -0.0421 0.0215    0.0201 -2.09 0.0366   -0.0842   3.92e-05
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment) + 
    I(alb/10) + I(bili/100) + I((alb/10)^2), data = pbc3)
Code show/hide
# Quadratic effect for bilirubin; p-values not exactly as in book as seed changes
nonparmod43 <- aalen(Surv(days/365.25, status != 0) ~ const(tment)
                       + I(alb/10) + I(bili/100) + I((bili/100)^2), data = pbc3) 
summary(nonparmod43)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
                Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                              3.82               0.005
I(alb/10)                                4.02               0.003
I(bili/100)                              3.85               0.005
I((bili/100)^2)                          3.01               0.041

Test for time invariant effects 
                      Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                             0.440                       0.687
I(alb/10)                               0.117                       0.625
I(bili/100)                             0.612                       0.402
I((bili/100)^2)                         0.515                       0.152
                        Cramer von Mises test p-value H_0:constant effect
(Intercept)                            0.1320                       0.776
I(alb/10)                              0.0124                       0.626
I(bili/100)                            0.4640                       0.293
I((bili/100)^2)                        0.2880                       0.148

Parametric terms :     
               Coef.     SE Robust SE     z  P-val lower2.5% upper97.5%
const(tment) -0.0395 0.0213    0.0208 -1.89 0.0582   -0.0812    0.00225
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment) + 
    I(alb/10) + I(bili/100) + I((bili/100)^2), data = pbc3)
Code show/hide
# Interactions
nonparmod51 <- aalen(Surv(days/365.25, status != 0) ~ const(tment) + const(alb)
                     + const(bili) + const(tment * bili),data = pbc3) 
summary(nonparmod51)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          4.03                   0

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                         0.123                        0.15
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                        0.0287                         0.1

Parametric terms :     
                       Coef.       SE Robust SE       z    P-val lower2.5%
const(tment)        -0.00194 0.029400  0.022800 -0.0851 9.32e-01  -0.05960
const(alb)          -0.00859 0.002260  0.002200 -3.9000 9.69e-05  -0.01300
const(bili)          0.00299 0.000869  0.000437  6.8400 7.77e-12   0.00129
const(tment * bili) -0.00116 0.001020  0.000658 -1.7600 7.92e-02  -0.00316
                    upper97.5%
const(tment)          0.055700
const(alb)           -0.004160
const(bili)           0.004690
const(tment * bili)   0.000839
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment) + 
    const(alb) + const(bili) + const(tment * bili), data = pbc3)
Code show/hide
nonparmod52 <- aalen(Surv(days/365.25, status != 0) ~ const(tment) + const(bili) 
                     + const(alb) + const(tment * alb), data = pbc3) 
summary(nonparmod52)
Additive Aalen Model 

Test for nonparametric terms 

Test for non-significant effects 
            Supremum-test of significance p-value H_0: B(t)=0
(Intercept)                          3.06               0.003

Test for time invariant effects 
                  Kolmogorov-Smirnov test p-value H_0:constant effect
(Intercept)                          0.12                       0.198
                    Cramer von Mises test p-value H_0:constant effect
(Intercept)                        0.0265                       0.126

Parametric terms :     
                      Coef.       SE Robust SE       z    P-val lower2.5%
const(tment)        0.01010 0.199000  0.176000  0.0576 9.54e-01  -0.38000
const(bili)         0.00230 0.000482  0.000385  5.9600 2.45e-09   0.00136
const(alb)         -0.00771 0.003150  0.002780 -2.7700 5.54e-03  -0.01390
const(tment * alb) -0.00132 0.004790  0.004300 -0.3060 7.59e-01  -0.01070
                   upper97.5%
const(tment)          0.40000
const(bili)           0.00324
const(alb)           -0.00154
const(tment * alb)    0.00807
   
  Call: 
aalen(formula = Surv(days/365.25, status != 0) ~ const(tment) + 
    const(bili) + const(alb) + const(tment * alb), data = pbc3)

In-text, p. 58: additive Poisson model

Code show/hide
# Additive hazards model with piecewise constant baseline hazards
# Model with only treatment as covariate
# update data set
pbc3add <- pbc3mult
pbc3add$time1 <-  with(pbc3add, (timegroup == 1)*risktime/365.25)
pbc3add$time2 <-  with(pbc3add, (timegroup == 2)*risktime/365.25)
pbc3add$time3 <-  with(pbc3add, (timegroup == 3)*risktime/365.25)
pbc3add$tment0 <- with(pbc3add, (tment == 0)*risktime/365.25)
pbc3add$tment1 <- with(pbc3add, (tment == 1)*risktime/365.25)
pbc3add$albny <-  with(pbc3add, ((alb-35)/100)*risktime/365.25)
pbc3add$biliny <- with(pbc3add, ((bili-50)/1000)*risktime/365.25)

# In-text
additive_pcw <- glm(fail ~ time1 + time2 + time3 + tment1 - 1,
                    data = pbc3add, start = c(0.1, 0.1, 0.1, 0),
                    family = poisson(link = "identity"))
summary(additive_pcw)

Call:
glm(formula = fail ~ time1 + time2 + time3 + tment1 - 1, family = poisson(link = "identity"), 
    data = pbc3add, start = c(0.1, 0.1, 0.1, 0))

Coefficients:
        Estimate Std. Error z value Pr(>|z|)    
time1   0.091109   0.016429   5.546 2.93e-08 ***
time2   0.131729   0.024143   5.456 4.87e-08 ***
time3   0.093734   0.046162   2.031   0.0423 *  
tment1 -0.007261   0.020758  -0.350   0.7265    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance:    Inf  on 623  degrees of freedom
Residual deviance: 459.54  on 619  degrees of freedom
AIC: 647.54

Number of Fisher Scoring iterations: 4
Code show/hide
* Only constant effect per year possible;
* Create new variables;
data pbc3add; 
    set pbc3mult;
    time1=(interv=1)*risktime/365.25; 
    time2=(interv=2)*risktime/365.25; 
    time3=(interv=3)*risktime/365.25;
    tment0=(tment=0)*risktime/365.25; 
    tment1=(tment=1)*risktime/365.25;
run;

* link=id;
* Constant effect of treatment per year;
proc genmod data=pbc3add;
    model fail=time1 time2 time3 tment1/dist=poi link=id noint;
run;

Table 2.11

Questionable fit.

Code show/hide
#------------------------------------------------------------------#
# Table 2.11 
#------------------------------------------------------------------#

# Table 2.11 - questionable fit
additive_pcw2.11 <- glm(fail ~ time1 + time2 + time3 + tment1 + albny + biliny -1,
                    data = pbc3add, start = c(0.3, 0.35, 0.4, -0.05, -0.8, 2),
                    family = poisson(link = "identity"))
summary(additive_pcw2.11)

Call:
glm(formula = fail ~ time1 + time2 + time3 + tment1 + albny + 
    biliny - 1, family = poisson(link = "identity"), data = pbc3add, 
    start = c(0.3, 0.35, 0.4, -0.05, -0.8, 2))

Coefficients:
       Estimate Std. Error z value Pr(>|z|)    
time1   0.18451    0.02129   8.666  < 2e-16 ***
time2   0.24074    0.02584   9.316  < 2e-16 ***
time3   0.22943    0.05441   4.217 2.48e-05 ***
tment1 -0.02243    0.01732  -1.295 0.195235    
albny  -0.43637    0.12842  -3.398 0.000679 ***
biliny  2.22490    0.40809   5.452 4.98e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance:    Inf  on 611  degrees of freedom
Residual deviance: 368.29  on 605  degrees of freedom
  (12 observations deleted due to missingness)
AIC: 556.29

Number of Fisher Scoring iterations: 25
Code show/hide
* Only constant effect per year possible;
* Create new variables;
data pbc3add; 
    set pbc3mult;
    time1=(interv=1)*risktime/365.25; 
    time2=(interv=2)*risktime/365.25; 
    time3=(interv=3)*risktime/365.25;
    tment0=(tment=0)*risktime/365.25; 
    tment1=(tment=1)*risktime/365.25;
    albny=(alb-35)*risktime/365.25;
    biliny=(bili-50)*risktime/365.25;
run;

* link=id;
* Constant effects per year;
proc genmod data=pbc3add;
    model fail=time1 time2 time3 tment1 albny biliny/dist=poi link=id noint;
run;

Table 2.13

Code show/hide
# Death without transplantation
coxph(Surv(days, status == 2) ~ tment + alb + log2bili + sex + age,
           method = "breslow", data = pbc3)
Call:
coxph(formula = Surv(days, status == 2) ~ tment + alb + log2bili + 
    sex + age, data = pbc3, method = "breslow")

             coef exp(coef) se(coef)      z        p
tment    -0.42049   0.65672  0.26822 -1.568   0.1169
alb      -0.06992   0.93247  0.02906 -2.406   0.0161
log2bili  0.69178   1.99726  0.09303  7.436 1.04e-13
sex       0.48557   1.62510  0.31943  1.520   0.1285
age       0.07335   1.07611  0.01621  4.524 6.06e-06

Likelihood ratio test=98.71  on 5 df, p=< 2.2e-16
n= 343, number of events= 60 
   (6 observations deleted due to missingness)
Code show/hide
# Transplantation
coxph(Surv(days, status == 1) ~ tment + alb + log2bili + sex + age,
           method = "breslow", data = pbc3) 
Call:
coxph(formula = Surv(days, status == 1) ~ tment + alb + log2bili + 
    sex + age, data = pbc3, method = "breslow")

             coef exp(coef) se(coef)      z        p
tment    -0.67305   0.51015  0.41318 -1.629   0.1033
alb      -0.09400   0.91029  0.03871 -2.428   0.0152
log2bili  0.83213   2.29820  0.14655  5.678 1.36e-08
sex       0.20378   1.22602  0.56329  0.362   0.7175
age      -0.04805   0.95309  0.02138 -2.247   0.0246

Likelihood ratio test=64.95  on 5 df, p=1.15e-12
n= 343, number of events= 28 
   (6 observations deleted due to missingness)
Code show/hide
# Failure of medical treatment
coxph(Surv(days, status != 0) ~ tment + alb + log2bili + sex + age,
           method = "breslow", data = pbc3)
Call:
coxph(formula = Surv(days, status != 0) ~ tment + alb + log2bili + 
    sex + age, data = pbc3, method = "breslow")

             coef exp(coef) se(coef)      z       p
tment    -0.50964   0.60071  0.22339 -2.281 0.02252
alb      -0.07136   0.93112  0.02293 -3.112 0.00186
log2bili  0.73778   2.09128  0.07768  9.497 < 2e-16
sex       0.58536   1.79563  0.26738  2.189 0.02858
age       0.03077   1.03125  0.01199  2.566 0.01029

Likelihood ratio test=134.3  on 5 df, p=< 2.2e-16
n= 343, number of events= 88 
   (6 observations deleted due to missingness)
Code show/hide
proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0 1)=tment alb log2bili sex age / rl;
run;

proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0 2)=tment alb log2bili sex age / rl;
run;

proc phreg data=pbc3;
    class tment (ref='0');
    model days*status(0)=tment alb log2bili sex age / rl;
run;

Guinea-Bissau childhood vaccination study

Read data

Code show/hide
bissau <- read.csv("data/bissau.csv")
bissau$agem <- as.integer(bissau$age/30.4)
Code show/hide
proc import out=bissau
    datafile="data/bissau.csv"
    dbms=csv replace;
run;
data bissau; 
    set bissau; 
    agem = int(age/30.4); * Age in months; 
run;

In-text, p. 58: Cox model unadjusted for age

Code show/hide
library(survival)
coxfit0 <- coxph(Surv(fuptime, dead) ~ bcg, 
                 data = bissau, method = "breslow")
coxfit0
Call:
coxph(formula = Surv(fuptime, dead) ~ bcg, data = bissau, method = "breslow")

       coef exp(coef) se(coef)      z      p
bcg -0.2821    0.7542   0.1353 -2.085 0.0371

Likelihood ratio test=4.28  on 1 df, p=0.03851
n= 5274, number of events= 222 
Code show/hide
proc phreg data=bissau;
    model fuptime*dead(0)=bcg / rl;
run;

Table 2.12

Code show/hide
# Cox model in column 1
coxfit1 <- coxph(Surv(fuptime, dead) ~ bcg + agem, 
                 data = bissau, method = "breslow")
coxfit1
Call:
coxph(formula = Surv(fuptime, dead) ~ bcg + agem, data = bissau, 
    method = "breslow")

         coef exp(coef) se(coef)      z      p
bcg  -0.35346   0.70225  0.14423 -2.451 0.0143
agem  0.05472   1.05624  0.03844  1.424 0.1546

Likelihood ratio test=6.29  on 2 df, p=0.04301
n= 5274, number of events= 222 
Code show/hide
# Make age the time variable instead
bissau$agein <- bissau$age/(365.24/12)
bissau$ageout <- bissau$agein + bissau$fuptime/(365.24/12)

# Cox model in column 2
# option timefix=F aligns to SAS calculation
# see vignette 'Roundoff error and tied times' for survival package
coxfit2 <- coxph(Surv(agein, ageout, dead) ~ bcg, 
                 data = bissau, method = "breslow",timefix=F)
coxfit2
Call:
coxph(formula = Surv(agein, ageout, dead) ~ bcg, data = bissau, 
    method = "breslow", timefix = F)

       coef exp(coef) se(coef)      z      p
bcg -0.3558    0.7006   0.1407 -2.529 0.0114

Likelihood ratio test=6.28  on 1 df, p=0.01218
n= 5274, number of events= 222 
Code show/hide
proc phreg data=bissau;
    model fuptime*dead(0)=bcg agem / rl;
run;

* Make age the time variable instead;
data bissau; 
    set bissau;
    agein=age/(365.24/12);
    ageout=agein+fuptime/(365.24/12);
run;

* Cox model fit - column 2; 
proc phreg data=bissau;
    model ageout*dead(0)=bcg / entry=agein rl;
run;

Figure 2.13

Code show/hide
# General plotting style 
library(ggplot2)
theme_general <- theme_bw() +
  theme(text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20),
        legend.position = "bottom", 
        legend.title=element_blank(),
        legend.text = element_text(size = 20),
        legend.key.size = unit(2,"line")) 


# Extract cumulative baseline hazard for covariate pattern
covdata<-data.frame(rbind(c(bcg=0, agem=3)))
coxcumhaz <- basehaz(coxfit1, covdata)
# Create figure
fig2.13 <- ggplot(aes(x = time / (365.24/12), y = hazard), data = coxcumhaz) + 
  geom_step(size = 1) + 
  xlab("Follow-up time (months)") + 
  ylab("Cumulative hazard") + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     breaks = seq(0, 7, by = 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05))) +
  theme_general

fig2.13

Code show/hide
* Cox model fit and get cumulative baseline hazard ; 
proc phreg data=bissau;
    model fuptime*dead(0)=bcg agem/rl;
    baseline out=hazfuptime covariates=cov1 cumhaz=basefup;
run;
data hazfuptime_months; 
    set hazfuptime; 
    fuptime_m = fuptime / (365.24/12); 
run;
proc gplot data=hazfuptime_months;
    plot basefup*fuptime_m/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 7 by 1 minor=none
    label=('Follow-up time (months)');
    axis2 order=0 to 0.06 by 0.01 minor=none
    label=(a=90 'Cumulative hazard');
    symbol1 v=none i=stepjl c=blue;
run;
quit;
* or using proc sgplot;
ods graphics on;
proc sgplot data=hazfuptime_months;
  step x=fuptime_m y=basefup / justify=left;
    xaxis grid values=(0 to 7 by 1);
    yaxis grid values=(0 to 0.06 by 0.01);
    label fuptime_m="Follow-up time (months)";
    label basefup="Cumulative hazard";
run;

Figure 2.14

Code show/hide
# Extract cumulative baseline hazard
coxcumhaz <- basehaz(coxfit2, centered = FALSE)
fig2.14 <- ggplot(aes(x = time, y = hazard), data = coxcumhaz) + 
  geom_step(size = 1) + 
  xlab("Age (months)") + 
  ylab("Cumulative hazard") + 
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)), 
                     breaks = seq(0, 15, by = 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.1)), 
                     breaks = seq(0, 0.15, by = 0.05), 
                     labels = c("0", "0.05", "0.10", "0.15")) +
  theme_general

fig2.14

Code show/hide
data cov2;
input bcg;
datalines;
0
;
run;
* Cox model fit and get cumulative baseline hazard ; 
proc phreg data=bissau;
    model ageout*dead(0)=bcg / entry=agein rl;
    baseline out=hazage covariates=cov2 cumhaz=baseage;
run;
proc gplot data=hazage;
    plot baseage*ageout/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 15 by 5 minor=none
        label=('Age (months)');
    axis2 order=0 to 0.15 by 0.05 minor=none 
        label=(a=90 'Cumulative baseline hazard');
    symbol1  v=none i=stepjl c=blue;
run;
quit;

Recurrent episodes in affective disorders

Read data

Code show/hide
affective <- read.csv("data/affective.csv")
affective$wait <- with(affective, stop - start)
Code show/hide
proc import out=affective
    datafile="data/affective.csv"
    dbms=csv replace;
run;
data affective; 
    set affective; 
    wait = stop - start; 
run; 

Table 2.14

NB: Small differences in estimates between R and SAS (SAS output used for table in book).

Code show/hide
# Cox model for 1., 2., 3., 4. episode 'Markov': Column 1
library(survival)
coxph(Surv(start, stop, status == 1) ~ bip, method = "breslow",
                        data = subset(affective, episode == 1 & state == 0))
Call:
coxph(formula = Surv(start, stop, status == 1) ~ bip, data = subset(affective, 
    episode == 1 & state == 0), method = "breslow")

      coef exp(coef) se(coef)     z     p
bip 0.3552    1.4264   0.2500 1.421 0.155

Likelihood ratio test=1.89  on 1 df, p=0.1692
n= 116, number of events= 99 
Code show/hide
coxph(Surv(start, stop, status == 1) ~ bip, method = "breslow",
                        data = subset(affective, episode == 2 & state == 0))
Call:
coxph(formula = Surv(start, stop, status == 1) ~ bip, data = subset(affective, 
    episode == 2 & state == 0), method = "breslow")

      coef exp(coef) se(coef)     z     p
bip 0.1890    1.2080   0.2604 0.726 0.468

Likelihood ratio test=0.51  on 1 df, p=0.4751
n= 91, number of events= 82 
Code show/hide
coxph(Surv(start, stop, status == 1) ~ bip, method = "breslow",
                        data = subset(affective, episode == 3 & state == 0)) 
Call:
coxph(formula = Surv(start, stop, status == 1) ~ bip, data = subset(affective, 
    episode == 3 & state == 0), method = "breslow")

       coef exp(coef) se(coef)      z     p
bip -0.1175    0.8891   0.3005 -0.391 0.696

Likelihood ratio test=0.16  on 1 df, p=0.6936
n= 74, number of events= 62 
Code show/hide
coxph(Surv(start, stop, status == 1) ~ bip, method = "breslow",
                        data = subset(affective, episode == 4 & state == 0)) 
Call:
coxph(formula = Surv(start, stop, status == 1) ~ bip, data = subset(affective, 
    episode == 4 & state == 0), method = "breslow")

      coef exp(coef) se(coef)     z       p
bip 1.1500    3.1581   0.3536 3.252 0.00114

Likelihood ratio test=9.93  on 1 df, p=0.001623
n= 56, number of events= 47 
Code show/hide
# Cox model for 1., 2., 3., 4. episode 'Gap time': Column 2
coxph(Surv(wait, status == 1) ~ bip, method = "breslow",
                        data = subset(affective, episode == 1 & state == 0)) 
Call:
coxph(formula = Surv(wait, status == 1) ~ bip, data = subset(affective, 
    episode == 1 & state == 0), method = "breslow")

      coef exp(coef) se(coef)     z     p
bip 0.3991    1.4905   0.2487 1.605 0.109

Likelihood ratio test=2.39  on 1 df, p=0.1222
n= 116, number of events= 99 
Code show/hide
coxph(Surv(wait, status == 1) ~ bip, method = "breslow",
                     data = subset(affective, episode == 2 & state == 0))
Call:
coxph(formula = Surv(wait, status == 1) ~ bip, data = subset(affective, 
    episode == 2 & state == 0), method = "breslow")

      coef exp(coef) se(coef)    z     p
bip 0.2165    1.2418   0.2579 0.84 0.401

Likelihood ratio test=0.68  on 1 df, p=0.41
n= 91, number of events= 82 
Code show/hide
coxph(Surv(wait, status == 1) ~ bip, method = "breslow",
                     data = subset(affective, episode == 3 & state == 0)) 
Call:
coxph(formula = Surv(wait, status == 1) ~ bip, data = subset(affective, 
    episode == 3 & state == 0), method = "breslow")

       coef exp(coef) se(coef)      z     p
bip -0.1114    0.8946   0.2867 -0.389 0.698

Likelihood ratio test=0.15  on 1 df, p=0.6953
n= 74, number of events= 62 
Code show/hide
coxph(Surv(wait, status == 1) ~ bip, method = "breslow",
                     data = subset(affective, episode == 4 & state == 0)) 
Call:
coxph(formula = Surv(wait, status == 1) ~ bip, data = subset(affective, 
    episode == 4 & state == 0), method = "breslow")

      coef exp(coef) se(coef)     z     p
bip 0.5964    1.8155   0.3183 1.874 0.061

Likelihood ratio test=3.31  on 1 df, p=0.06905
n= 56, number of events= 47 
Code show/hide
# AG cox model, total time
coxph(Surv(start, stop, status == 1) ~ bip, method = "breslow",
                        data = subset(affective, state == 0))
Call:
coxph(formula = Surv(start, stop, status == 1) ~ bip, data = subset(affective, 
    state == 0), method = "breslow")

       coef exp(coef) se(coef)     z        p
bip 0.36593   1.44186  0.09448 3.873 0.000107

Likelihood ratio test=14.24  on 1 df, p=0.0001612
n= 626, number of events= 542 
Code show/hide
# AG cox model, gap time
coxph(Surv(wait, status == 1) ~ bip, method = "breslow",
                   data = subset(affective, state == 0))
Call:
coxph(formula = Surv(wait, status == 1) ~ bip, data = subset(affective, 
    state == 0), method = "breslow")

       coef exp(coef) se(coef)     z     p
bip 0.12555   1.13377  0.09445 1.329 0.184

Likelihood ratio test=1.74  on 1 df, p=0.1875
n= 626, number of events= 542 
Code show/hide
# PWP cox model, total time
coxph(Surv(start, stop, status == 1) ~ strata(episode) + bip, method = "breslow",
                   data = subset(affective, state == 0)) 
Call:
coxph(formula = Surv(start, stop, status == 1) ~ strata(episode) + 
    bip, data = subset(affective, state == 0), method = "breslow")

      coef exp(coef) se(coef)     z     p
bip 0.2418    1.2736   0.1121 2.157 0.031

Likelihood ratio test=4.54  on 1 df, p=0.03312
n= 626, number of events= 542 
Code show/hide
# PWP cox model, gap time
coxph(Surv(wait, status == 1) ~ strata(episode) + bip, method = "breslow", 
                    data = subset(affective, state == 0)) 
Call:
coxph(formula = Surv(wait, status == 1) ~ strata(episode) + bip, 
    data = subset(affective, state == 0), method = "breslow")

       coef exp(coef) se(coef)     z     p
bip 0.02781   1.02820  0.10040 0.277 0.782

Likelihood ratio test=0.08  on 1 df, p=0.7821
n= 626, number of events= 542 
Code show/hide
proc sort data=affective out=state0;
  where state=0;
  by state episode;
run;

* Cox model for 1., 2., 3., 4. episode 'Markov': Column 1; 
proc phreg data=state0;
  where episode<=4;
    model stop*status (2 3)= bip / entry=start;
    by episode;
run;

* AG model, no past; 
proc phreg data=state0;
    model stop*status (2 3)= bip / entry=start;
run;

* PWP model; 
proc phreg data=state0; 
    model stop*status (2 3)= bip / entry=start;
    strata episode;
run; 

* Cox model for 1., 2., 3., 4. episode 'Gap time': Column 2; 
proc phreg data=state0;
  where episode<=4;
    model wait*status (2 3)= bip;
    by episode;
run;

* AG gap time model; 
proc phreg data=state0;
    model wait*status (2 3)= bip;
run;

* PWP gap time model; 
proc phreg data=state0;
    model wait*status (2 3)= bip;
    strata episode;
run;

LEADER cardiovascular trial in type 2 diabetes

Assume that the LEADER data set is loaded in data set leader_mi.

Figure 2.15

Code show/hide
library(ggplot2)
theme_general <- theme_bw() +
  theme(legend.position = "bottom",
        legend.title=element_blank(),
        legend.text = element_text(size = 20),
        legend.key.size = unit(2,"line"),
        text = element_text(size = 20), 
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) 

# Nelson-Aalen estimate of the cumulative hazard of MI
library(survival)
fit <- survfit(Surv(start, stop, status == 1) ~ treat, data = leader_mi,
               id = id, ctype = 1)

plotdata <- data.frame(cumhaz = fit$cumhaz, time = fit$time,
                       treat = c(rep("Placebo", fit$strata[[1]]),
                                 rep("Liraglutide", fit$strata[[2]])))

fig2.15 <- ggplot(aes(x = time * 1 / (365.25 / 12), y = cumhaz), data = plotdata) +
  geom_step(aes(linetype = treat), linewidth = 1) +
  scale_linetype_discrete("Treatment") +
  ylab("Cumulative hazard") +
  xlab("Time since randomization (months)") +
  theme_bw() +
  scale_x_continuous(expand = expansion(mult = c(0.005, 0.05)),
                     limits = c(0, 65), breaks = seq(0, 60, by = 12)) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.05)),
                     limits = c(0, 0.12), breaks = seq(0, 0.12, by = 0.02)) +
  theme_general

fig2.15

Code show/hide
* Nelson-Aalen estimates; 
proc phreg data=leader_mi noprint;
    model stop*status(0 2)=/entry=start;
    id id;
    strata treat;
  baseline out=na_data cumhaz=naa;
run;
data na_est;
    set na_data; 
    time = stop/(365.25/12);
run; 
proc sgplot data=na_est;
    step x=time y=naa / group=treat justify=left;
    xaxis grid values=(0 to 60 by 12);
    yaxis grid values=(0 to 0.12 by 0.02);
    label time="Time since randomisation (months)";
    label cumevent="Cumulative hazard"; 
run; 

Table 2.15

Code show/hide
# Cox model first event
library(survival)
coxph(Surv(start, stop, status == 1) ~ treat,
      data = subset(leader_mi, eventno == 1), method = "breslow",)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = subset(leader_mi, 
    eventno == 1), method = "breslow")

          coef exp(coef) se(coef)      z      p
treat -0.15949   0.85258  0.07984 -1.998 0.0458

Likelihood ratio test=4  on 1 df, p=0.04545
n= 9340, number of events= 631 
Code show/hide
# AG Cox type
coxph(Surv(start, stop, status == 1) ~ treat, data = leader_mi,
                   method = "breslow")
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = leader_mi, 
    method = "breslow")

          coef exp(coef) se(coef)      z      p
treat -0.16418   0.84859  0.07184 -2.285 0.0223

Likelihood ratio test=5.24  on 1 df, p=0.02208
n= 10120, number of events= 780 
Code show/hide
# AG model, piece-wise constant hazards
# Calculating cuts -> 5 equally-sized intervals
alltimes <- seq(0,max(leader_mi$stop),length=99)
FunctionIntervalM <- function(a,b) {
  seq(from=min(a), to = max(a), by = (max(a)-min(a))/b)
  }
cuts <- FunctionIntervalM(a = alltimes, b = 5)
cut_data <- survSplit(Surv(start, stop, status == 1) ~ .,
                      leader_mi,
                      cut = cuts[-1],
                      episode = "timegroup")
options(contrasts=c("contr.treatment", "contr.poly"))
summary(glm(event ~ treat + factor(timegroup) + offset(log(stop-start)),
            data=cut_data,family=poisson))

Call:
glm(formula = event ~ treat + factor(timegroup) + offset(log(stop - 
    start)), family = poisson, data = cut_data)

Coefficients:
                   Estimate Std. Error  z value Pr(>|z|)    
(Intercept)        -9.58726    0.07463 -128.462   <2e-16 ***
treat              -0.16413    0.07184   -2.285   0.0223 *  
factor(timegroup)2  0.08099    0.09329    0.868   0.3853    
factor(timegroup)3 -0.10998    0.09873   -1.114   0.2653    
factor(timegroup)4 -0.26954    0.11448   -2.355   0.0185 *  
factor(timegroup)5 -0.15954    0.27552   -0.579   0.5626    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 7673.5  on 39069  degrees of freedom
Residual deviance: 7656.9  on 39064  degrees of freedom
AIC: 9228.9

Number of Fisher Scoring iterations: 7
Code show/hide
# PWP 2nd event
coxph(Surv(start, stop, status == 1) ~ treat,
                     method = "breslow", subset = (eventno == 2),
                     data = leader_mi)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = leader_mi, 
    subset = (eventno == 2), method = "breslow")

          coef exp(coef) se(coef)      z     p
treat -0.04662   0.95445  0.19686 -0.237 0.813

Likelihood ratio test=0.06  on 1 df, p=0.8126
n= 631, number of events= 105 
Code show/hide
# PWP 3rd event
coxph(Surv(start, stop, status == 1) ~ treat,
                   method = "breslow", subset = (eventno == 3),
                   data = leader_mi)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = leader_mi, 
    subset = (eventno == 3), method = "breslow")

          coef exp(coef) se(coef)      z     p
treat -0.02328   0.97699  0.39979 -0.058 0.954

Likelihood ratio test=0  on 1 df, p=0.9536
n= 105, number of events= 27 
Code show/hide
# PWP 4th event
coxph(Surv(start, stop, status == 1) ~ treat,
                   method = "breslow", subset = (eventno == 4),
                   data = leader_mi)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = leader_mi, 
    subset = (eventno == 4), method = "breslow")

        coef exp(coef) se(coef)     z     p
treat 0.6288    1.8753   0.7370 0.853 0.394

Likelihood ratio test=0.75  on 1 df, p=0.3852
n= 27, number of events= 9 
Code show/hide
# PWP 5th event
coxph(Surv(start, stop, status == 1) ~ treat,
                   method = "breslow", subset = (eventno == 5),
                   data = leader_mi)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat, data = leader_mi, 
    subset = (eventno == 5), method = "breslow")

         coef exp(coef) se(coef)      z     p
treat -0.4287    0.6514   1.2299 -0.349 0.727

Likelihood ratio test=0.13  on 1 df, p=0.7221
n= 9, number of events= 3 
Code show/hide
# PWP all
coxph(Surv(start, stop, status == 1) ~ treat + strata(eventno),
                   method = "breslow",
                   data = leader_mi)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ treat + strata(eventno), 
    data = leader_mi, method = "breslow")

          coef exp(coef) se(coef)      z      p
treat -0.13020   0.87792  0.07222 -1.803 0.0714

Likelihood ratio test=3.26  on 1 df, p=0.07103
n= 10120, number of events= 780 
Code show/hide
proc sort data=leader_mi;
  by eventno;
run;

* AG Cox type;
proc phreg data=leader_mi;
  model stop*status(0 2)= treat / entry=start; 
run;


* AG model, piece-wise constant hazards with 5 equally-sized intervals;
proc icphreg data=leader_mi;
    model stop*status(0 2) = treat / entry=start basehaz=pch(nintervals=5);
run;

                     Analysis of Maximum Likelihood Parameter Estimates

                                 Standard       95% Confidence          Chi-
  Effect       DF    Estimate       Error           Limits            Square    Pr > ChiSq

  Haz1          1      0.0001      0.0000      0.0000      0.0001
  Haz2          1      0.0001      0.0000      0.0001      0.0001
  Haz3          1      0.0001      0.0000      0.0001      0.0001
  Haz4          1      0.0001      0.0000      0.0001      0.0001
  Haz5          1      0.0001      0.0000      0.0000      0.0001
  treat         1     -0.1643      0.0718     -0.3051     -0.0235       5.23        0.0222




* Alternatively, one can split data at time points and use proc genmod:
* For the piece-wise constant model we use the macro `lexis` by Bendix Carstensen,
  The macrp split observation time into time interval. 
  For more information visit https://bendixcarstensen.com and look for Software; 

filename lexis url 'https://bendixcarstensen.com/Lexis/Lexis.sas';
%inc lexis;

/* we name the failure 'mi' and the time intervals 'timegroup' */
data leader_mi; 
    set leader_mi;
    mi=status=1;
run;

/* We name the failure 'mi' and the time intervals 'timegroup' */
%Lexis(data   = leader_mi,
             out    = pois,
             entry  = start,
             exit   = stop,
             fail   = mi,
             left   = timegroup,
       breaks = %str(0,379.2,758.4,1137.6,1516.8,1896.0));
proc genmod data=pois;
  class treat(ref="0") timegroup;
    model mi = treat timegroup / dist=poisson  offset=lrisk;
run;

* Cox model first event and PWP models 2nd, ..., 5th event;
proc phreg data=leader_mi;
  where eventno<=5;
  model stop*status(0 2)= treat / entry=start; 
    by eventno;
run;

* PWP model all events;
proc phreg data=leader_mi;
  model stop*status(0 2)= treat / entry=start; 
    strata eventno;
run;

Exercises

The SAS solutions are available as single files for download:

Exercise 2.1

Consider the data from the Copenhagen Holter study (Example 1.1.8).

The data should be loaded as chs_data:

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

Note that the time variables, timeafib, timestroke and timedeath, are measured in days. We will first convert them to years for easier interpretations.

Code show/hide
chs_data$timeafib   <- chs_data$timeafib/365.25
chs_data$timestroke <- chs_data$timestroke/365.25
chs_data$timedeath  <- chs_data$timedeath/365.25

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(ggplot2)
theme_set(theme_bw())
Code show/hide
* We first load the data;

proc import out = chs_data
datafile = 'data/cphholter.csv'
dbms= csv replace;
getnames=yes;
run;

* We will convert the time variables ('timeafib', 'timestroke', and 'timedeath') from days to years;

data chs_data;
set chs_data;
timeafib = timeafib/365.25;
timestroke = timestroke/365.25;
timedeath = timedeath/365.25;
run;

1.

Estimate non-parametrically the cumulative hazards of death for subjects with or without ESVEA.

The cumulative hazard can be estimated non-parametrically using the Nelson-Aalen estimator. It is implemented in the survfit function from the survival package. The formula argument must have a Surv object on the left side of ‘~’. Since the event of interest is death the time variable of the Surv object is timedeath and the status indicator is death. The covariate(s), esvea in this analysis, should be on the right side of the ‘~’. The cumulative hazard is then stored as the object cumhaz, and can be plotted against time.

Code show/hide
# Estimating the cumulative hazards for subjects with or without ESVEA non-parametrically.
naafit211 <- survfit(formula = Surv(timedeath, death) ~ esvea, data = chs_data)

naadata211 <- data.frame(time = naafit211$time,
                         cumhaz = naafit211$cumhaz, 
                         esvea = c(rep(names(naafit211$strata)[1], naafit211$strata[1]),
                                   rep(names(naafit211$strata)[2], naafit211$strata[2])))

(fig211 <- ggplot(data = naadata211) + geom_step(aes(x = time, y = cumhaz, linetype = esvea), linewidth = 1) + 
    scale_linetype_discrete("ESVEA", labels = c("0", "1")) + 
    xlab("Time since randomization (years)") + 
    ylab("Cumulative hazard"))

Code show/hide
* We will estimate the cumulative hazards of death for subjects with or without ESVEA non-parametrically with the Nelson-Aalen 
  estimator. This can be done with the 'phreg' procedure. Since the event of interest is death the time variable used in the model 
  statement is 'timedeath' and the censoring variable is 'death'. Furthermore, 'esvea' is specified in the strata statement to 
  obtain different estimates of the hazards for subjects with or without ESVEA. The cumulative hazard are the stored in the
  'hazdata' file created with the baseline statement;

proc phreg data=chs_data noprint; 
    model timedeath*death(0)=;
    strata esvea;
    baseline out=hazdata cumhaz=naa;
run;

* The estimates are then plotted using the gplot procedure;

title "2.1.1: Nelson-Aalen estimates for subjects with or without ESVEA";
proc gplot data=hazdata;
    plot naa*timedeath=esvea /haxis=axis1 vaxis=axis2;
    axis1 order=0 to 15.19 by 1 label=('Time (Years)');
    axis2 order=0 to 1 by 0.2 label=(a=90 'Cumulative hazard');
    symbol1 i=stepjl c=red;
    symbol2 i=stepjl c=blue;
run;

2.

Make a non-parametric test for comparison of the two.

The Nelson-Aalen estimate of the cumulative hazards for subjects with or without ESVEA can be compared with a logrank test which is implemented as the function survdiff from the survival package.

Code show/hide
# Logrank test
survdiff(formula = Surv(timedeath, death) ~ esvea, data = chs_data)
Call:
survdiff(formula = Surv(timedeath, death) ~ esvea, data = chs_data)

          N Observed Expected (O-E)^2/E (O-E)^2/V
esvea=0 579      206    228.4       2.2      17.7
esvea=1  99       55     32.6      15.4      17.7

 Chisq= 17.7  on 1 degrees of freedom, p= 3e-05 

We get a chi-square statistic of 17.7 on 1 degree of freedom and a corresponding p-value of \(3 \cdot 10^{-5}\).

Code show/hide
* The hazards can be compared with a log rank test which is implemented in the lifetest procedure. The relevant time variable 
  'timedeath' and censoring variable 'death' are specified in the time statement and 'esvea' is used in the strata statement.;

title "2.1.2: Log rank test comparing the hazards for the subjects with or without ESVEA";
proc lifetest data=chs_data notable plots=none; 
    time timedeath*death(0);
    strata esvea;
run; 

* We get a Chi-square statistic of 17.66 on 1 degree of freedom with a corresponding p-value <0.0001;

3.

Make a similar analysis based on a model where the hazard is assumed constant within 5-year intervals.

To estimate the cumulative hazard under the assumption that the hazard is constant within 5-year intervals we must first split each record into sub-records with cut times at 5 and 10 years. This can be done using the survSplit function from the survival package. Duration of each sub-record is then stored as the column duration.

Code show/hide
#Splitting each record into subrecords
data_pch <- survSplit(formula = Surv(timedeath, death) ~ esvea, 
          data = chs_data, 
          cut = c(5,10), #Cut points
          episode = "interval")
#Adding column with name interval
data_pch$duration <- data_pch$timedeath - data_pch$tstart #Duration of each subrecord

We can then estimate the hazard within each of the intervals \([0,5), [5,10), [10,15.19)\) using an occurrence/exposure rate.

For subjects without ESVEA we get the following estimates:

Code show/hide
int1esvea0 <- subset(data_pch, interval == 1 & esvea == 0) #All subrecords in the interval [0,5) with ESVEA = 0
int2esvea0 <- subset(data_pch, interval == 2 & esvea == 0) #All subrecords in the interval [5,10) with ESVEA = 0
int3esvea0 <- subset(data_pch, interval == 3 & esvea == 0) #All subrecords in the interval [10,15.19) with ESVEA = 0

haz1esvea0 <- sum(int1esvea0$death)/sum(int1esvea0$duration) #Hazard estimate for the interval [0,5)
haz2esvea0 <- sum(int2esvea0$death)/sum(int2esvea0$duration) #Hazard estimate for the interval [5,10) 
haz3esvea0 <- sum(int3esvea0$death)/sum(int3esvea0$duration) #Hazard estimate for the interval [10,15.19)

#Table with hazard estimates for subjects without ESVEA
cbind(c("0-5 years","5-10 years","10+ years"), c(haz1esvea0,haz2esvea0,haz3esvea0))
     [,1]         [,2]                
[1,] "0-5 years"  "0.0157326574030543"
[2,] "5-10 years" "0.0296926508258624"
[3,] "10+ years"  "0.0454654436742393"

And for the group with ESVEA we get

Code show/hide
int1esvea1 <- subset(data_pch, interval == 1 & esvea == 1) #All subrecords in the interval [0,5) with ESVEA = 1
int2esvea1 <- subset(data_pch, interval ==2 & esvea == 1) #All subrecords in the interval [5,10) with ESVEA = 1
int3esvea1 <- subset(data_pch, interval ==3 & esvea == 1) #All subrecords in the interval [10,15,19) with ESVEA = 1

haz1esvea1 <- sum(int1esvea1$death)/sum(int1esvea1$duration) #Hazard estimate for the interval [0,5)
haz2esvea1 <- sum(int2esvea1$death)/sum(int2esvea1) #Hazard estimate for the interval [5,10)
haz3esvea1 <- sum(int3esvea1$death)/sum(int3esvea1$duration) #Hazard estimate for the interval [10,15)

#Table with estimates for subjects with ESVEA
cbind(c("0-5 years","5-10 years","10+ years"), c(haz1esvea1,haz2esvea1,haz3esvea1))
     [,1]         [,2]                 
[1,] "0-5 years"  "0.0386657394025027" 
[2,] "5-10 years" "0.00940130241177833"
[3,] "10+ years"  "0.0813048849711176" 

Alternatively, all estimates can be obtained at once through a Poisson regression using the glm function with death as the dependent variable, the categorical variable of the different combinations of interval and ESVEA status, int_esvea, as the covariate and the logarithm of duration as an offset. Furthermore, we must include ‘-1’ as a ‘covariate to’ avoid estimating an intercept.

We get the hazards by taking the exponential of the coefficients estimated from this Poisson model.

Code show/hide
# Adding a categorical covariate indicating the interval and ESVEA status for each subrecord
data_pch$int_esvea <- data_pch$interval + (data_pch$esvea*3)

#Poisson regression 
poisson213 <- glm(death ~ factor(int_esvea) - 1 + offset(log(duration)), family = poisson(), data = data_pch)
poisson_est <- as.numeric(exp(coefficients(poisson213)))

#Table with estimates
cbind(c("0-5 years, ESVEA = 0","5-10 years, ESVEA = 0","10+ years, ESVEA = 0", "0-5 years, ESVEA = 1","5-10 years, ESVEA = 1","10+ years, ESVEA = 1"), poisson_est)
                             poisson_est         
[1,] "0-5 years, ESVEA = 0"  "0.0157326574052703"
[2,] "5-10 years, ESVEA = 0" "0.0296926510588424"
[3,] "10+ years, ESVEA = 0"  "0.0454654437768701"
[4,] "0-5 years, ESVEA = 1"  "0.0386657394029923"
[5,] "5-10 years, ESVEA = 1" "0.0460542296617026"
[6,] "10+ years, ESVEA = 1"  "0.0813048849874842"

The cumulative hazards for subjects with or without ESVEA under the piece-wise constant hazard assumption can be compared using a likelihood ratio test (LRT). This is done by fitting a model under the null hypothesis (i.e only considering the intervals \([0,5), [5,10)\) and \([10,15.19)\) but not whether subjects have ESVEA or not) and then comparing the two models using anova with the argument ‘test = “LRT”’.

Code show/hide
#Null model
poisson_null213 <- glm(death ~ factor(interval) - 1 + offset(log(duration)), family = poisson(), data = data_pch)

#Likelihood ratio test (LRT)
anova(poisson_null213,poisson213, test = "LRT")
Analysis of Deviance Table

Model 1: death ~ factor(interval) - 1 + offset(log(duration))
Model 2: death ~ factor(int_esvea) - 1 + offset(log(duration))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
1      1816     1444.7                        
2      1813     1428.5  3    16.22 0.001022 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We get a chi-squared statistic of 16.22 with 3 degrees of freedom and a corresponding p-value of 0.001.

The piece-wise constant cumulative hazards are plotted together with the Nelson-Aalen cumulative hazards below

Code show/hide
#Data frame with time and cumulative hazard under poisson assumption for subjects without ESVEA
poisson_esvea0 <- as.data.frame(
  cbind(time = c(0,5,10,15.19), 
        cumhaz = c(0,5*poisson_est[1], 5*poisson_est[1] + 5*poisson_est[2], 5*poisson_est[1] + 5*poisson_est[2] + 5.19*poisson_est[3])))

#Data frame with time and cumulative hazard under poisson assumption for subjects with ESVEA
poisson_esvea1 <- as.data.frame(
  cbind(time = c(0,5,10,15.19), 
        cumhaz = c(0,5*poisson_est[4], 5*poisson_est[4] + 5*poisson_est[5], 5*poisson_est[4] + 5*poisson_est[5] + 5.19*poisson_est[6])))

#Plot of Nelson-Aalen together with Poisson cumulative hazard
fig211 + geom_line(data = poisson_esvea0, aes(x = time, y = cumhaz), linewidth = 1) + 
         geom_line(data = poisson_esvea1, aes(x = time, y = cumhaz), linewidth = 1, linetype = "dotted") 

Code show/hide
#  geom_line(data =poisson_esvea1, aes(x = x, y = y), color ="#00BFC4", linewidth = 0.8)
Code show/hide
* To estimate the cumulative hazard under the assumption that the hazard is constant within 5-year intervals we must first split 
  each record into subrecords with cut times at 5 and 10 years. We will create a new data set 'chs_pch213' where 'cens' indicates if the
  subject died during each 5-year interval, 'risktime' is the time at risk during each 5-year interval, logrisk is the logarithm of
  the risk time and 'interval' is a categorical variable with 6 levels corresponding to the different combinations of interval and 
  ESVEA status;
  
data chs_pch213; 
    set chs_data;
    cens=(timedeath<=5)*(death = 1);
    risktime=min(5,timedeath);
    logrisk = log(risktime);
    interval=1;
    if esvea = 1 then do; interval =4; end; output;
    if timedeath>5 then do;
    cens=(timedeath<=10)*(death = 1);
    risktime=min(5,timedeath-5);
    logrisk = log(risktime);
    interval=2;
    if esvea = 1 then do; interval = 5; end; output; end;
    if timedeath>10 then do;
    cens= (death = 1); 
    risktime=timedeath-10;
    logrisk = log(risktime);
    interval=3;
    if esvea = 1 then do; interval = 6; end; output; end;
run;


* We can then estimate the hazard within each of the intervals [0,5), [5,10), [10,15.19). 
  This can be done with the sql procedure;

title "2.1.3: Estimate of the hazards for subjects with or without ESVEA under the piece-wise constant hazards assumption";
proc sql;
    select interval,
    sum(cens) as sum_death, 
    sum(risktime) as sum_risktime,
    calculated sum_death / calculated sum_risktime as hazard
    from chs_pch213
    group by interval;
quit;

* Alternatively, all estimates can be obtained at once through a Poisson regression using the genmod procedure. Since 'interval' is 
  a categorical variable it must be included in the class statement. 'cens' is included in the model statement as the dependent
  variable whilst interval is included as the only explanatory variable. Furthermore the probability distribution is specified through 
  'dist = poi' and the logarithm of the offset is included as well as the argument 'noint' which is added to obtain a model without an 
  intercept. The estimates for the hazard for each of interval and status of ESVEA are then given using the estimate statement. These 
  estimates corresponds to the exponential of the estimates from the table of the maximum likelihood parameter estimation;

proc genmod data=chs_pch213;
    class interval;
    model cens=interval / dist=poi offset=logrisk noint;
    estimate '0-5 years, ESVEA = 0' interval 1 0 0 0 0 0;
    estimate '5-10 years, ESVEA = 0' interval 0 1 0 0 0 0;
    estimate '10+ years, ESVEA = 0' interval 0 0 1 0 0 0;
    estimate '0-5 years, ESVEA = 1' interval 0 0 0 1 0 0;
    estimate '5-10 years, ESVEA = 1' interval 0 0 0 0 1 0;
    estimate '10+ years, ESVEA = 1' interval 0 0 0 0 0 1;
run;

* To investigate if the hazards differ between the two groups we must fit a Poisson model where we do not stratify by ESVEA 
  and then compare the two Poisson models using a likelihood ratio test (LRT). Thus, we repeat the procedure and first create
  a new data set 'chs_pch_null213' where no distinction is made between subjects with or without ESVEA;

data chs_pch_null213;
  set chs_pch213;
  if interval in (4,5,6) then interval = interval - 3;
run;

* Then, the Poisson model is fitted;

title "2.1.3: Estimate of the hazard under the piece-wise constant hazards assumption with no distinction between subjects with or 
       without ESVEA";
proc genmod data=chs_pch_null213;
    class interval;
    model cens=interval/dist=poi offset=logrisk noint;
    estimate '0-5 years' interval 1 0 0;
    estimate '5-10 years' interval 0 1 0;
    estimate '10+ years' interval 0 0 1;
run;

* Lastly, the two models are compared with a likelihood ratio test. This is done by comparing the max log likelihoods for the two models 
  and then calculating the p-value using the Chi-square distribution with 3 degrees of freedom. The likelihood scores are found in the 'Criteria For
  Accessing Goodness Of Fit' table;
  
title "2.1.3: Likelihood ratio test comparing the two piece-wise constant hazards models";
data p;
    chi2=1444.6862-1428.4662;
    p=1-probchi(chi2,3);
proc print;
run;

* We get a Chi-square statistics of 16.22 on 3 degrees of freedom with a corresponding p-value of 0.001
  We conclude once more that the hazards seem different for the subjects with or without ESVEA also under this model.; 

Exercise 2.2

Consider the data from the Copenhagen Holter study.

The data should be loaded as chs_data:

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

Note that the time variables, timeafib, timestroke and timedeath, are measured in days. We will first convert them to years for easier interpretations.

Code show/hide
chs_data$timeafib   <- chs_data$timeafib/365.25
chs_data$timestroke <- chs_data$timestroke/365.25
chs_data$timedeath  <- chs_data$timedeath/365.25

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(ggplot2)
theme_set(theme_bw())
Code show/hide
* We first load the data;

proc import out = chs_data
datafile = 'data/cphholter.csv'
dbms= csv replace;
getnames=yes;
run;

* We will convert the time variables ('timeafib', 'timestroke', and 'timedeath') from days to years;

data chs_data;
set chs_data;
timeafib = timeafib/365.25;
timestroke = timestroke/365.25;
timedeath = timedeath/365.25;
run;

1.

Make a version of the data set enabling an analysis of the composite endpoint of stroke or death without stroke (‘stroke-free survival’, i.e. define the relevant Time and Status variables), see Section 1.2.4.

To estimate the cumulative hazards for the composite end-point of stroke or death we must first create a suitable time variable, timestrokeordeath and status indicator, strokeordeath.

Code show/hide
chs_data$timestrokeordeath <- ifelse(chs_data$stroke == 1, chs_data$timestroke, chs_data$timedeath) 
chs_data$strokeordeath <- ifelse(chs_data$stroke == 1, 1, chs_data$death) 
Code show/hide
* To enable the analysis of the composite end-point stroke or death we will create a new time variable with the smallest value of
  'timestroke' and 'timedeath' called 'timestrokeordeath' and a censoring variable called 'strokeordeath' which is 1 if stroke and/
  or death occured and 0 otherwise for each subject.;

title "2.2.1";
data chs_data;
    set chs_data;
    timestrokeordeath = timedeath;
    if stroke = 1 then timestrokeordeath = timestroke;
    strokeordeath = death;
    if stroke = 1 then strokeordeath = 1;
run;

2.

Estimate non-parametrically the cumulative hazards of stroke-free survival for subjects with or without ESVEA.

We repeat the procedure from exercise 2.1.1 to obtain the Nelson-Aalen estimate of the cumulative hazards.

Code show/hide
# Estimating the cumulative hazards for subjects with or without ESVEA non-parametrically
naafit222 <- survfit(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea, data = chs_data)

naadata222 <- data.frame(time = naafit222$time,
                        cumhaz = naafit222$cumhaz, 
                        esvea = c(rep(names(naafit222$strata)[1], naafit222$strata[1]),
                               rep(names(naafit222$strata)[2], naafit222$strata[2])))


(fig222 <- ggplot(data = naadata222) + geom_step(aes(x = time, y = cumhaz, linetype = esvea), linewidth = 1) + 
          scale_linetype_discrete("ESVEA", labels = c("0", "1")) + 
          xlab("Time since randomization (years)") + 
          ylab("Cumulative hazard"))

Code show/hide
* We then repeat the procedure from exercise 2.1.1 to obtain the Nelson-Aalen estimate of the cumulative hazards.;

title "2.2.2";
proc phreg data=chs_data; 
    model timestrokeordeath*strokeordeath(0)=;
    strata esvea;
    baseline out=hazdat cumhaz=naa;
run;

* The estimates are then plotted;

title "2.2.2: Nelson-Aalen estimate of the cumulative hazards for stroke-free survival for subjects with or without ESVEA ";
proc gplot data=hazdat;
    plot naa*timestrokeordeath=esvea/haxis=axis1 vaxis=axis2;
    axis1 order=0 to 15.19 by 1 minor=none label=('Years');
    axis2 order=0 to 1 by 0.1 minor=none label=(a=90 'Cumulative hazard');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;

3.

Make a non-parametric test for comparison of the two.

The hazards are compared using a logrank test

Code show/hide
# Logrank test
survdiff(Surv(timestrokeordeath, strokeordeath) ~ esvea, data = chs_data)
Call:
survdiff(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea, 
    data = chs_data)

          N Observed Expected (O-E)^2/E (O-E)^2/V
esvea=0 579      230    253.4      2.17      18.6
esvea=1  99       57     33.6     16.37      18.6

 Chisq= 18.6  on 1 degrees of freedom, p= 2e-05 

We get a chi-squared statistic of 18.6 on 1 degree of freedom and a corresponding p-value of \(2 \cdot 10^{-5}\).

Code show/hide
* The hazards are compared with a log rank test which is implemented in the lifetest procedure;

title "2.2.3: Log rank test comparing the hazards for the subjects with or without ESVEA";
proc lifetest data=chs_data notable plots=none; 
    time timestrokeordeath*strokeordeath(0);
    strata esvea;
run; 

* With a Chi-square statistic of 18.6 and a corresponding p-value of <0.0001 we conclude that hazards are different
  for the groups with or without ESVEA under this model; 

4.

Make a similar analysis based on a model where the hazard is assumed constant within 5-year intervals.

We fit a Poisson model where the hazards are assumed constant within (approximate) 5-year intervals.

Code show/hide
#Splitting records in 5-year intervals
data_pch <- survSplit(Surv(timestrokeordeath, strokeordeath) ~ esvea, 
          data = chs_data, 
          cut = c(5,10), 
          episode = "interval")
data_pch$duration <- data_pch$timestrokeordeath - data_pch$tstart
data_pch$int_esvea <- data_pch$interval + (data_pch$esvea*3)

#Poisson regression
poisson224 <- glm(strokeordeath ~ factor(int_esvea) - 1 + offset(log(duration)), family = poisson(), data = data_pch)
poisson_est <- as.numeric(exp(coefficients(poisson224)))

#Table with estimate of the coefficients
cbind(c("0-5 years, ESVEA = 0","5-10 years, ESVEA = 0","10+ years, ESVEA = 0", "0-5 years, ESVEA = 1","5-10 years, ESVEA = 1","10+ years, ESVEA = 1"), poisson_est)
                             poisson_est         
[1,] "0-5 years, ESVEA = 0"  "0.0187322517434922"
[2,] "5-10 years, ESVEA = 0" "0.0355788210802352"
[3,] "10+ years, ESVEA = 0"  "0.0503486257754533"
[4,] "0-5 years, ESVEA = 1"  "0.0582424237782578"
[5,] "5-10 years, ESVEA = 1" "0.0631007474577195"
[6,] "10+ years, ESVEA = 1"  "0.0520302535222435"

The hazards are then compared using a likelihood ratio test

Code show/hide
#Null model
poisson_null224 <-  glm(strokeordeath ~ factor(interval) - 1 + offset(log(duration)), family = poisson(), data = data_pch)

#LRT
anova(poisson_null224, poisson224, test = "LRT")
Analysis of Deviance Table

Model 1: strokeordeath ~ factor(interval) - 1 + offset(log(duration))
Model 2: strokeordeath ~ factor(int_esvea) - 1 + offset(log(duration))
  Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
1      1769     1558.5                          
2      1766     1534.8  3   23.721 2.857e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We get a chi-squared statistic of 23.72 on 3 degrees of freedom and a corresponding p-value of \(2.86 \cdot 10^{-5}\).

The Poisson model is plotted together with the Nelson-Aalen estimates below

Code show/hide
#Data frame with time and cumulative hazard under poisson assumption for subjects without ESVEA
poisson_esvea0 <- as.data.frame(
  cbind(time = c(0,5,10,15.19), 
        cumhaz = c(0,5*poisson_est[1], 5*poisson_est[1] + 5*poisson_est[2], 5*poisson_est[1] + 5*poisson_est[2] + 5*poisson_est[3])))
#Data frame with time and cumulative hazard under poisson assumption for subjects with ESVEA
poisson_esvea1 <- as.data.frame(
  cbind(time = c(0,5,10,15.19), 
        cumhaz = c(0,5*poisson_est[4], 5*poisson_est[4] + 5*poisson_est[5], 5*poisson_est[4] + 5*poisson_est[5] + 5*poisson_est[6])))

#Plot of Nelson-Aalen together with Poisson cumulative hazard
fig222 + geom_line(data = poisson_esvea0, aes(x = time, y = cumhaz), linewidth = 1) + 
         geom_line(data = poisson_esvea1, aes(x = time, y = cumhaz), linewidth = 1, linetype = "dotted") 

Code show/hide
data chs_pch; 
    set chs_data;
    cens=(timestrokeordeath<=5)*(strokeordeath=1);
    risktime=min(5,timestrokeordeath);
    logrisk=log(risktime); interval=1;
    if esvea=1 then do; interval=4; end; output;
    if timestrokeordeath>5 then do;
    cens=(timestrokeordeath<=10)*(strokeordeath=1);
    risktime=min(5,timestrokeordeath-5);
    logrisk=log(risktime); 
    interval=2;
    if esvea=1 then do; 
    interval=5; end; output; end;
    if timestrokeordeath>10 then do;
    cens=(strokeordeath=1); 
    risktime=timestrokeordeath-10;
    logrisk=log(risktime); interval=3;
    if esvea=1 then do; 
    interval = 6; end; output; end;
run;

* Then, the Poisson model is fitted using the genmod procedure.;

proc genmod data=chs_pch;
    class interval;
    model cens=interval/dist=poi offset=logrisk noint;
    estimate '0-5 years, ESVEA = 0'  interval 1 0 0 0 0 0;
    estimate '5-10 years, ESVEA = 0' interval 0 1 0 0 0 0;
    estimate '10+ years, ESVEA = 0' interval 0 0 1 0 0 0;
    estimate '0-5 years, ESVEA = 1' interval 0 0 0 1 0 0;
    estimate '5-10 years, ESVEA = 1' interval 0 0 0 0 1 0;
    estimate '10+ years, ESVEA = 1' interval 0 0 0 0 0 1;
run;

* To investigate if the hazards are different for the two groups we will fit a model where we do not stratify by ESVEA. 
  Once again we will first make a new version of the data set.;
  
data chs_pch_null;
  set chs_pch;
  if interval in (4,5,6) then interval = interval - 3;
run;

* Then, the model which does not distinguish between subject with or without ESVEA is fitted using the genmod procedure;

proc genmod data=chs_pch_null;
    class interval;
    model cens=interval/dist=poi offset=logrisk noint;
    estimate '0-5 years' interval 1 0 0;
    estimate '5-10 years' interval 0 1 0;
    estimate '10+ years' interval 0 0 1;
run;

* Lastly, the two models are compared with a likelihood ratio test. The deviance scores are found in the table 'Criteria for 
  assesing goodness of fit'.;

title "2.2.4: Likelihood ratio test comparing the two piece-wise constant hazards models";
data p;
    chi2=1558.4777-1534.7568;
    p=1-probchi(chi2,3);
proc print;
run;

* We get a Chi-square statistic of 23.72 on 3 degrees of freedom with a very small p-value. Thus, we conclude oncemore that the hazards are not the same for the two groups;

Exercise 2.3

Consider the data from the Copenhagen Holter study and the composite end-point stroke-free survival.

The data should be loaded as chs_data:

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

Note that the time variables, timeafib, timestroke and timedeath, are measured in days. We will first convert them to years for easier interpretations.

Code show/hide
chs_data$timeafib   <- chs_data$timeafib/365.25
chs_data$timestroke <- chs_data$timestroke/365.25
chs_data$timedeath  <- chs_data$timedeath/365.25

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(ggplot2)
theme_set(theme_bw())
Code show/hide
* We first load the data;

proc import out = chs_data
datafile = 'data/cphholter.csv'
dbms= csv replace;
getnames=yes;
run;

* We will convert the time variables ('timeafib', 'timestroke', and 'timedeath') from days to years;

data chs_data;
set chs_data;
timeafib = timeafib/365.25;
timestroke = timestroke/365.25;
timedeath = timedeath/365.25;
run;

1.

Fit a Cox model and estimate the hazard ratio between subjects with or without ESVEA.

The Cox proportional hazards model is implemented as coxph in the survival package. The formula argument consists of a Surv object to the left of ‘~’ and the covariates to the right. We include the argument ties = “breslow” to obtain the Breslow estimate of the cumulative hazard.

To estimate the cumulative hazards for the composite end-point of stroke or death we must first create a suitable time variable, timestrokeordeath and status indicator, strokeordeath.

Code show/hide
chs_data$timestrokeordeath <- ifelse(chs_data$stroke == 1, chs_data$timestroke, chs_data$timedeath) 
chs_data$strokeordeath <- ifelse(chs_data$stroke == 1, 1, chs_data$death) 

cox231 <- coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea, data = chs_data, ties = "breslow")
summary(cox231)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea, 
    data = chs_data, ties = "breslow")

  n= 678, number of events= 287 

        coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.6285    1.8747   0.1482 4.241 2.23e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.875     0.5334     1.402     2.507

Concordance= 0.546  (se = 0.012 )
Likelihood ratio test= 15.79  on 1 df,   p=7e-05
Wald test            = 17.98  on 1 df,   p=2e-05
Score (logrank) test = 18.58  on 1 df,   p=2e-05

We get a hazard ratio of 1.875, i.e. the hazard is increased by a factor 1.875 for patients with ESVEA compared to those without. We get a Wald test of 4.241 with a corresponding p-value of \(2.22\cdot 10^{-5}\).

Code show/hide
* A Cox model can be fitted with the phreg procedure. The model statement requires the time varible and a censoring variable to
  be specified on the left hand side of '=' and the explanatory variables on the right hand side. The censoring variable have to be followed 
  by a parenthesis containing the censoring value(s);

title "2.3.1";
proc phreg data=chs_data;
    model timestrokeordeath*strokeordeath(0)=esvea;
run;

* We get an estimated coefficient for ESVEA of 0.62849 which corresponds to a hazard ratio of exp(0.62849) = 1.874778. Thus, having 
  ESVEA almost doubles the rate of experiencing the composite end-point.

* We can conclude that the hazards for the subjects with or without ESVEA are different under the proportional hazards
  assumption as well, since we get a Chi-square statistic of 17.99 on 1 degree of freedom with a correpsonding p-value <0.0001.

2.

Fit a Poisson model where the hazard is assumed constant within 5-year intervals and estimate the hazard ratio between subjects with or without ESVEA.

The piece-wise constant model is fitted using glm as described in exercise 2.1.3.

Code show/hide
# Poisson regression
poisson232 <- glm(strokeordeath ~ factor(interval) - 1 + esvea + offset(log(duration)), family = poisson(), data = data_pch)
summary(poisson232)

Call:
glm(formula = strokeordeath ~ factor(interval) - 1 + esvea + 
    offset(log(duration)), family = poisson(), data = data_pch)

Coefficients:
                  Estimate Std. Error z value Pr(>|z|)    
factor(interval)1  -3.8338     0.1183 -32.419  < 2e-16 ***
factor(interval)2  -3.3452     0.1014 -32.998  < 2e-16 ***
factor(interval)3  -3.0708     0.1020 -30.117  < 2e-16 ***
esvea               0.6209     0.1482   4.191 2.78e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 15090  on 1772  degrees of freedom
Residual deviance:  1543  on 1768  degrees of freedom
AIC: 2125

Number of Fisher Scoring iterations: 6

We get a coefficient of 0.6209 for the ESVEA variable. This corresponds to a hazard ratio of \(\exp(0.6209) = 1.8606\). The Wald test is 4.188 with a corresponding p-value of \(2.82 \cdot 10^{-5}\).

Code show/hide
title "2.3.2";
proc genmod data=chs_pch_null;
    class interval;
    model cens=interval esvea/dist=poi offset=logrisk;
run;

* We get a hazard ratio of exp(0.6209) = 1.860602 for ESVEA under the assumption of piece-wise constant hazards model. This 
  difference in hazards between subjects with or without ESVEA is significant, since we get a Chi-square statistic of 17.56 on 1 
  degree of freedom with a corresponding p-value <0.0001.;

3.

Compare the results from the two models.

The hazard ratio between subjects with or without ESVEA found using the Cox model is 1.87 while it was 1.86 assuming the piece-wise constant hazards model. Thus, the estimates are nearly identical both predicting an increase of the rate for subjects with ESVEA of a factor 1.86-1.87.

Exercise 2.4

Consider the data from the Copenhagen Holter study and the composite end-point stroke-free survival.

The data should be loaded as chs_data:

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

Note that the time variables, timeafib, timestroke and timedeath, are measured in days. We will first convert them to years for easier interpretations.

Code show/hide
chs_data$timeafib   <- chs_data$timeafib/365.25
chs_data$timestroke <- chs_data$timestroke/365.25
chs_data$timedeath  <- chs_data$timedeath/365.25

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(ggplot2)
theme_set(theme_bw())
Code show/hide
* We first load the data;

proc import out = chs_data
datafile = 'data/cphholter.csv'
dbms= csv replace;
getnames=yes;
run;

* We will convert the time variables ('timeafib', 'timestroke', and 'timedeath') from days to years;

data chs_data;
set chs_data;
timeafib = timeafib/365.25;
timestroke = timestroke/365.25;
timedeath = timedeath/365.25;
run;

1.

Fit a Cox model and estimate the hazard ratio between subjects with or without ESVEA, now also adjusting for sex, age, and systolic blood pressure (sysBP).

We fit the Cox model as described in exercise 2.3.1. This time we also include sex, age and sbp as covariates.

Code show/hide
chs_data$timestrokeordeath <- ifelse(chs_data$stroke == 1, chs_data$timestroke, chs_data$timedeath) 
chs_data$strokeordeath <- ifelse(chs_data$stroke == 1, 1, chs_data$death) 

# Cox model 
cox241 <- coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + sex + age + sbp , ties = "breslow", data = chs_data)
summary(cox241)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + 
    sex + age + sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 285 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.318284  1.374767 0.152587 2.086   0.0370 *  
sex   0.577585  1.781731 0.126946 4.550 5.37e-06 ***
age   0.076658  1.079673 0.009362 8.189 2.64e-16 ***
sbp   0.005152  1.005165 0.002438 2.113   0.0346 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.375     0.7274     1.019     1.854
sex       1.782     0.5613     1.389     2.285
age       1.080     0.9262     1.060     1.100
sbp       1.005     0.9949     1.000     1.010

Concordance= 0.672  (se = 0.016 )
Likelihood ratio test= 99.45  on 4 df,   p=<2e-16
Wald test            = 104.1  on 4 df,   p=<2e-16
Score (logrank) test = 110  on 4 df,   p=<2e-16

We get a hazard ratio of 1.375 for ESVEA. The Wald test is 2.086 with a corresponding p-value of 0.0370.

Code show/hide
* We add sex, age, and systolic blood pressure as explanatory variables to our Cox model from before;

title "2.4.1";
proc phreg data=chs_data;
    model timestrokeordeath*strokeordeath(0)=esvea sex age sbp;
run;

* We now get a hazard ratio of exp(0.31830) = 1.374789 for ESVEA. The effect of ESVEA is still significant with Chi-square statistic
  of 4.3514 on 1 degree of freedom and a corresponding p-value of 0.0370;

2.

Fit a Poisson model where the hazard is assumed constant within 5-year intervals and estimate the hazard ratio between subjects with or without ESVEA, now also adjusting for sex, age, and sysBP.

To obtain estimates under the piece-wise constant hazard assumption we add sex, age and sbp on the right hand side of ‘~’ both in the survSplit and the glm functions.

Code show/hide
#Splitting data in 5-year intervals
data_pch <- survSplit(Surv(timestrokeordeath, strokeordeath) ~ esvea + age + sex + sbp,  
          data = chs_data, 
          cut = c(5,10), 
          episode = "interval")
data_pch$duration <- data_pch$timestrokeordeath - data_pch$tstart

#Poisson regression
poisson242 <- glm(strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + offset(log(duration)), family = poisson(), data = data_pch)
summary(poisson242)

Call:
glm(formula = strokeordeath ~ factor(interval) - 1 + esvea + 
    sex + age + sbp + offset(log(duration)), family = poisson(), 
    data = data_pch)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
factor(interval)1 -9.984329   0.701540 -14.232  < 2e-16 ***
factor(interval)2 -9.403023   0.691466 -13.599  < 2e-16 ***
factor(interval)3 -9.077211   0.685584 -13.240  < 2e-16 ***
esvea              0.311068   0.152551   2.039   0.0414 *  
sex                0.578858   0.126880   4.562 5.06e-06 ***
age                0.076159   0.009360   8.137 4.06e-16 ***
sbp                0.005061   0.002438   2.076   0.0379 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 15060.9  on 1767  degrees of freedom
Residual deviance:  1449.7  on 1760  degrees of freedom
  (5 observations deleted due to missingness)
AIC: 2033.7

Number of Fisher Scoring iterations: 6

We get a coefficient of 0.3111 for ESVEA and a hazard ratio of \(\exp(0.3111) = 1.3649\). The Wald test is 2.039 and the corresponding p-value is 0.0414.

Code show/hide
* To obtain an estimate of the hazard ratio for ESVEA under the piece-wise constant hazards model where we also adjust for sex, age,
  and systolic blood pressure we must include these as explanatory variables in the model statement.;

title "2.4.2";
proc genmod data=chs_pch_null;
    class interval (ref = '1');
    model cens=interval esvea sex age sbp/dist=poi offset=logrisk;
run;

* We find a hazard ratio of ESVEA of exp(0.3111) = 1.364926. The hazards seems to be different between the subjects with or without ESVEA
  since we get a Chi-square statistic of 4.16 on 1 degree of freedom with a corresponding p-value of 0.0414;

3.

Compare the results from the two models.

Again, the hazard ratios estimated using either the Cox model or the Poisson model are comparable with a factor 1.36-1.37 increase of the hazard for subjects with ESVEA. We notice that this is smaller than the hazard ratio estimated without adjusting for sex, age, and systolic blood pressure.

Exercise 2.5

1.

Check the Cox model from the previous exercise by examining proportional hazards between subjects with or without ESVEA and between men and women.

To check the assumption of proportional hazards for subjects with or without ESVEA under the Cox model we fit a stratified Cox model. This can be done by adding strata(esvea) instead of esvea as a covariate in the coxph function. The baseline hazard can then be extracted using the function basehaz from the survival package. The argument centered = FALSE is added. Otherwise, the predicted cumulative hazard at the mean values of the covariates is returned.

The hazards are plotted against each other. If the proportional hazards assumption holds then the curve should be close to a straight line through (0,0) with a slope equal to the estimated hazard ratio of ESVEA.

Code show/hide
#Cox model with separate hazards for subjects with or without ESVEA
cox251_esvea <- coxph(Surv(timestrokeordeath, strokeordeath) ~ strata(esvea) + sex + age + sbp, ties = "breslow", data =chs_data)
summary(cox251_esvea)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ strata(esvea) + 
    sex + age + sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 285 
   (3 observations deleted due to missingness)

        coef exp(coef) se(coef)     z Pr(>|z|)    
sex 0.572119  1.772018 0.127078 4.502 6.73e-06 ***
age 0.075990  1.078952 0.009378 8.103 5.35e-16 ***
sbp 0.005321  1.005335 0.002441 2.180   0.0292 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    exp(coef) exp(-coef) lower .95 upper .95
sex     1.772     0.5643     1.381     2.273
age     1.079     0.9268     1.059     1.099
sbp     1.005     0.9947     1.001     1.010

Concordance= 0.662  (se = 0.018 )
Likelihood ratio test= 83.24  on 3 df,   p=<2e-16
Wald test            = 86.63  on 3 df,   p=<2e-16
Score (logrank) test = 90.38  on 3 df,   p=<2e-16
Code show/hide
#Cumulative hazard for subjects without ESVEA
esvea0_haz <- rbind(c(0,0),subset(basehaz(cox251_esvea, centered = FALSE), strata == "esvea=0")[,1:2])
colnames(esvea0_haz) <- c("haz0", "time")

#Cumulative hazard for subjects with ESVEA
esvea1_haz <- rbind(c(0,0),subset(basehaz(cox251_esvea, centered = FALSE), strata == "esvea=1")[,1:2])
colnames(esvea1_haz) <- c("haz1", "time")

#Data frame with time column and both hazards
ph_data <- merge(esvea0_haz, esvea1_haz, all = TRUE)
ph_data <- ph_data %>% fill(haz0,haz1)

#Plotting the hazards against each other
ggplot(data = ph_data) + geom_step(aes(haz0, haz1)) + geom_abline(slope = 1.374825) + xlab("Cumulative hazard for subjects with ESVEA = 0") + ylab("Cumulative hazard for subjects with ESVEA = 1")

The cumulative hazard deviates a bit from the straight line. This suggests that the assumption of proportional hazards for subjects with or without ESVEA may not be reasonable.

We repeat the procedure but this time examining the proportional hazards assumption of sex.

Code show/hide
#Cox model with separate hazards for male and female subjects
cox251_sex <- coxph(Surv(timestrokeordeath, strokeordeath) ~ esvea + strata(sex) + age + sbp, ties = "breslow", data =chs_data)
summary(cox251_sex)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + 
    strata(sex) + age + sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 285 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.322685  1.380830 0.152751 2.112   0.0346 *  
age   0.076270  1.079253 0.009363 8.146 3.77e-16 ***
sbp   0.005173  1.005187 0.002436 2.124   0.0337 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.381     0.7242     1.024     1.863
age       1.079     0.9266     1.060     1.099
sbp       1.005     0.9948     1.000     1.010

Concordance= 0.669  (se = 0.016 )
Likelihood ratio test= 94.47  on 3 df,   p=<2e-16
Wald test            = 95.6  on 3 df,   p=<2e-16
Score (logrank) test = 102.1  on 3 df,   p=<2e-16
Code show/hide
#Cumulative hazard for females
sex0_haz <- rbind(c(0,0),subset(basehaz(cox251_sex, centered = FALSE), strata == "sex=0")[,1:2])
colnames(sex0_haz) <- c("haz0", "time")

#Cumulative hazard for males
sex1_haz <- rbind(c(0,0),subset(basehaz(cox251_sex, centered = FALSE), strata == "sex=1")[,1:2])
colnames(sex1_haz) <- c("haz1", "time")

#Data frame with time column and both hazards
ph_data <- merge(sex0_haz, sex1_haz, all = TRUE)
ph_data <- ph_data %>% fill(haz0,haz1)

#Plotting the hazards against each other
ggplot(data = ph_data) + geom_step(aes(haz0, haz1)) + geom_abline(slope = 1.781584) + xlab("Cumulative hazard for subjects with sex = 0") + ylab("Cumulative hazard for subjects with sex = 1")

The curve coincides nicely with the straight line. Thus, the proportional hazard assumption seems reasonable for sex.

Code show/hide
* To check the assumption of proportional hazards for subjects with or without ESVEA under the Cox model we will fit seperate baseline
  hazards. This can be done by including a strata statement where ESVEA is specified. Furthermore, we specify the values of the 
  other covariates (sex, age, and systolic blood pressure) in the data frame 'covstr'. The hazard is then plotted against each 
  other. If the proportional hazards assumption holds the curve should be close to a straight line through (0,0) with a slope equal
  to the estimated hazard ratio of ESVEA.;

data covstr;
    sex=0; age=0; sbp=0;
run;

* The Cox model is fitted and a data set with the baseline hazards is saved as 'breslowstr';

title "2.5.1";
proc phreg data=chs_data;
    model timestrokeordeath*strokeordeath(0)=sex age sbp;
    strata esvea;
    baseline out=breslowstr cumhaz=breslow covariates=covstr;
run;

* Now the baseline hazards for esvea = 0,1 are extracted and merged into one data set called 'breslow01';

data breslow0;
    set breslowstr; 
    if esvea=0; 
    a00=breslow; 
run;

data breslow1; 
    set breslowstr; 
    if esvea=1;     
    a01=breslow; 
run;

data breslow01; 
    merge breslow0 breslow1; 
    by timestrokeordeath; 
run;

* Then, the empty cells of the cumulative hazards are replaced with the previous values;

data breslow01; 
set breslow01;
    by timestrokeordeath;
    retain last1 last2;
    if a00=. then cumhaz0=last1; if a00 ne . then cumhaz0=a00; 
    if a01=. then cumhaz1=last2; if a01 ne . then cumhaz1=a01;
    output;
    last1=cumhaz0; last2=cumhaz1; 
run;

* The straight line with a slope which equals the predicted hazard ratio for esvea from the model in 2.4.1, is added to the data set;

data breslow01; 
    set breslow01;
    line=1.3649*cumhaz0;
run;

* Finally, the cumulative baseline hazards for esvea = 0,1 are plotted against each other together with the straight line;

title "2.5.1: Checking the proportionality assumption of the hazards for ESVEA";
proc gplot data=breslow01;
    plot cumhaz1*cumhaz0 line*cumhaz0/haxis=axis1 vaxis=axis2 overlay;
    axis1 order=0 to 0.0013 by 0.0001 minor=none label=('Cumulative baseline hazard: ESVEA = 0');
    axis2 order=0 to 0.0016 by 0.0001 minor=none label=(a=90 'Cumulative baseline hazard: ESVEA = 1');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=rl c=blue;
run;

* The cumulative hazard deviates a bit from the straight line. This suggests that the assumption of proportional hazards for
  subjects with or without ESVEA is questionable;

* To examine the assumption of proportional hazards for sex we will repeat the procedure. Thus, we will first specify the 
  other covariates (ESVEA, age, and systolic blood pressure) and the fit a Cox model including the strata statement.;

data covstr;
    esvea=0; age=0; sbp=0;
run;

title "2.5.1";
proc phreg data=chs_data;
    model timestrokeordeath*strokeordeath(0)=esvea age sbp;
    strata sex;
    baseline out=breslowstr cumhaz=breslow covariates=covstr;
run;

* Now the baseline hazards for sex = 0,1 are extracted and merged into one data set called 'breslow01';

data breslow0;
    set breslowstr; 
    if sex=0; 
    a00=breslow; 
run;

data breslow1; 
    set breslowstr; 
    if sex=1;   
    a01=breslow; 
run;

data breslow01; 
    merge breslow0 breslow1; 
    by timestrokeordeath; 
run;

* Then, the empty cells of the cumulative hazards are replaced with the previous values;

data breslow01; 
set breslow01;
    by timestrokeordeath;
    retain last1 last2;
    if a00=. then cumhaz0=last1; if a00 ne . then cumhaz0=a00; 
    if a01=. then cumhaz1=last2; if a01 ne . then cumhaz1=a01;
    output;
    last1=cumhaz0; last2=cumhaz1; 
run;

* The straight line is then added to the data set;

data breslow01; 
    set breslow01;
    line=1.7840*cumhaz0;
run;

* Finally, the cumulative baseline hazards for sex = 0,1 are plotted against each other together with the line;

title "2.5.1: Examining the assumption of proportional hazards for sex";
proc gplot data=breslow01;
    plot cumhaz1*cumhaz0 line*cumhaz0/haxis=axis1 vaxis=axis2 overlay;
    axis1 order=0 to 0.0014 by 0.0001 minor=none label=('Cumulative baseline hazard: Sex = 0');
    axis2 order=0 to 0.0022 by 0.0001 minor=none label=(a=90 'Cumulative baseline hazard: Sex = 1');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=rl c=blue;
run;

* The curve coincides nicely with the straight line. Thus, the proportional hazard assumption seems reasonable for sex.;

2.

Check for linearity on the log(hazard)-scale for age and sysBP.

We can check the linearity assumptions on the log(hazards)-scale for age and systolic blood pressure by including non-linear effects.

The linearity assumption of systolic blood pressure can be investigated by including a linear spline. We select 135 as a knot since this value is typically the cut-point for medication for hypertension.

Code show/hide
#Adding covariate (systolisk - 135)*I(systolisk > 135)
chs_data$hypertension <- (chs_data$sbp - 135)*(chs_data$sbp > 135)

#Cox model with non-linear effect of systolic blood pleasure (linear spline)
cox252_sbp <- coxph(Surv(timestrokeordeath, strokeordeath) ~ esvea + sex + age + sbp + hypertension, ties = "breslow", data = chs_data)
summary(cox252_sbp)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + 
    sex + age + sbp + hypertension, data = chs_data, ties = "breslow")

  n= 675, number of events= 285 
   (3 observations deleted due to missingness)

                  coef exp(coef)  se(coef)      z Pr(>|z|)    
esvea         0.322940  1.381183  0.152659  2.115   0.0344 *  
sex           0.577846  1.782195  0.126886  4.554 5.26e-06 ***
age           0.076676  1.079692  0.009369  8.184 2.75e-16 ***
sbp           0.016525  1.016662  0.015020  1.100   0.2713    
hypertension -0.012579  0.987500  0.016299 -0.772   0.4403    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

             exp(coef) exp(-coef) lower .95 upper .95
esvea           1.3812     0.7240    1.0240     1.863
sex             1.7822     0.5611    1.3898     2.285
age             1.0797     0.9262    1.0600     1.100
sbp             1.0167     0.9836    0.9872     1.047
hypertension    0.9875     1.0127    0.9565     1.020

Concordance= 0.671  (se = 0.016 )
Likelihood ratio test= 100.1  on 5 df,   p=<2e-16
Wald test            = 103.9  on 5 df,   p=<2e-16
Score (logrank) test = 110.1  on 5 df,   p=<2e-16

We compare this non-linear model with the linear model from exercise 2.4.1 using a likelihood ratio test.

Code show/hide
#LRT
anova(cox241,cox252_sbp, test = "LRT")
Analysis of Deviance Table
 Cox model: response is  Surv(timestrokeordeath, strokeordeath)
 Model 1: ~ esvea + sex + age + sbp
 Model 2: ~ esvea + sex + age + sbp + hypertension
   loglik  Chisq Df Pr(>|Chi|)
1 -1729.0                     
2 -1728.7 0.6254  1      0.429

We get a chi-squared statistic of 0.6254 on 1 degree of freedom with a corresponding p-value of 0.429 when comparing the two Cox models. Including a linear spline to our model does not seem to describe data better than the model from exercise 2.4.1.

We will test the linearity assumption of age by adding a quadratic term to the Cox model.

Code show/hide
#Cox model with non-linear assumption of age (quadratic term)
cox252_age <- coxph(Surv(timestrokeordeath, strokeordeath) ~ esvea + sex + age + I(age^2) + sbp, ties = "breslow", data =chs_data)
summary(cox252_age)
Call:
coxph(formula = Surv(timestrokeordeath, strokeordeath) ~ esvea + 
    sex + age + I(age^2) + sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 285 
   (3 observations deleted due to missingness)

               coef  exp(coef)   se(coef)      z Pr(>|z|)    
esvea     0.3172738  1.3733785  0.1526657  2.078   0.0377 *  
sex       0.5820931  1.7897808  0.1293139  4.501 6.75e-06 ***
age       0.1117230  1.1182031  0.1955347  0.571   0.5677    
I(age^2) -0.0002659  0.9997341  0.0014809 -0.180   0.8575    
sbp       0.0051368  1.0051500  0.0024407  2.105   0.0353 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

         exp(coef) exp(-coef) lower .95 upper .95
esvea       1.3734     0.7281    1.0182     1.852
sex         1.7898     0.5587    1.3891     2.306
age         1.1182     0.8943    0.7622     1.640
I(age^2)    0.9997     1.0003    0.9968     1.003
sbp         1.0052     0.9949    1.0004     1.010

Concordance= 0.672  (se = 0.016 )
Likelihood ratio test= 99.48  on 5 df,   p=<2e-16
Wald test            = 103.6  on 5 df,   p=<2e-16
Score (logrank) test = 111.5  on 5 df,   p=<2e-16

Then we compare this model to the Cox model from 2.4.1 using a likelihood ratio test.

Code show/hide
#LRT
anova(cox241, cox252_age, test ="LRT")
Analysis of Deviance Table
 Cox model: response is  Surv(timestrokeordeath, strokeordeath)
 Model 1: ~ esvea + sex + age + sbp
 Model 2: ~ esvea + sex + age + I(age^2) + sbp
  loglik  Chisq Df Pr(>|Chi|)
1  -1729                     
2  -1729 0.0323  1     0.8574

We get a chi-squared statistic of 0.323 on 1 degrees of freedom and a corresponding p-value of 0.8574 when comparing the Cox models. Thus, we do not find evidence against the linearity assumption of age.

Code show/hide
* We can check the linearity assumptions on the log(hazards)-scale for age and systolic blood pressure by including non-linear effects.;

* The linearity assumption of systolic blood pressure can be investigated by including a linear spline. We select 135 as a knot 
  since this value is typically the cut-point for medication for hypertension.;
  
title "2.5.2";
data chs_data;
   set chs_data;
   if sbp > 135 then
   hypertension = sbp - 135;  
   else hypertension = 0;
run;

* Then we include this new explanatory variable in our Cox model;

proc phreg data = chs_data;
    model timestrokeordeath*strokeordeath(0)=esvea sex age sbp hypertension;
run;

* We compare the effect of adding a linear spline with the Cox model from 2.4.1 using a likelihood ratio test;

title "2.5.2: Checking the linearity on the log(hazard)-scale for systolic blood pressure";
data p;
    chi2=3457.955-3457.330;
    p=1-probchi(chi2,1);
proc print;
run;

* We get a Chi-square statistic of 0.625 on 1 degree of freedom with a correpsonding p-value of 0.4292. Thi is close
  to the Wald test. Thus, we conclude that
  including a linear spline in our model does not seem to describe data better than our model from exercise 2.4.1.;

* We will test the linearity assumption of age by including a quadratic term to the Cox model.;

title "2.5.2";
data chs_data;
   set chs_data;
   age2 = age*age;
run;

proc phreg data = chs_data;
    model timestrokeordeath*strokeordeath(0)=esvea sex age age2 sbp;
run;

*Then, we compare this model to the Cox model forom exercise 2.4.1. using a likelihood ratio test.;

title "2.5.2: Checking the linearity on the log(hazard)-scale for age";
data p;
    chi2=3457.955-3457.923;
    p=1-probchi(chi2,1);
proc print;
run;

* We get a Chi-square statistic of 0.032 on 1 degree of freedom and a corresponding p-value of 0.85803. Thus, we do not have evidence against the linearity assumption of age. Note, once more, the simlarity with the Wald test.;

3.

Do the same for the Poisson model.

To investigate the assumption of proportional hazards for subjects with or without ESVEA for the Poisson model we will include an interaction term between time and ESVEA. This model can then be compared with the model from exercise 2.4.1 using a likelihood ratio test.

Code show/hide
#Poisson model with interaction term of time and esvea
poisson253_esvea <- glm(strokeordeath ~ factor(interval) - 1 + esvea + sex + esvea:factor(interval) + age + sbp + offset(log(duration)), family = poisson(), data = data_pch)
summary(poisson253_esvea)

Call:
glm(formula = strokeordeath ~ factor(interval) - 1 + esvea + 
    sex + esvea:factor(interval) + age + sbp + offset(log(duration)), 
    family = poisson(), data = data_pch)

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
factor(interval)1       -10.080740   0.704932 -14.300  < 2e-16 ***
factor(interval)2        -9.369197   0.693746 -13.505  < 2e-16 ***
factor(interval)3        -8.973328   0.687025 -13.061  < 2e-16 ***
esvea                    -0.230225   0.320302  -0.719   0.4723    
sex                       0.571910   0.127003   4.503  6.7e-06 ***
age                       0.075444   0.009374   8.048  8.4e-16 ***
sbp                       0.005196   0.002438   2.131   0.0331 *  
factor(interval)1:esvea   0.992907   0.401923   2.470   0.0135 *  
factor(interval)2:esvea   0.523021   0.404268   1.294   0.1958    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 15060.9  on 1767  degrees of freedom
Residual deviance:  1443.1  on 1758  degrees of freedom
  (5 observations deleted due to missingness)
AIC: 2031.1

Number of Fisher Scoring iterations: 6
Code show/hide
#LRT
anova(poisson242, poisson253_esvea, test = "LRT")
Analysis of Deviance Table

Model 1: strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + 
    offset(log(duration))
Model 2: strokeordeath ~ factor(interval) - 1 + esvea + sex + esvea:factor(interval) + 
    age + sbp + offset(log(duration))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
1      1760     1449.7                       
2      1758     1443.1  2   6.5411  0.03798 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We get a chi-squared statistic of 6.5411 on 2 degrees of freedom and a corresponding p-value of 0.03798. This suggests that the proportional hazard assumption is questionable for ESVEA.

We will now repeat the procedure to investigate the proportional hazards assumption for sex for the Poisson model.

Code show/hide
#Poisson model with interaction term of time and sex
poisson253_sex <- glm(strokeordeath ~ factor(interval) -1 + esvea + sex + sex:factor(interval) + age + sbp + offset(log(duration)), family = poisson(), data = data_pch)
summary(poisson253_sex)

Call:
glm(formula = strokeordeath ~ factor(interval) - 1 + esvea + 
    sex + sex:factor(interval) + age + sbp + offset(log(duration)), 
    family = poisson(), data = data_pch)

Coefficients:
                       Estimate Std. Error z value Pr(>|z|)    
factor(interval)1     -9.991770   0.713147 -14.011  < 2e-16 ***
factor(interval)2     -9.480863   0.700359 -13.537  < 2e-16 ***
factor(interval)3     -8.986670   0.692599 -12.975  < 2e-16 ***
esvea                  0.309414   0.152608   2.028   0.0426 *  
sex                    0.437336   0.202502   2.160   0.0308 *  
age                    0.076069   0.009362   8.125 4.47e-16 ***
sbp                    0.005064   0.002436   2.079   0.0377 *  
factor(interval)1:sex  0.161900   0.313850   0.516   0.6060    
factor(interval)2:sex  0.271022   0.286341   0.947   0.3439    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 15060.9  on 1767  degrees of freedom
Residual deviance:  1448.8  on 1758  degrees of freedom
  (5 observations deleted due to missingness)
AIC: 2036.8

Number of Fisher Scoring iterations: 6
Code show/hide
#LRT
anova(poisson242, poisson253_sex, test = "LRT")
Analysis of Deviance Table

Model 1: strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + 
    offset(log(duration))
Model 2: strokeordeath ~ factor(interval) - 1 + esvea + sex + sex:factor(interval) + 
    age + sbp + offset(log(duration))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1      1760     1449.7                     
2      1758     1448.8  2  0.90827    0.635

We get a chi-squared statistic of 0.9083 with a corresponding p-value of 0.635. We conclude that we do not have evidence against the proportional hazards assumption for sex.

We will also check the linearity assumption of sbp by inclusion the indicator of hypertension in the Poisson model

Code show/hide
#Splitting data in 5-year intervals
data_pch <- survSplit(Surv(timestrokeordeath, strokeordeath) ~ esvea + age + sex + sbp + hypertension,  
          data = chs_data, 
          cut = c(5,10), 
          episode = "interval")
data_pch$duration <- data_pch$timestrokeordeath - data_pch$tstart

#Poisson model with non-linear effect of systolic blood pleasure (linear spline)
poisson253_sbp <- glm(strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + hypertension + offset(log(duration)), family = poisson(), data = data_pch)
summary(poisson253_sbp)

Call:
glm(formula = strokeordeath ~ factor(interval) - 1 + esvea + 
    sex + age + sbp + hypertension + offset(log(duration)), family = poisson(), 
    data = data_pch)

Coefficients:
                    Estimate Std. Error z value Pr(>|z|)    
factor(interval)1 -11.457462   2.063531  -5.552 2.82e-08 ***
factor(interval)2 -10.875624   2.059459  -5.281 1.29e-07 ***
factor(interval)3 -10.548863   2.056350  -5.130 2.90e-07 ***
esvea               0.315588   0.152617   2.068   0.0387 *  
sex                 0.579222   0.126819   4.567 4.94e-06 ***
age                 0.076174   0.009367   8.132 4.22e-16 ***
sbp                 0.016310   0.015004   1.087   0.2770    
hypertension       -0.012441   0.016280  -0.764   0.4448    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 15060.9  on 1767  degrees of freedom
Residual deviance:  1449.1  on 1759  degrees of freedom
  (5 observations deleted due to missingness)
AIC: 2035.1

Number of Fisher Scoring iterations: 6
Code show/hide
#LRT
anova(poisson242, poisson253_sbp ,test = "LRT")
Analysis of Deviance Table

Model 1: strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + 
    offset(log(duration))
Model 2: strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + 
    hypertension + offset(log(duration))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1      1760     1449.7                     
2      1759     1449.1  1  0.61294   0.4337

We get a chi-squared statistic of 0.613 on 1 degree of freedom with a corresponding p-value of 0.4337 when comparing the Poisson models.

We will test the linearity assumption of age by adding a quadratic term to the Poisson model.

Code show/hide
#Poisson model with non-linear assumption of age (quadratic term)
poisson253_age <- glm(strokeordeath ~ factor(interval) - 1 + esvea + sex + age + I(age^2) + sbp + offset(log(duration)), family = poisson(), data = data_pch)
summary(poisson253_age)

Call:
glm(formula = strokeordeath ~ factor(interval) - 1 + esvea + 
    sex + age + I(age^2) + sbp + offset(log(duration)), family = poisson(), 
    data = data_pch)

Coefficients:
                    Estimate Std. Error z value Pr(>|z|)    
factor(interval)1 -1.123e+01  6.412e+00  -1.751   0.0799 .  
factor(interval)2 -1.065e+01  6.411e+00  -1.661   0.0968 .  
factor(interval)3 -1.032e+01  6.411e+00  -1.610   0.1074    
esvea              3.100e-01  1.526e-01   2.031   0.0423 *  
sex                5.837e-01  1.292e-01   4.518 6.26e-06 ***
age                1.143e-01  1.956e-01   0.584   0.5589    
I(age^2)          -2.891e-04  1.481e-03  -0.195   0.8452    
sbp                5.045e-03  2.440e-03   2.067   0.0387 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 15060.9  on 1767  degrees of freedom
Residual deviance:  1449.6  on 1759  degrees of freedom
  (5 observations deleted due to missingness)
AIC: 2035.6

Number of Fisher Scoring iterations: 6
Code show/hide
#LRT
anova(poisson242, poisson253_age,test = "LRT")
Analysis of Deviance Table

Model 1: strokeordeath ~ factor(interval) - 1 + esvea + sex + age + sbp + 
    offset(log(duration))
Model 2: strokeordeath ~ factor(interval) - 1 + esvea + sex + age + I(age^2) + 
    sbp + offset(log(duration))
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1      1760     1449.7                     
2      1759     1449.6  1 0.038156   0.8451

We get a chi-squared statistic of 0.03816 on 1 degree of freedom and a corresponding p-value of 0.8451 when comparing the Poisson models. Thus, there is not evidence against the linearity assumption of age.

Code show/hide
* We can investigate the proportionality assumption of sex in the piece-wise constant hazards model by including an interaction term 
  of time and sex and compare this model with the model from exercise 2.4.2 using a likelihood ratio test (directly
  iobtainable using the type3 option).;

title "2.5.3";  
proc genmod data=chs_pch_null;
    class interval (ref = '1');
    model cens=interval esvea sex age sbp sex*interval /dist=poi offset=logrisk type3;
run;


title "2.5.3: Examining the assumption of proportional hazards for sex";
data p;
    chi2=1449.6828-1448.7745;
    p=1-probchi(chi2,2);
proc print;
run;

* We get a Chi-square statistic of 0.9083 on 2 degrees of freedom with a corresponding p-value of 0.63499. We conclude 
  that we do not have enough evidence against the proportional hazards assumption for sex.;

* To investigate the proportionality assumption of ESVEA we include an interaction term of time and ESVEA and compare this model with
  the model from exercise 2.4.2 using a likelihood ratio test.;
  
proc genmod data=chs_pch_null;
    class interval (ref = '1');
    model cens=interval esvea sex age sbp esvea*interval /dist=poi offset=logrisk type3;
run;

title "2.5.3: Examining the assumption of proportional hazards for ESVEA";
data p;
    chi2=1449.6828-1443.1417;
    p=1-probchi(chi2,2);
proc print;
run;

* We get a Chi-square statistic of 6.5411 on 2 degrees of freedom with a corresponding p-value of 0.037986. This suggest that 
  the proportional hazard assumption does not hold for ESVEA on a 0.05 significance level.;

* We can investigate the linearity assumption on the log(hazard)-scale for systolic blood pressure by including a variable for 
  hypertension to the piece-wise constant hazards model. Then, we can compare this model with the model from 2.4.2 using a likelihood 
  ratio test.;

title "2.5.3";
data chs_pch_null;
   set chs_pch_null;
   if sbp > 135 then
   hypertension = sbp - 135;  
   else hypertension = 0;
run;

proc genmod data=chs_pch_null;
    class interval;
    model cens=interval esvea sex age sbp hypertension /dist=poi offset=logrisk noint;
run;

title "2.5.3: Checking the linearity on the log(hazard)-scale for systolic blood pressure";
data p;
    chi2=1449.6828-1449.0699;
    p=1-probchi(chi2,1);
proc print;
run;

* We get a Chi-square statistic of 0.6129 on 1 degree of freedom with a corresponding p-value of 0.4337 when comparing the Poisson models. 
  Thus, for both the Cox model and the Poisson model we cannot reject the hypothesis that the effect of systolic blood pressure 
  is linear.;

* We can then investigate the linearity assumption for age by including a quadratic term to our model;

title "2.5.3";
data chs_pch_null;
   set chs_pch_null;
   age2 = age*age;
run;

proc genmod data=chs_pch_null;
    class interval;
    model cens=interval esvea sex age age2 sbp /dist=poi offset=logrisk noint;
run;

* Then this model is compared with the one from exercise 2.4.2.;

title "2.5.3: Checking the linearity on the log(hazard)-scale for age";
data p;
    chi2=1449.6828-1449.6446;
    p=1-probchi(chi2,1);
proc print;
run;

* We get a Chi-square statistic of 0.0382 on 1 degree of freedom and a corresponding p-value of 0.84504. We once more reach the 
  conclusion that we do not have evidence against the linearity assumption of age.;

Exercise 2.6

Consider the data from the Copenhagen Holter study and focus now on the mortality rate after stroke.

The data should be loaded as chs_data:

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

Note that the time variables, timeafib, timestroke and timedeath, are measured in days. We will first convert them to years for easier interpretations.

Code show/hide
chs_data$timeafib   <- chs_data$timeafib/365.25
chs_data$timestroke <- chs_data$timestroke/365.25
chs_data$timedeath  <- chs_data$timedeath/365.25

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(ggplot2)
theme_set(theme_bw())
Code show/hide
* We first load the data;

proc import out = chs_data
datafile = 'data/cphholter.csv'
dbms= csv replace;
getnames=yes;
run;

* We will convert the time variables ('timeafib', 'timestroke', and 'timedeath') from days to years;

data chs_data;
set chs_data;
timeafib = timeafib/365.25;
timestroke = timestroke/365.25;
timedeath = timedeath/365.25;
run;

1.

Estimate non-parametrically the cumulative hazards for subjects with or without ESVEA using the time variable ‘time since recruitment’.

We first make a subset the data which only includes subjects experiencing a stroke during the study. Furthermore, we will add 0.5 day to the time of death for observations where time of stroke and time of death are the same.

Code show/hide
#Subset of data. Only subjects with strokes during the period of the study
stroke_data <- subset(chs_data, stroke == 1)

#Adding 0.5 day to timedeath for subjects with ties
stroke_data$timedeath <- ifelse(stroke_data$timestroke == stroke_data$timedeath, stroke_data$timedeath + 0.5/365.25, stroke_data$timedeath)

Once again we use the survfit function from the survival package to obtain the Nelson-Aalen estimates. Since the time variable is time since recruitment we need to take delayed entry into account. Thus, both the entry time and exit time of the stroke state, timestroke and timedeath, are included in the Surv object.

Code show/hide
naafit261 <- survfit(formula = Surv(timestroke, timedeath, death) ~ esvea, data = stroke_data)

naadata261 <- data.frame(time = naafit261$time,
                        cumhaz = naafit261$cumhaz, 
                        esvea = c(rep(names(naafit261$strata)[1], naafit261$strata[1]),
                               rep(names(naafit261$strata)[2], naafit261$strata[2])))

(fig261 <- ggplot(aes(x = time, y = cumhaz, linetype = esvea), data = naadata261) + 
          geom_step(size = 1) + 
          scale_linetype_discrete("ESVEA", labels = c("0", "1")) + 
          xlab("Time since randomization (years)") + 
          ylab("Cumulative hazard"))

Code show/hide
* To examine how ESVEA affects the mortality rate after stroke we will only consider the subjects who have a stroke during the 
  study. Thus, we will create a new data frame called 'stroke_data'.;
  
* Two observations (id 24 and 368) die on the same days as they have a stroke. We will add 0.5 day to their death time such that
  it is not identical to time of stroke. Otherwise, they will not be used in the regression.;

title "2.6.1";
data stroke_data;
   set chs_data;
   if stroke = 1;
   if timestroke = timedeath then timedeath = timedeath + 0.5/365.25;
run;

* If the time variable is time since recruitment we need to take delayed entry into account. 

* We will estimate the cumulative hazard non-parametrically with the Nelson-Aalen estimator which is implemented in the phreg
  procedure. We must take delayed entry into account. Thus, both the entry time 'timestroke' and exit time 'timedeath' is included
  in the model statement.;

proc phreg data=stroke_data; 
    model (timestroke,timedeath)*death(0)=;
    strata esvea;
    baseline out=hazdata cumhaz=naa;
run;

* Then, the estimates can be plotted;

title "2.6.1: Nelson-Aalen estimate of the cumulative hazards of death after stroke";
proc gplot data=hazdata;
    plot naa*timedeath=esvea / haxis=axis1 vaxis=axis2;
    axis1 order=0 to 15.19 by 1 minor=none label=('Years');
    axis2 order=0 to 3.3 by 0.1 minor=none label=(a=90 'Cumulative hazard');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;

2.

Assume proportional hazards and estimate the hazard ratio between subjects with or without ESVEA.

Similarly, we fit the Cox model where both time of entry and exit are included in the Surv object.

Code show/hide
#Cox model with time since recruitment as baseline
cox262 <- coxph(Surv(timestroke, timedeath, death) ~ esvea, ties = "breslow", data =stroke_data)
summary(cox262)
Call:
coxph(formula = Surv(timestroke, timedeath, death) ~ esvea, data = stroke_data, 
    ties = "breslow")

  n= 73, number of events= 47 

        coef exp(coef) se(coef)     z Pr(>|z|)
esvea 0.2624    1.3001   0.3075 0.853    0.393

      exp(coef) exp(-coef) lower .95 upper .95
esvea       1.3     0.7692    0.7116     2.375

Concordance= 0.549  (se = 0.036 )
Likelihood ratio test= 0.72  on 1 df,   p=0.4
Wald test            = 0.73  on 1 df,   p=0.4
Score (logrank) test = 0.73  on 1 df,   p=0.4

We get a hazard ratio of 1.300 for ESVEA with a Wald test of 0.853 and a corresponding p-value of 0.393. This suggests that having ESVEA does not significantly increase the rate of death after stroke when the time variable is time since recruitment.

Code show/hide
*Similarly, we fit a Cox model where both entry and exit are included in the model statement.;

title "2.6.2";
proc phreg data = stroke_data;
    model (timestroke,timedeath)*death(0)=esvea;
run;

* We get a hazard ratio of exp(0.26242) = 1.3 for ESVEA with a Chi-square statistic of 0.7282 and a corresponding p-value of 0.3935. 
  This suggests that having ESVEA does not significantly increase the rate of death after stroke when the time variable is time 
  since recruitment.; 
* In the model where the time variable is time since stroke, a new variable 'timesincestroke' is added to the stroke_data data set ;

title "2.6.3";
data stroke_data;
   set stroke_data;
   timesincestroke = timedeath - timestroke;
run;

* The Nelson Aalen estimates of the cumulative hazards are then calculated and plotted as before;

proc phreg data=stroke_data; 
    model timesincestroke*death(0)=;
    strata esvea;
    baseline out=hazdata cumhaz=naa;
run;

title "2.6.3: Nelson-Aalen estimate of the cumulative hazards of death after stroke";
proc gplot data=hazdata;
    plot naa*timesincestroke = esvea / haxis=axis1 vaxis=axis2;
    axis1 order=0 to 15.19 by 1 minor=none label=('Years');
    axis2 order=0 to 3.5 by 0.1 minor=none label=(a=90 'Cumulative hazard');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;


* Likewise, the hazard ratio is estimated assuming proportional hazards with the Cox model; 

title "2.6.3";
proc phreg data = stroke_data;
    model timesincestroke*death(0)=esvea;
run;

* We get a hazard ratio of exp(0.15385) = 1.1663 for ESVEA with a  χ2 statistic of 0.2458 and a corresponding p-value of 0.62. Thus,
  ESVEA does not increase the risk of death after stroke significantly when the time variable is time since stroke as well.

3.

Repeat these two questions using now the time variable ‘time since stroke’ and compare the results.

In the model where the time variable is time since stroke, a new variable timesincestroke is added to the data set and used as time variable in the Surv object in the formula for the Cox model.

Code show/hide
#Adding time since stroke as new column to the data set
stroke_data$timesincestroke <- stroke_data$timedeath - stroke_data$timestroke

The cumulative hazards are then estimated non-parametrically using survfit.

Code show/hide
naafit263 <- survfit(formula = Surv(timesincestroke, death) ~ esvea, data = stroke_data)

naadata263 <- data.frame(time = naafit263$time,
                        cumhaz = naafit263$cumhaz, 
                        esvea = c(rep(names(naafit263$strata)[1], naafit263$strata[1]),
                               rep(names(naafit263$strata)[2], naafit263$strata[2])))

(fig263 <- ggplot(aes(x = time, y = cumhaz, linetype = esvea), data = naadata263) + 
          geom_step(size = 1) + 
          scale_linetype_discrete("ESVEA", labels = c("0", "1")) + 
          xlab("Time since stroke (years)") + 
          ylab("Cumulative hazard"))

The hazard ratio is estimated assuming proportional hazards using the Cox model

Code show/hide
cox263 <- coxph(Surv(timesincestroke, death) ~ esvea, ties = "breslow", data =stroke_data)
summary(cox263)
Call:
coxph(formula = Surv(timesincestroke, death) ~ esvea, data = stroke_data, 
    ties = "breslow")

  n= 73, number of events= 47 

        coef exp(coef) se(coef)     z Pr(>|z|)
esvea 0.1532    1.1655   0.3103 0.494    0.622

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.166      0.858    0.6345     2.141

Concordance= 0.507  (se = 0.038 )
Likelihood ratio test= 0.24  on 1 df,   p=0.6
Wald test            = 0.24  on 1 df,   p=0.6
Score (logrank) test = 0.24  on 1 df,   p=0.6

We get a hazard ratio of 1.1654 and a Wald test of 0.493 with a corresponding p-value of 0.622. This suggests that ESVEA does not change the hazard significantly when the time variable is time since stroke as well.

Code show/hide
data stroke_data;
   set stroke_data;
   timesincestroke = timedeath - timestroke;
run;

* The Nelson Aalen estimates of the cumulative hazards are then calculated and plotted as before;

proc phreg data=stroke_data; 
    model timesincestroke*death(0)=;
    strata esvea;
    baseline out=hazdata cumhaz=naa;
run;

title "2.6.3: Nelson-Aalen estimate of the cumulative hazards of death after stroke";
proc gplot data=hazdata;
    plot naa*timesincestroke = esvea / haxis=axis1 vaxis=axis2;
    axis1 order=0 to 15.19 by 1 minor=none label=('Years');
    axis2 order=0 to 3.5 by 0.1 minor=none label=(a=90 'Cumulative hazard');
    symbol1  v=none i=stepjl c=red;
    symbol2  v=none i=stepjl c=blue;
run;


* Likewise, the hazard ratio is estimated assuming proportional hazards with the Cox model; 

title "2.6.3";
proc phreg data = stroke_data;
    model timesincestroke*death(0)=esvea;
run;

* We get a hazard ratio of exp(0.15385) = 1.1663 for ESVEA with a  χ2 statistic of 0.2458 and a corresponding p-value of 0.62. Thus,
  ESVEA does not increase the risk of death after stroke significantly when the time variable is time since stroke as well.

Exercise 2.7

1.

Consider the data from the Copenhagen Holter study and fit Cox models for the cause-specific hazards for the outcomes stroke and death without stroke including ESVEA, sex, age, and sysBP.

The data should be loaded as chs_data:

Code show/hide
chs_data <- read.csv("data/cphholter.csv")

Note that the time variables, timeafib, timestroke and timedeath, are measured in days. We will first convert them to years for easier interpretations.

Code show/hide
chs_data$timeafib   <- chs_data$timeafib/365.25
chs_data$timestroke <- chs_data$timestroke/365.25
chs_data$timedeath  <- chs_data$timedeath/365.25

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(ggplot2)
theme_set(theme_bw())
Code show/hide
* We first load the data;

proc import out = chs_data
datafile = 'data/cphholter.csv'
dbms= csv replace;
getnames=yes;
run;

* We will convert the time variables ('timeafib', 'timestroke', and 'timedeath') from days to years;

data chs_data;
set chs_data;
timeafib = timeafib/365.25;
timestroke = timestroke/365.25;
timedeath = timedeath/365.25;
run;

We fit a Cox model to the cause-specific hazard for the outcome stroke

Code show/hide
chs_data$timestrokeordeath <- ifelse(chs_data$stroke == 1, chs_data$timestroke, chs_data$timedeath) 
chs_data$strokeordeath <- ifelse(chs_data$stroke == 1, 1, chs_data$death) 

cox27_stroke <- coxph(Surv(timestrokeordeath, stroke) ~ esvea + sex + age + sbp, ties = "breslow", data =chs_data)
summary(cox27_stroke)
Call:
coxph(formula = Surv(timestrokeordeath, stroke) ~ esvea + sex + 
    age + sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 72 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.702407  2.018606 0.269968 2.602  0.00927 ** 
sex   0.491881  1.635389 0.248634 1.978  0.04789 *  
age   0.078980  1.082183 0.019054 4.145  3.4e-05 ***
sbp   0.011340  1.011404 0.004651 2.438  0.01477 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     2.019     0.4954     1.189     3.426
sex       1.635     0.6115     1.005     2.662
age       1.082     0.9241     1.043     1.123
sbp       1.011     0.9887     1.002     1.021

Concordance= 0.728  (se = 0.028 )
Likelihood ratio test= 41.22  on 4 df,   p=2e-08
Wald test            = 43.26  on 4 df,   p=9e-09
Score (logrank) test = 47.2  on 4 df,   p=1e-09

We get a hazard ratio of 2.02 and a Wald test of 2.602 with a corresponding p-value of 0.009.

To fit a Cox model for the outcome death without stroke we must first add the variable death without stroke to our data set. Afterwards, the model can be fitted using coxph.

Code show/hide
chs_data$death_wo_stroke <- ifelse(chs_data$stroke == 1, 0, chs_data$death)

cox27_death <- coxph(Surv(timestrokeordeath, death_wo_stroke) ~ esvea + sex + age + sbp, ties = "breslow", data =chs_data)
summary(cox27_death)
Call:
coxph(formula = Surv(timestrokeordeath, death_wo_stroke) ~ esvea + 
    sex + age + sbp, data = chs_data, ties = "breslow")

  n= 675, number of events= 213 
   (3 observations deleted due to missingness)

          coef exp(coef) se(coef)     z Pr(>|z|)    
esvea 0.160081  1.173605 0.186795 0.857    0.391    
sex   0.605281  1.831766 0.147665 4.099 4.15e-05 ***
age   0.076073  1.079041 0.010759 7.071 1.54e-12 ***
sbp   0.002956  1.002960 0.002867 1.031    0.303    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      exp(coef) exp(-coef) lower .95 upper .95
esvea     1.174     0.8521    0.8138     1.692
sex       1.832     0.5459    1.3714     2.447
age       1.079     0.9267    1.0565     1.102
sbp       1.003     0.9970    0.9973     1.009

Concordance= 0.657  (se = 0.019 )
Likelihood ratio test= 64.37  on 4 df,   p=3e-13
Wald test            = 67.44  on 4 df,   p=8e-14
Score (logrank) test = 70.93  on 4 df,   p=1e-14

We get a hazard ratio of 1.17 and a Wald test of 0.857 with a corresponding p-value of 0.391.

Code show/hide
* We fit a Cox model to the cause-specific hazard for the outcome stroke;

title '2.7';
proc phreg data = chs_data;
    model timestrokeordeath*stroke(0)= esvea sex age sbp;
run;

* We get a hazard ratio of 2.02 and a Chi-square statistic of 6.77 on 1 degree of freedom with a corresponding p-value of 0.009. 
  This suggest that having ESVEA does change the hazard significantly when the outcome is stroke.;

* To fit a Cox model for the outcome death wihtout stroke we must first add the censoring variable death without stroke to our data 
  set. Afterwards, the model can be fitted with the phreg procedure;
  
data chs_data;
    set chs_data;
    death_wo_stroke = death;
    if stroke = 1 then death_wo_stroke = 0;
run;

proc phreg data = chs_data;
    model timestrokeordeath*death_wo_stroke(0)= esvea sex age sbp;
run;

* We get a hazard ratio of 1.17 and a Chi-square statistic of 0.7344 on 1 degree of freedom with a corresponding p-value of 0.391. 
  This suggests that the status of ESVEA does not change the hazard significantly when the outcome is death without stroke.;

2.

Compare with the results from Exercise 4 in Chapter 2 (first question).

It is seen that (male) sex and age are both significantly associated with increased cause-specific hazards whereas ESVEA and SysBP are only associated with the rate of stroke. The coefficients for the composite end-point are between those for the cause-specific hazards.

Please note that the Cox model for the composite end-point and those for the separate cause-specific hazards are mathematically incompatible.

Exercise 2.8

Consider the data on repeated episodes in affective disorder, Example 1.1.5.

The data should be loaded as affective_data:

Load the relevant packages:

Code show/hide
library(tidyverse) #Data manipulations and plots
library(survival) #Core survival analysis routines
library(mets)
library(ggplot2)
theme_set(theme_bw())

The data should be loaded as affective_data:

Code show/hide
proc import out=affective_data
    datafile='data/affective.csv' 
    dbms=csv replace;
    getnames=yes;
run;

1.

Estimate non-parametrically the cumulative event intensities for unipolar and bipolar patients.

We estimate the cumulative event intensities non-parametrically using the survfit function from the survival package. We must include the id = id argument since we have multiple lines from the same subject. Furthermore, we include start in the surv object of the formula argument to account for delayed entry and restrict attention to records with state=0 (out of hospital).

Code show/hide
# Estimating the cumulative event intensities for unipolar and bipolar patients non-parametrically
naafit281 <- survfit(Surv(start, stop, status == 1) ~ bip,
                 data = subset(affective_data, state==0), id = id)

We then plot the result

Code show/hide
# Collecting data in data frame
naadata281 <- data.frame(cumhaz = naafit281$cumhaz,
                        time = naafit281$time,
                        disorder = c(rep("Unipolar", naafit281$strata[[1]]),
                                  rep("Bipolar", naafit281$strata[[2]])))

# Plotting the result
fig281 <- ggplot(data = naadata281) + geom_step(aes(x = time/12, y = cumhaz, linetype = disorder), size = 1) +
  ylab("Cumulative event intensity") +
  xlab("Time since first admission (years)") +
  theme_bw() 
fig281

The cumulative event intensity is at all times higher for bipolar patients compared to unipolar patients.

Code show/hide
* We will non-parametrically estimate the cumulative event intensities for unipolar and bipolar patients using the phreg procedure 
  where we include 'status(0,2,3)' as censoring variables in the model statement since the event of interest is 1 (admission 
  to hospital) and 'bip' in the strata. Furthermore, we include 'start' in the model statement because of delayed entry. Attention 
  is restricted to records where state=0 (out of hospital).;

title '2.8.1';
proc phreg data=affective_data; 
    where state=0;
    model (start,stop)*status(0,2,3)=;
    strata bip;
    baseline out=data281 cumhaz=naa;
run;

* We plot the result using the gplot procedure;

proc gplot data=data281;
    plot naa*stop=bip /haxis=axis1 vaxis=axis2;
    axis1 order=0 to 340 by 24 label=('Time since first admission');
    axis2 order=0 to 10 by 0.5 label=(a=90 'Cumulative event intensity');
    symbol1 i=stepjl c=red;
    symbol2 i=stepjl c=blue;
run;

* The cumulative event intensity is at all times higher for bipolar patients compared to unipolar patients.;

2.

Fit an AG-type model and estimate, thereby, the ratio between intensities for unipolar and bipolar patients, adjusting for year of diagnosis.

We can fit a very simple version of a AG model (like the one described in Section 2.4.1) using the coxph function from the survival package. We take delayed entry into account in the Surv object of the formula argument and adjust for disease type (unipolar vs bipolar) and year of diagnosis. Furthermore, we restrict the data set to the rows where patients are out of hospital (state = 0).

Code show/hide
# Fitting an AG-type model
agfit282 <- coxph(Surv(start, stop, status == 1) ~ bip + year, 
                        data = subset(affective_data, state == 0), 
                        method = "breslow")
summary(agfit282)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ bip + year, 
    data = subset(affective_data, state == 0), method = "breslow")

  n= 626, number of events= 542 

         coef exp(coef) se(coef)      z Pr(>|z|)    
bip   0.35001   1.41908  0.09457  3.701 0.000215 ***
year -0.11814   0.88857  0.03404 -3.471 0.000519 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     exp(coef) exp(-coef) lower .95 upper .95
bip     1.4191     0.7047    1.1790    1.7081
year    0.8886     1.1254    0.8312    0.9499

Concordance= 0.567  (se = 0.021 )
Likelihood ratio test= 26.48  on 2 df,   p=2e-06
Wald test            = 26.87  on 2 df,   p=1e-06
Score (logrank) test = 27.1  on 2 df,   p=1e-06

Thus, we estimate hazard ratio of \(\exp(0.35) = 1.4291\) of admission to hospital for being diagnosed with bipolar disorder compared to those diagnosed with unipolar disorder.

Code show/hide
*We fit a simple AG-type model (like the one described in section 2.4.1) using the phreg procedure on the subset of data where 'state = 0', 
i.e. periods where patients are out of hospital.; 

title '2.8.2';
proc phreg data=affective_data; 
    model (start,stop)*status(0,2,3)= bip year;
    where state = 0;
run;

* Thus, we estimate hazard ratio of exp(0.35) = 1.4291 of admission to hospital for being diagnosed with bipolar disorder 
  compared to those diagnosed with unipolar disorder.;

3.

Fit a PWP model and estimate, thereby, the ratio between intensities for unipolar and bipolar patients, adjusting for year of diagnosis.

We can fit a PWP model using the coxph function by adding strata(episode) in the formula argument. We must still restrict the data to time periods where the patients are out of hospital (state = 0).

Code show/hide
# Fitting a PWP model
pwpfit283 <- coxph(Surv(start, stop, status == 1) ~ strata(episode) + bip + year, 
                   data = subset(affective_data, state == 0), 
                   method = "breslow")
summary(pwpfit283)
Call:
coxph(formula = Surv(start, stop, status == 1) ~ strata(episode) + 
    bip + year, data = subset(affective_data, state == 0), method = "breslow")

  n= 626, number of events= 542 

         coef exp(coef) se(coef)      z Pr(>|z|)  
bip   0.24213   1.27396  0.11199  2.162   0.0306 *
year -0.03514   0.96547  0.03940 -0.892   0.3724  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     exp(coef) exp(-coef) lower .95 upper .95
bip     1.2740      0.785    1.0229     1.587
year    0.9655      1.036    0.8937     1.043

Concordance= 0.54  (se = 0.021 )
Likelihood ratio test= 5.34  on 2 df,   p=0.07
Wald test            = 5.45  on 2 df,   p=0.07
Score (logrank) test = 5.47  on 2 df,   p=0.06

We get an estimate of the hazard ratio of readmission for patients diagnosed with bipolar disorder of \(\exp(0.2421) = 1.274\) compared to patients diagnosed with unipolar disorder.

Code show/hide
* We fit a PWP model using the phreg procedure. The set up is the same as for the AG type model but we also include 'episode' in  the strata statement;

title '2.8.3';
proc phreg data=affective_data; 
    model (start,stop)*status(0,2,3)= bip year;
    where state = 0;
    strata episode;
run;

* We get an estimate of the hazard ratio of readmission for patients diagnosed with bipolar disorder of exp(0.2421) = 1.274 compared to patients diagnosed with unipolar disorder.;

4.

Compare the results from the two models.

We get a smaller coefficient of ‘bip’ with the PWP model than with the AG model. This is because, by taking the number of previous episodes into account (via stratification), some of the discrepancy between bipolar and unipolar patients disappears since the occurrence of repeated episodes is itself affected by the initial diagnosis as discussed in the book.