library(Biobase)
library(tidyverse)
library(scales)
library(gplots)
library(ggbiplot)
library(qusage)
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)
library(EnhancedVolcano)
library(survival)
library(survminer)
library(ROCR)
source("lib/cross_indication.color.palettes.R")
source("lib/functions.R")
load("data/data.RData")

# loads 8 objects
# eset.all.arms: training set (611 samples from IMvigor210, POPLAR and IMmotion150)
# eset.pcd: test set (206 samples from PCD4686g)
# gs.wgcna: WGCNA modules
# gs.wgcna.annotations: WGCNA modules annotations
# genesets: gene lists used to benchmark our lasso58 signature
# lasso58: list of genes from lasso58 with coefficients
# pca: principal component analysis results
# fmi.dat.by.group: fmi CDKN2A CNA data
# Load data
## atezolizumab monotherapy
eset <- eset.all.arms[, eset.all.arms$ARM == "atezo"]

# control arms (chemotherapy/TKI)
eset.chemo <- eset.all.arms[, eset.all.arms$ARM %in% c("docetaxel", "sunitinib")]

# TMB-evaluable samples
tmb.median <- median(eset$TMB, na.rm = TRUE)
eset$TMB_50 <- factor(ifelse(is.na(eset$TMB), NA, 
                               ifelse(eset$TMB >= tmb.median, "TMBhigh", "TMBlow")),
                        levels = c("TMBlow", "TMBhigh"))

eset.tmb <- eset[, !is.na(eset$TMB)]

Figure 1 - PD-L1, TMB and global RNA-seq profiles in mUC, NSCLC and RCC cohorts

Figure 1A - PD-L1 distribution by indication and response status

ggplot(data = pData(eset), aes(x = ORR.CRPR.SDPD, 
                               fill = Category.PDL1.3CAT)) +
  geom_bar(position = "fill") +
  xlab("") +
  ylab("Patient Percentage") +
  scale_x_discrete(labels = c("SD/PD", "CR/PR")) +
  facet_grid(facets = . ~ CANCER_TYPE) + 
  scale_y_continuous(labels = percent_format()) +
  scale_fill_manual(name = "PD-L1",
                    labels = c("IC/TC0",
                               "IC/TC1",
                               "IC/TC23"),
                    values = palette.PDL1.3CAT) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.ticks.x = element_blank(),
        legend.key.size = unit(15, "points"),
        axis.text.x = element_text(angle = 45, 
                                   hjust = 1,
                                   vjust = 1.4,
                                   size = 8))

Figure 1B - Tumor mutation burden by indication and response status

ggplot(pData(eset.tmb), aes(x = CANCER_TYPE, 
                            y = log(TMB, 2), 
                            fill = ORR.CRPR.SDPD)) + 
  geom_boxplot() +
  scale_y_continuous(limits = c(1, 6)) +
  scale_fill_manual(name = "ORR",
                    labels = c("SD/PD", "CR/PR"),
                    values = c("#4092C4", "#D7604E")) +
  xlab("") +
  ylab("log2(Mutations / Mb)") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.direction = 'vertical',
        legend.key.size = unit(15, "points"),
        axis.text.x = element_text(size = 12,
                                   angle = 45, 
                                   hjust = 1, 
                                   vjust = 1))

Figure 1C - Overlap between responders, PD-L1+ and TMBhigh tumors

venn(list("PD-L1+" = rownames(subset(pData(eset.tmb), Category.PDL1 == "PDL1_123")),
           "TMBhigh" = rownames(subset(pData(eset.tmb), TMB_50 == "TMBhigh")),
           "CR/PR" = rownames(subset(pData(eset.tmb), ORR.CRPR.SDPD == "CRPR"))),
     intersections = FALSE)

Figure 1D - Principal component analysis, colored by indication

pc.groups <- eset$CANCER_TYPE

ggbiplot(pca, choices = c(1,2), 
         obs.scale = 1, 
         var.scale = 1, 
         var.axes = FALSE, 
         groups = pc.groups, 
         ellipse = TRUE, 
         circle = FALSE) + 
  scale_color_discrete(name = '') +
  scale_color_manual(name = "",
                     labels = c("mUC", "NSCLC", "RCC"),
                     values = palette.cancer.type) +
  theme_bw() +
  theme(legend.direction = 'horizontal', 
        legend.position = 'top',
        legend.text = element_text(size = 14))

Figure 1E - Principal component analysis, colored by response status

pc.groups <- eset$ORR.CRPR.SDPD

ggbiplot(pca, choices = c(1,2), 
         obs.scale = 1, 
         var.scale = 1, 
         var.axes = FALSE, 
         groups = pc.groups, 
         ellipse = TRUE, 
         circle = FALSE) + 
  scale_color_discrete(name = '') +
  scale_color_manual(name = "",
                     labels = c("Non-responders", "Responders"),
                     values = palette.orr.2cat) +
  theme_bw() +
  theme(legend.direction = 'horizontal', 
        legend.position = 'top',
        legend.text = element_text(size = 14))


Figure 2 - Performance of transcriptional signatures to predict response to PD-L1 blockade

Figure 2B - Lasso58 score by indication and response status in atezolizumab or control arms

eset.tmb$lasso58.coef <- calculateSignatureWithCoefs(eset.tmb, 
                                                     symbols.with.coef = lasso58, 
                                                     symbols.col = "symbol")

ggplot(pData(eset.tmb), aes(x = CANCER_TYPE, y = lasso58.coef, fill = ORR.CRPR.SDPD)) + 
      geom_boxplot() +
      scale_fill_manual(name = "ORR",
                        labels = c("SD/PD", "CR/PR"),
                        values= palette.orr.2cat) +
      xlab("") +
      ylab("Signature score") +
      ggtitle("Lasso58 - Atezolizumab") +
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.key.size = unit(15, "points"),
            axis.text.x = element_text(size = 14))

eset.chemo$lasso58.coef <- calculateSignatureWithCoefs(eset.chemo, 
                                                    symbols.with.coefs = lasso58, 
                                                    symbols.col = "symbol")

ggplot(pData(eset.chemo), aes(x = CANCER_TYPE, y = lasso58.coef, fill = ORR.CRPR.SDPD)) + 
      geom_boxplot() +
      scale_fill_manual(name = "ORR",
                        labels = c("SD/PD", "CR/PR"),
                        values = palette.orr.2cat) +
      scale_x_discrete(labels = c("NSCLC/\nDocetaxel", "RCC/\nSunitinib")) +
      xlab("") +
      ylab("Signature score") +
      ggtitle("Lasso58 - Other arms") +
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.key.size = unit(15, "points"),
            axis.text.x = element_text(size = 12))

Figure 2C - Signature performance in the training set

sigs <- c("Genentech_tGE8",
          "Ayers_2017",
          "Cristescu_2018",
          "Jiang_TIDE_T_cell_inclusion",
          "Rooney_2015")

model.names <- c("LASSO58" = "lasso58.coef", 
                 "PD-L1 IHC" = "Category.PDL1", 
                 "TMB" = "TMB",
                 "LASSO58 + PD-L1 + TMB" = "lasso58.coef.PDL1.TMB",
                 "Genentech tGE8" = "Genentech_tGE8",
                 "Ayers et al., 2017" = "Ayers_2017",
                 "Cristescu et al., 2018" = "Cristescu_2018",
                 "Jiang et al., TIDE T cell inclusion" = "Jiang_TIDE_T_cell_inclusion",
                 "Rooney et al., 2015" = "Rooney_2015")

color.map <- list("lasso58.coef" = "red",
                  "Category.PDL1" = "blue",
                  "TMB" = "black",
                  "lasso58.coef.PDL1.TMB" = "purple",
                  "lasso58.coef.PDL1" = "purple",
                  "Genentech_tGE8" = "chartreuse4",
                  "Ayers_2017" = "darkorange",
                  "Cristescu_2018" = "grey50",
                  "Jiang_TIDE_T_cell_inclusion" = "brown",
                  "Rooney_2015" = "goldenrod1")

# Calculate signatures
eset$lasso58.coef <- calculateSignatureWithCoefs(eset, lasso58, symbols.col = "symbol")

for(sig in sigs){
  eset[[sig]] <- calculateSignatureExpression(eset, genesets[[sig]], symbols.col = "symbol")
}


dat <- subset(pData(eset), !is.na(TMB))
dat$ORR.CRPR.SDPD <- factor(dat$ORR.CRPR.SDPD, levels = c("CRPR", "SDPD"))

predictions <- list()

for(model.name in c(model.names)){
  if(model.name == "lasso58.coef.PDL1.TMB"){
    model <- glm(ORR.CRPR.SDPD ~ lasso58.coef + Category.PDL1 + TMB, data = dat, family = "binomial")
  }else{
    model <- glm(ORR.CRPR.SDPD ~ dat[[model.name]], data = dat, family = "binomial")
  }
  predictions[[model.name]] <- predict(model, newdata = dat, type = "response")
}

cols <- as.character(color.map[model.names])
lwd <- c(4, rep(2, length(predictions) - 1))

for (i in 1:length(predictions)) {
   plot(performance(prediction(predictions[[i]], 
                               dat$ORR.CRPR.SDPD), 
                    "tpr", 
                    "fpr"), 
        add = (i != 1), 
        col = cols[i], 
        lwd = lwd[i])
}

legend(0.5, 
       0.4, 
       legend = names(model.names),
       col = cols, 
       lty = 1, 
       cex = 0.6)

abline(a = 0, 
       b = 1)

Figure 2D - Signature performance in the test set

model.names <- c("LASSO58" = "lasso58.coef", 
                 "PD-L1 IHC" = "Category.PDL1", 
                 "LASSO58 + PD-L1" = "lasso58.coef.PDL1",
                 "Genentech tGE8" = "Genentech_tGE8",
                 "Ayers et al., 2017" = "Ayers_2017",
                 "Cristescu et al., 2018" = "Cristescu_2018",
                 "Jiang et al., TIDE T cell inclusion" = "Jiang_TIDE_T_cell_inclusion",
                 "Rooney et al., 2015" = "Rooney_2015")

# Calculate signatures
eset.pcd$lasso58.coef <- calculateSignatureWithCoefs(eset.pcd, lasso58, symbols.col = "symbol")

for(sig in sigs){
  eset.pcd[[sig]] <- calculateSignatureExpression(eset.pcd, genesets[[sig]], symbols.col = "symbol")
}


dat <- subset(pData(eset.pcd), !is.na(Category.PDL1))
dat$ORR.CRPR.SDPD <- factor(dat$ORR.CRPR.SDPD, levels = c("CRPR", "SDPD"))

predictions <- list()

for(model.name in c(model.names)){
  if(model.name == "lasso58.coef.PDL1"){
    model <- glm(ORR.CRPR.SDPD ~ lasso58.coef + Category.PDL1, data = dat, family = "binomial")
  }else{
    model <- glm(ORR.CRPR.SDPD ~ dat[[model.name]], data = dat, family = "binomial")
  }
  predictions[[model.name]] <- predict(model, newdata = dat, type = "response")
}

cols <- as.character(color.map[model.names])
lwd <- c(4, rep(2, length(predictions) - 1))

for (i in 1:length(predictions)) {
   plot(performance(prediction(predictions[[i]], 
                               dat$ORR.CRPR.SDPD), 
                    "tpr", 
                    "fpr"), 
        add = (i != 1), 
        col = cols[i], 
        lwd = lwd[i])
}

legend(0.5, 
       0.4, 
       legend = names(model.names),
       col = cols, 
       lty = 1, 
       cex = 0.6)

abline(a = 0, 
       b = 1)


Figure 3 - Transcriptional correlates of response to PD-L1 blockade across indications

# Define color gradient for heatmaps
palette.breaks <- seq(-0.3, 0.3, 0.1)
color.palette <- colorRampPalette(c("cadetblue1", "black", "yellow"))(length(palette.breaks)); 
colors <- unique(c(seq(-0.3, 0, length = 5), seq(0, 0.3, length = 6)))
qgen.combined <- data.frame(matrix(nrow = length(gs.wgcna), ncol = 0))
rownames(qgen.combined) <- names(gs.wgcna)

for(cancer in levels(eset$CANCER_TYPE)){
  qgen.results <- readRDS(paste0("data/qgen.ORR.", cancer, ".WGCNA.rds"))
  qgen.results.tab <- qsTable(qgen.results, number = length(qgen.results$pathways))
  
  # Update result table row and column names
  rownames(qgen.results.tab) <- qgen.results.tab$pathway.name
  colnames(qgen.results.tab) <- paste(cancer, colnames(qgen.results.tab), sep=".")
  
  # Create categorical variable for pathway significance
  qgen.results.tab[[paste0(cancer, ".sig")]] <- factor(ifelse(qgen.results.tab[[paste0(cancer,".FDR")]] <0.05, "fdr",
                                                              ifelse(qgen.results.tab[[paste0(cancer, ".p.Value")]] < 0.05, "nom", "")))
  
  # Merge results
  qgen.combined <- merge(qgen.combined,
                         qgen.results.tab,
                         by = "row.names",
                         all = TRUE)
  rownames(qgen.combined) <- qgen.combined$Row.names
  qgen.combined <- qgen.combined[,-1]
}

# Filter significant modules on p-Value
qgen.combined.pval <- qgen.combined[,grep('p.Value', names(qgen.combined))]
sig.mods <- rownames(qgen.combined.pval[apply(qgen.combined.pval, 1, function(x) min(x) < 0.05), ])

Figure 3A - In all tumors

qgen.results <- readRDS("data/qgen.ORR.WGCNA.rds")
qgen.results.tab <- subset(qsTable(qgen.results, length(gs.wgcna)), p.Value < 0.05)

# Combine modules significant per indication and in combined analysis
all.mods <- unique(c(sig.mods, as.character(qgen.results.tab$pathway.name)))

path.means <- as.data.frame(qgen.results$path.mean)
path.means$module <- rownames(path.means)
colnames(path.means) <- c("mean", "module")
path.means <- path.means[order(-path.means$mean),]
path.means <- subset(path.means, module %in% all.mods)

par(mar = c(6,6,2,2))
plotCIs(qgen.results,
        path.index = all.mods,
        p.adjust.method = "BH",
        cex = 2, 
        main = "CR/PR vs. SD/PD",
        addGrid = F,
        p.breaks = c(0.01, 0.05, 0.1),
        ylim = c(-0.5, 0.4))

qgen.combined.logFC <- qgen.combined[all.mods, grep('log.fold.change', names(qgen.combined))]
names(qgen.combined.logFC) <- gsub('.log.fold.change', '', names(qgen.combined.logFC))
qgen.combined.logFC <- qgen.combined.logFC[rowSums(is.na(qgen.combined.logFC)) != ncol(qgen.combined.logFC),]

data <- t(as.matrix(qgen.combined.logFC))
rownames(data) <- c("mUC", "NSCLC", "RCC")

data.sig <- t(as.matrix(qgen.combined[all.mods, grep(".sig", names(qgen.combined))]))
data <- data[, path.means$module]

row.labels <- rownames(data)
col.labels <- colnames(data)

# Save for PD-L1+ and PD-L1- heatmaps order
combined.module.order <- col.labels

col.annotations <- as.character(gs.wgcna.annotations[col.labels,]$Annotation)

col.named <- c()
for(col in col.labels){
  col.named[col] = col
}

df <- data.frame(module = as.character(col.labels))

set.seed(123)

data.sig <- data.sig[,colnames(data)]

ha <- HeatmapAnnotation(df = df,
                        colname = anno_text(col.annotations, 
                                            rot = 60, 
                                            just = "right", 
                                            offset = unit(1, "npc") - unit(2, "mm"),
                                            gp = gpar(fontsize = 12)),
                       col = list(module = col.named),
                       show_annotation_name = TRUE,
                       annotation_height = unit.c(unit(5, "mm"), 
                                                  max_text_width(col.annotations) + unit(1, "mm")),
                       which = "col", 
                       show_legend = FALSE)


hm <- Heatmap(data, 
        name = "Enrichment",
        cluster_columns = F, 
        cluster_rows = F,
        show_row_dend = F, 
        show_column_dend = F,
        show_row_names = T, 
        show_column_names = F,
        show_heatmap_legend = T,
        row_names_gp = gpar(fontsize = 12),
        col = colorRamp2(breaks = palette.breaks, 
                         colors = color.palette, 
                         space = "RGB"),
        cell_fun = function(j, i, x, y, w, h, col, mat = data.sig) { 
          if(mat[i,j] == "fdr"){
            grid.points(x, y, pch = 20, gp = gpar(col = "red"))
          }else if(mat[i,j] == "nom"){
            grid.points(x, y, pch = 20, gp = gpar(col = "black"))
          }
        },
        bottom_annotation = ha,
        rect_gp = gpar(col = "black", lty = 1, lwd = 1))

draw(hm, padding = unit(c(15, 35, 0, 0), "mm"))

Figure 3B - In PD-L1pos and PD-L1neg tumors separated

PD-L1+ tumors
qgen.combined <- data.frame(matrix(nrow = length(gs.wgcna), ncol = 0))

# Combine qgen results from 3 indications
for(cancer in levels(eset$CANCER_TYPE)){
  # Read indication qgen analysis results
  qgen.results <- readRDS(paste0("data/qgen.ORR.PDL1pos.", cancer, ".WGCNA.rds"))
  qgen.tab <- qsTable(qgen.results, number=length(qgen.results$pathways))
  
  # Update result table colnames and cbind to combined result file
  colnames(qgen.tab) <- paste0(cancer, ".", colnames(qgen.tab))
  
  # Define statistical significance as a categorical variable (FDR, nominal, ns)
  qgen.tab[[paste0(cancer, ".sig")]] <- factor(ifelse(qgen.tab[[paste0(cancer,".FDR")]] < 0.05, "fdr",
                                                              ifelse(qgen.tab[[paste0(cancer, ".p.Value")]] < 0.05, "nom", "")))
  
  qgen.combined <- cbind(qgen.combined,
                         qgen.tab[order(qgen.tab[[paste0(cancer, ".pathway.name")]]),])
}

rownames(qgen.combined) <- qgen.combined[[paste0(levels(eset$CANCER_TYPE)[1], ".pathway.name")]]

qgen.combined.logFC <- qgen.combined[combined.module.order, grep('log.fold.change', names(qgen.combined))]
names(qgen.combined.logFC) <- gsub('.log.fold.change', '', names(qgen.combined.logFC))

data <- t(as.matrix(qgen.combined.logFC))
rownames(data) <- c("mUC", "NSCLC", "RCC")

data.sig <- t(as.matrix(qgen.combined[combined.module.order, grep(".sig", names(qgen.combined))]))
data.sig <- data.sig[, colnames(data)]

row.labels <- rownames(data)
col.labels <- colnames(data)
col.annotations <- as.character(gs.wgcna.annotations[col.labels,]$Annotation)

df <- data.frame(module = as.character(col.labels))

ha <- HeatmapAnnotation(df = df,
                        colname = anno_text(col.annotations, 
                                            rot = 60, 
                                            just = "right", 
                                            offset = unit(1, "npc") - unit(2, "mm"),
                                            gp = gpar(fontsize = 13)),
                       col = list(module = col.named),
                       show_annotation_name = TRUE,
                       annotation_height = unit.c(unit(5, "mm"), 
                                                  max_text_width(col.annotations) + unit(1, "mm")),
                       which = "col", 
                       show_legend = FALSE)

hm <- Heatmap(data, 
        name = "Enrichment",
        cluster_columns = F, 
        cluster_rows = F,
        show_row_dend = F, 
        show_column_dend = F,
        show_row_names = T, 
        show_column_names = F,
        row_names_gp = gpar(fontsize = 12),
        col = colorRamp2(breaks = palette.breaks, 
                         colors = color.palette, 
                         space = "RGB"),
        cell_fun = function(j, i, x, y, w, h, col, mat = data.sig) {
          if(mat[i,j] == "fdr"){
            grid.points(x, y, pch = 20, gp = gpar(col = "red"))
          }else if(mat[i,j] == "nom"){
            grid.points(x, y, pch = 20, gp = gpar(col = "black"))
          }
        },
        bottom_annotation = ha,
        rect_gp = gpar(col = "black", lty = 1, lwd = 1))

draw(hm, padding = unit(c(15, 35, 0, 0), "mm"))

PD-L1- tumors
qgen.combined <- data.frame(matrix(nrow = length(gs.wgcna), ncol = 0))

# Combine qgen results from 3 indications
for(cancer in levels(eset$CANCER_TYPE)){
  # Read indication qgen analysis results
  qgen.results <- readRDS(paste0("data/qgen.ORR.PDL1neg.", cancer, ".WGCNA.rds"))
  qgen.tab <- qsTable(qgen.results, number=length(qgen.results$pathways))
  
  # Update result table colnames and cbind to combined result file
  colnames(qgen.tab) <- paste0(cancer, ".", colnames(qgen.tab))
  
  # Define statistical significance as a categorical variable (FDR, nominal, ns)
  qgen.tab[[paste0(cancer, ".sig")]] <- factor(ifelse(qgen.tab[[paste0(cancer,".FDR")]] < 0.05, "fdr",
                                                              ifelse(qgen.tab[[paste0(cancer, ".p.Value")]] < 0.05, "nom", "")))
  
  qgen.combined <- cbind(qgen.combined,
                         qgen.tab[order(qgen.tab[[paste0(cancer, ".pathway.name")]]),])
}

rownames(qgen.combined) <- qgen.combined[[paste0(levels(eset$CANCER_TYPE)[1], ".pathway.name")]]

qgen.combined.logFC <- qgen.combined[combined.module.order, grep('log.fold.change', names(qgen.combined))]
names(qgen.combined.logFC) <- gsub('.log.fold.change', '', names(qgen.combined.logFC))

data <- t(as.matrix(qgen.combined.logFC))
rownames(data) <- c("mUC", "NSCLC", "RCC")

data.sig <- t(as.matrix(qgen.combined[combined.module.order, grep(".sig", names(qgen.combined))]))
data.sig <- data.sig[, colnames(data)]

row.labels <- rownames(data)
col.labels <- colnames(data)
col.annotations <- as.character(gs.wgcna.annotations[col.labels,]$Annotation)

df <- data.frame(module = as.character(col.labels))

ha <- HeatmapAnnotation(df = df,
                        colname = anno_text(col.annotations, 
                                            rot = 60, 
                                            just = "right", 
                                            offset = unit(1, "npc") - unit(2, "mm"),
                                            gp = gpar(fontsize = 13)),
                       col = list(module = col.named),
                       show_annotation_name = TRUE,
                       annotation_height = unit.c(unit(5, "mm"), 
                                                  max_text_width(col.annotations) + unit(1, "mm")),
                       which = "col", 
                       show_legend = FALSE)

hm <- Heatmap(data, 
        name = "Enrichment",
        cluster_columns = F, 
        cluster_rows = F,
        show_row_dend = F, 
        show_column_dend = F,
        show_row_names = T, 
        show_column_names = F,
        row_names_gp = gpar(fontsize = 12),
        col = colorRamp2(breaks = palette.breaks, 
                         colors = color.palette, 
                         space = "RGB"),
        cell_fun = function(j, i, x, y, w, h, col, mat = data.sig) {
          if(mat[i,j] == "fdr"){
            grid.points(x, y, pch = 20, gp = gpar(col = "red"))
          }else if(mat[i,j] == "nom"){
            grid.points(x, y, pch = 20, gp = gpar(col = "black"))
          }
        },
        bottom_annotation = ha,
        rect_gp = gpar(col = "black", lty = 1, lwd = 1))

draw(hm, padding = unit(c(15, 35, 0, 0), "mm"))


Figure 4 - CDK4/6 inhibition associates with increased response to PD-L1 blockade

Figure 4A - Differentially expressed genes between responders and non-responders

model.formula <- ~ ORR.CRPR.SDPD +
                         CANCER_TYPE +
                         Category.PDL1

design <- model.matrix(model.formula, 
                       data = pData(eset))
colnames(design) <- c("Intercept", "CRPR", "Lung", "RCC", "ICTC123")
fit <- lmFit(exprs(eset), design)
fit2 <- eBayes(fit)

tab <- topTable(fit2, adjust = "BH", coef = "CRPR", n = Inf)
tab.annotated <- merge(fData(eset), 
                       tab, by = "row.names", 
                       all.x = F, 
                       all.y = T)

EnhancedVolcano(tab.annotated,
      lab = as.character(tab.annotated$symbol),
      pCutoff = 0.1,
      FCcutoff = 0.5,
      title = "CR/PR vs. SD/PD",
      xlim = c(-1.5, 1.5),
      ylim = c(0, 1.5),
      x = 'logFC',
      y = 'adj.P.Val',
      drawConnectors = FALSE,
      widthConnectors = 0.5)

Figure 4B - Prevalence of CDKN2A copy-number losses in the Foundation Medicine database by indication

ggplot(fmi.dat.by.group, aes(x = group_ontology, 
                    y = pct.CDKN2A.CN01)) +
  geom_col(aes(fill = fmi.dat.by.group$color)) +
  geom_text(aes(label = paste0(CDKN2A.CN01, "/", group.sample.count, " (", pct.CDKN2A.CN01, "%)")), 
            size = 2.5,
            vjust = 0.5,
            hjust = -0.1) +
  coord_flip() +
  scale_color_manual(values = c("#AAAAAA", "#00BA38", "#619CFF",  "#F8766D")) +
  scale_fill_manual(values = c("#AAAAAA", "#00BA38", "#619CFF",  "#F8766D")) +
  ylab("% CDKN2A CN0-1") +
  ylim(0,70) +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        legend.position = "none")

Figure 4C - Response status distribution by CDKN2A deletion status

for(cancer.type in levels(eset$CANCER_TYPE)){
  pdat <- subset(pData(eset), CANCER_TYPE == cancer.type & !is.na(CDKN2A_CNA_2CAT))
  
  print(
    ggplot(data = pdat, aes(x = factor(CDKN2A_CNA_2CAT), fill = factor(ORR.CRPR.SDPD, levels = c("SDPD", "CRPR")))) +
        geom_bar(position = "fill", 
                 colour = "black") +
        geom_text(aes(label = ..count..), 
                  stat = 'count', 
                  position = position_fill(vjust = 0.5)) +
        xlab("") +
        ylab("Patient Percentage") +
        scale_y_continuous(labels = percent_format()) +
        scale_fill_manual(name = "ORR", 
                          values = palette.orr.2cat,
                          labels = c("SD/PD", "CR/PR")) +
        ggtitle(cancer.type) +
        theme_bw() +
        theme(panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              panel.border = element_blank(),
              axis.ticks.x = element_blank(),
              legend.key.size = unit(15, "points"),
              axis.text.x = element_text(angle = 45,
                                         hjust = 1,
                                         vjust = 1.2))
  )
}

Figure 4D - Survival outcome by transcriptional expression of CDKN2A or CDK6

genes <- c("CDKN2A", "CDK6")

for(gene in c(genes)){
  for(cancer in levels(eset$CANCER_TYPE)){
    outcome <- ifelse(cancer %in% c("mUC", "NSCLC"), "OS", "PFS")
    eset.sub <- eset[,eset$CANCER_TYPE == cancer]
    pdat <- pData(eset.sub)
  
    pdat[[gene]] <- as.numeric(exprs(eset.sub[rownames(subset(fData(eset.sub), symbol == gene)),]))
    
    gene.median <- median(pdat[[gene]], rm.na = TRUE)
    pdat[[paste0(gene, "_50")]] <- factor(ifelse(pdat[[gene]] < gene.median, paste0(gene, "_LOW"), paste0(gene, "_HIGH")), levels = c(paste0(gene, "_LOW"), paste0(gene, "_HIGH")))
  
    fittedSurv <- survfit(Surv(time = pdat[[paste0(outcome, "_MONTHS")]], event = pdat[[paste0(outcome, "_CNSR_REV")]]) ~ pdat[[paste0(gene, "_50")]], data = pdat)
    diffSurv <- survdiff(Surv(time = pdat[[paste0(outcome, "_MONTHS")]], event = pdat[[paste0(outcome, "_CNSR_REV")]]) ~ pdat[[paste0(gene, "_50")]], data = pdat)
    coxfit <- coxph(Surv(time = pdat[[paste0(outcome, "_MONTHS")]], event = pdat[[paste0(outcome, "_CNSR_REV")]]) ~ pdat[[paste0(gene, "_50")]], data = pdat)
    
    plotSurvival(survFit = fittedSurv,
                  survDiff = diffSurv,
                  diff.factor = pdat[[paste0(gene, "_50")]],
                  main =  paste0(cancer, " - ", gene),
                  cols = c("blue", "red"),
                  ltypes = c(1, 1),
                  pval = "coxph",
                  coxphData = coxfit,
                  plotMedian = TRUE,
                  plot.nrisk = TRUE,
                  xLab = paste0(outcome, " (months)"),
                  lwd = 3,
                  cexMedian = 1,
                  cexLegend = 0.9,
                  cex.lab = 1.5)
  }
}


Session Info

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ROCR_1.0-11           survminer_0.4.9       ggpubr_0.4.0         
##  [4] survival_3.2-10       EnhancedVolcano_1.8.0 ggrepel_0.9.1        
##  [7] circlize_0.4.12       RColorBrewer_1.1-2    ComplexHeatmap_2.6.2 
## [10] qusage_2.24.0         limma_3.46.0          ggbiplot_0.55        
## [13] plyr_1.8.6            gplots_3.1.1          scales_1.1.1         
## [16] forcats_0.5.1         stringr_1.4.0         dplyr_1.0.5          
## [19] purrr_0.3.4           readr_1.4.0           tidyr_1.1.3          
## [22] tibble_3.1.0          ggplot2_3.3.3         tidyverse_1.3.0      
## [25] Biobase_2.50.0        BiocGenerics_0.36.0  
## 
## loaded via a namespace (and not attached):
##   [1] ggbeeswarm_0.6.0    colorspace_2.0-0    ggsignif_0.6.1     
##   [4] rjson_0.2.20        ellipsis_0.3.1      rio_0.5.26         
##   [7] estimability_1.3    GlobalOptions_0.1.2 fs_1.5.0           
##  [10] clue_0.3-58         rstudioapi_0.13     farver_2.1.0       
##  [13] fansi_0.4.2         mvtnorm_1.1-1       lubridate_1.7.10   
##  [16] xml2_1.3.2          splines_4.0.4       extrafont_0.17     
##  [19] knitr_1.31          jsonlite_1.7.2      Cairo_1.5-12.2     
##  [22] km.ci_0.5-2         broom_0.7.5         Rttf2pt1_1.3.8     
##  [25] cluster_2.1.1       dbplyr_2.1.0        png_0.1-7          
##  [28] compiler_4.0.4      httr_1.4.2          emmeans_1.5.5-1    
##  [31] backports_1.2.1     assertthat_0.2.1    Matrix_1.3-2       
##  [34] cli_2.3.1           htmltools_0.5.1.1   tools_4.0.4        
##  [37] gtable_0.3.0        glue_1.4.2          maps_3.3.0         
##  [40] Rcpp_1.0.6          carData_3.0-4       cellranger_1.1.0   
##  [43] vctrs_0.3.6         ggalt_0.4.0         nlme_3.1-152       
##  [46] extrafontdb_1.0     xfun_0.22           openxlsx_4.2.3     
##  [49] rvest_1.0.0         lifecycle_1.0.0     gtools_3.8.2       
##  [52] rstatix_0.7.0       zoo_1.8-9           MASS_7.3-53.1      
##  [55] hms_1.0.0           proj4_1.0-10.1      curl_4.3           
##  [58] yaml_2.2.1          gridExtra_2.3       KMsurv_0.1-5       
##  [61] ggrastr_0.2.3       stringi_1.5.3       highr_0.8          
##  [64] S4Vectors_0.28.1    caTools_1.18.1      zip_2.1.1          
##  [67] shape_1.4.5         rlang_0.4.10        pkgconfig_2.0.3    
##  [70] matrixStats_0.58.0  bitops_1.0-6        evaluate_0.14      
##  [73] lattice_0.20-41     labeling_0.4.2      tidyselect_1.1.0   
##  [76] magrittr_2.0.1      R6_2.5.0            IRanges_2.24.1     
##  [79] fftw_1.0-6          generics_0.1.0      DBI_1.1.1          
##  [82] foreign_0.8-81      pillar_1.5.1        haven_2.3.1        
##  [85] withr_2.4.1         abind_1.4-5         ash_1.0-15         
##  [88] modelr_0.1.8        crayon_1.4.1        car_3.0-10         
##  [91] survMisc_0.5.5      KernSmooth_2.23-18  utf8_1.2.1         
##  [94] rmarkdown_2.7       GetoptLong_1.0.5    readxl_1.3.1       
##  [97] data.table_1.14.0   reprex_1.0.0        digest_0.6.27      
## [100] xtable_1.8-4        stats4_4.0.4        munsell_0.5.0      
## [103] beeswarm_0.3.1      vipor_0.4.5