# Structure of the lecture ---------------------------------------------------- # simulate and look at data # First we calculate risks and rates for one AE # Next, we calculate it for all AEs # Packages ---------------------------------------------------------------- req.packcages <- c("labelled", "data.table", "dplyr", "tidyr", "ggplot2", "scales", "gridExtra", "grid", "gtable", "survminer") for(pack in req.packcages) if(!(pack %in% rownames(installed.packages()))) install.packages(pack) require(labelled) # Manipulating Labelled Data require(data.table) # Enhanced data.frame require(dplyr) # dplyr: a grammar of data manipulation require(tidyr) # Easily Tidy Data with 'spread()' and 'gather()' Functions require(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics require(scales) # Generic plot scaling methods require(gridExtra) # To arrange multiple grid-based plots on a page, and draw tables require(grid) # A rewrite of the graphics layout capabilities, plus some support for interaction require(gtable) # Extract label info from plot require(survival) # Fit survival curves require(survminer) # Survival plot by ggplot2 # The package stdRate have been developed for this course #install.packages("../stdRate0.1.0.zip", repos = NULL) require(stdRat) # Calculation of risk and rates for the ADaM dataset ADAE # Invetigate the data --------------------------------------------------------- adsl %>% ungroup # How many studies are included: unique(adsl$studyid) # which countries unique(adsl$country) # How many subjects included by contry by site table(adsl$studyid, adsl$region, adsl$country) # Since we will mainly be looking at the safety analysis set we can filter accordingly adsl_f <- adsl %>% filter(saffl == "Y") %>% mutate(trtdurd = as.numeric(eotdt-randdt), intrdurd = as.numeric(eosdt-randdt)) #How many included in Safety analysis set by site nested under studyid, region, and country, adsl_f %>% group_by(studyid, region, country, siteid) %>% summarise(N = n()) # Drop out pattern --------------------------------------------------------- # Establish survival curves fit <- survfit(Surv(trtdurd, eotstt != "COMPLETED") ~ trta, data = adsl_f) # Plot the curves and show p-val for equal "survival" ggsurvplot(fit, data = adsl_f, conf.int = TRUE, pval = TRUE, fun = "pct", risk.table = TRUE, size = 1, linetype = "strata", palette = c("#E7B800", "#2E9FDF"), legend.title = "Treatment", legend.labs = rev(c("New drug", "Old drug"))) ##########################################################################- # Calculate advese event risk/rate -------------------------------------------- ##########################################################################- # Assignment. # We have the following situation: # Fits try playing with the following example: # study1 new 14 381 14 164.2491 # study1 old 17 420 17 190.9049 # study2 new 9 251 9 105.3005 # study2 old 9 247 9 109.9466 # calculate the risk and rate for study 1: # General situation: # Count the number of subjects and person years of exposure in the # safety analysis set by studyid and actual treatment: n_sas_data <- adsl %>% filter(saffl == "Y") %>% group_by(studyid, trta) %>% summarise(n_trt = n(), pye_trt = sum(as.numeric(eotdt-randdt))/365.25) # Calculate different summary statistics for adverse events, # Since we divide by the number of subjects in the safety analysis set # these numbers are joined aedecod_summary <- adae %>% filter(saffl == "Y" & trtemfl == "Y") %>% left_join(n_sas_data, by = c("studyid", "trta")) %>% group_by(aebodsys, aehlgt, aedecod, studyid, trta) %>% summarize(N = n_distinct(usubjid), P = N/n_trt[1], P_var = (P*(1-P)/n_trt[1]), P_low = (P-qnorm(0.975)*sqrt(P_var))*100, P_high = (P+qnorm(0.975)*sqrt(P_var))*100, P = P * 100, E = n(), R = E/pye_trt[1], R_var = E/pye_trt[1]^2, R_low = (R-qnorm(0.975)*sqrt(R_var))*100, R_high = (R+qnorm(0.975)*sqrt(R_var))*100, R = R * 100) # Using package stdRate --------------------------------------------------- # The function freqAEstd can be used to calculate # Assignment 2 Go through the examples in function freqAEstd # Try grouping by region # comparisons ------------------------------------------------------------- # Back to doing things by hand. # Calculate diffrent standard error to be able to calculate confidence intervals aedecod_se <- aedecod_summary %>% group_by(aebodsys, aehlgt, aedecod, studyid) %>% mutate(se_risk_diff = sqrt(sum(P_var)), se_rate_diff = sqrt(sum(R_var)), se_risk_ratio = sqrt(sum(1/N)), se_rate_ratio = sqrt(sum(1/E))) # Function for ratio ratio_calc <- function(rate, names, order){ if(!all(names %in% order)){ return(NaN) }else{ names(rate) <- names rate[order[1]]/rate[order[2]] } } # Function for difference diff_calc <- function(risk, names, order){ if(length(risk) == 1){ missing <- setdiff(order, names) risk <- c(risk, 0) names <- c(names, missing) } names(risk) <- names risk[order[1]]-risk[order[2]] } # Calculate the risk and rate difference comparison <- aedecod_se %>% group_by(aebodsys, aehlgt, aedecod, studyid) %>% summarise(# Total number of affected subjects N = sum(N), # Total number of events E = sum(E), # Risk difference risk_diff = diff_calc(P, names=trta, order = c("new drug", "old drug")), risk_diff_low = (risk_diff/100-qnorm(0.975)*se_risk_diff[1])*100, risk_diff_high = (risk_diff/100+qnorm(0.975)*se_risk_diff[1])*100, risk_diff_pval = 2*(1-pnorm(abs(risk_diff)/100/se_risk_diff[1])), # Risk ratio risk_ratio = ratio_calc(P, names=trta, order = c("new drug", "old drug")), risk_ratio_low = exp(log(risk_ratio)-qnorm(0.975)*se_risk_ratio[1]), risk_ratio_high = exp(log(risk_ratio)+qnorm(0.975)*se_risk_ratio[1]), risk_ratio_pval = 2*(1-pnorm(abs(log(risk_ratio))/se_risk_ratio[1])), #risk_harm = ifelse(rate_ratio>1, "new drug", "old drug"), # Rate diff rate_diff = diff_calc(R, names=trta, order = c("new drug", "old drug")), rate_diff_low = (rate_diff/100-qnorm(0.975)*se_rate_diff[1])*100, rate_diff_high = (rate_diff/100+qnorm(0.975)*se_rate_diff[1])*100, rate_diff_pval = 2*(1-pnorm(abs(rate_diff)/100/se_rate_diff[1])), # Rate ratio rate_ratio = ratio_calc(R, names=trta, order = c("new drug", "old drug")), rate_ratio_low = exp(log(rate_ratio)-qnorm(0.975)*se_rate_ratio[1]), rate_ratio_high = exp(log(rate_ratio)+qnorm(0.975)*se_rate_ratio[1]), rate_ratio_pval = 2*(1-pnorm(abs(log(rate_ratio))/se_rate_ratio[1])), rate_ratio_pval_adj = p.adjust(rate_ratio_pval, method = "holm"), rate_harm = ifelse(rate_ratio>1, "new drug", "old drug")) # Using package stdRate --------------------------------------------------- # The function freqAEstdComp can be used to calculate # Assignment3 Go: through the examples in function freqAEstdComp # Volcano plot ------------------------------------------------------------ # Create the volcano plot p <- ggplot(comparison, aes(x=rate_ratio, y=rate_ratio_pval, size = E, color=rate_harm)) + geom_point(alpha=0.2) p # there is something about the scaled reverselog_trans <- function(base = exp(1)) { force(base) trans <- function(x) -log(x, base) inv <- function(x) base^(-x) trans_new(paste0("reverselog-", format(base)), trans, inv, log_breaks(base = base), domain = c(1e-100, Inf)) } p <- p + scale_y_continuous(trans = reverselog_trans(10)) + scale_x_log10() p + geom_text(aes(label=ifelse(rate_ratio_pval_adj < 0.05, aedecod, ""), size = 10)) p <- p + geom_text(aes(label=ifelse(rate_ratio_pval_adj < 0.05, aedecod, ""), size = 10), vjust = "inward", hjust = "inward", show.legend = FALSE) # lets add some label colour p <- p + scale_color_manual(values=structure(c("#001965", "#808080"), names = c("new drug", "old drug"))) + scale_fill_manual( values=structure(c("#001965", "#808080"), names = c("new drug", "old drug")))+ xlab("Rate raio") + ylab("Adjusted p-value") + labs( size = "Number\nof events:" , color = "Harmfull for:", caption = "Line at 0.05 signifies statistical significance"#, ) p # Final code --------------------------------------------- ggplot(comparison, aes(x=rate_ratio, y=rate_ratio_pval, size = E, color=rate_harm)) + geom_hline(yintercept = 0.05) + geom_point(alpha=0.2) + scale_y_continuous(trans = reverselog_trans(10)) + scale_x_log10() + scale_size_continuous(range = c(0.5, 10)) + scale_color_manual(values=structure(c("#001965", "#808080"), names = c("new drug", "old drug"))) + scale_fill_manual(values=structure(c("#001965", "#808080"), names = c("new drug", "old drug")))+ xlab("Rate raio") + ylab("Adjusted p-value") + labs( size = "Number\nof events:" , color = "Harmfull for:", caption = "Line at 0.05 signifies statistical significance"#, #title = "Volcano plot with adjusted p-values" ) + theme_bw() + theme( text = element_text(size=12), plot.title = element_text(hjust = 0.5), legend.position = c(.95, .05), legend.justification = c("bottom"), panel.border = element_blank() ) + facet_grid(cols = vars(studyid)) + geom_text(aes(label=ifelse(rate_ratio_pval_adj < 0.05, aedecod, ""), size = 10), vjust = "inward", hjust = "inward", show.legend = FALSE) # Cochrane Mantel Haensel ------------------------------------------------- # Count the number of subjects and person years of exposure in the # safety analysis set by studyid and actual treatment: n_sas_data <- adsl %>% filter(saffl == "Y") %>% group_by(studyid, trta) %>% summarise(n_trt = n(), pye_trt = sum(as.numeric(eotdt-randdt))/365.25) # Calculate the mantel haensel correction for incidence and rate cmh_weight <- n_sas_data %>% group_by(studyid) %>% summarise(w_n = prod(n_trt)/sum(n_trt), w_pye = prod(pye_trt)/sum(pye_trt)) # First, the risk and rate is computed by trial summary_by_trial <- adae %>% filter(saffl == "Y" & trtemfl == "Y") %>% left_join(n_sas_data, by = c("studyid", "trta")) %>% group_by(aebodsys, aehlgt, aedecod, trta, studyid) %>% summarize(N = n_distinct(usubjid), P = N / n_trt[1], pv = P * (1 - P) / n_trt[1], n_trt = n_trt[1], E = n(), R = E / pye_trt[1], rv = E / pye_trt[1]^2, pye_trt = pye_trt[1]) # Next, the risk and rate are corrected aedecod_cmh <- summary_by_trial %>% group_by(aebodsys, aehlgt, aedecod, trta) %>% left_join(cmh_weight, by = c("studyid")) %>% summarise(N = sum(N), P_trt = sum(P*w_n), P = sum(P*w_n)/sum(w_n)*100, var_p = sum(pv*w_n^2)/sum(w_n)^2, P_low = (P/100-qnorm(0.975)*sqrt(var_p))*100, P_high = (P/100+qnorm(0.975)*sqrt(var_p))*100, E = sum(E), R_trt = sum(R*w_pye), # for calculation of RR_se R = sum(R*w_pye)/sum(w_pye)*100, var_r = sum(rv*w_pye^2)/sum(w_pye)^2, R_low = (R/100-qnorm(0.975)*sqrt(var_r))*100, R_high = (R/100+qnorm(0.975)*sqrt(var_r))*100) # Assignment -------------------------------------------------------------- # Go though the example in function freqAEcmh # Create a dataset with common AEs to_plot <- aedecod_cmh %>% group_by(aedecod) %>% filter(max(P) > 5) # Create a new factor level for aedecod corresponding to P for new drug levels <- to_plot %>% summarize(P=P[trta == "new drug"]) %>% arrange(P) %>% pull(aedecod) to_plot_ordered <- to_plot %>% ungroup() %>% mutate(aedecod = factor(aedecod, levels = levels)) # Plot the risk together with the fitted CI intervals risk_plot <- to_plot_ordered %>% ggplot(aes(color=trta)) + geom_linerange(aes(aedecod, ymin = P_low, ymax = P_high), position=position_dodge(w=.5), size=1.5) + geom_hline(yintercept=0, linetype=2) + coord_flip() + theme_minimal() + geom_point(aes(aedecod, P), position=position_dodge(w=.5), size =2)+ xlab(NULL) + labs( title = "Corrected Risk") + scale_color_manual(values=structure(c("#001965", "#808080"), names = c("new drug", "old drug"))) # Plot the rate together with the fitted CI intervals rate_plot <- to_plot_ordered %>% ggplot(aes(color=trta)) + geom_linerange(aes(aedecod, ymin = R_low, ymax = R_high), position=position_dodge(w=.5), size=1.5) + geom_hline(yintercept=0, linetype=2) + coord_flip() + theme_minimal() + geom_point(aes(aedecod, R), position=position_dodge(w=.5), size =2) + # scale_y_sqrt() + xlab(NULL) + labs( title = "Corrected Rate") + scale_color_manual(values=structure(c("#001965", "#808080"), names = c("new drug", "old drug"))) # To calculate the numerator for the variance of Rate and Risk Ratio aedecod_cmh_rate_se <- summary_by_trial %>% group_by(aebodsys, aehlgt, aedecod, studyid) %>% left_join(cmh_weight, by = c("studyid")) %>% summarise(ov_risk = w_n[1]*(sum(N)/sum(n_trt)- prod(P)), ov_rate = w_pye[1]*sum(E)/sum(pye_trt)) %>% group_by(aebodsys, aehlgt, aedecod) %>% summarise(ov_risk = sum(ov_risk), ov_rate = sum(ov_rate)) # Calculate diffrent standard error to be able to calculate confidence intervals aedecod_cmh_se <- aedecod_cmh %>% group_by(aebodsys, aehlgt, aedecod) %>% left_join(aedecod_cmh_rate_se, by=c("aebodsys", "aehlgt", "aedecod")) %>% mutate(se_risk_diff = sqrt(sum(var_p)), se_rate_diff = sqrt(sum(var_r)), se_risk_ratio = sqrt(ov_risk/(prod(P_trt))), se_rate_ratio = sqrt(ov_rate/(prod(R_trt)))) # calculate the comparisons comparison_cmh <- aedecod_cmh_se %>% summarise(N = sum(N), E = sum(E), # Risk diff risk_diff = diff_calc(P, names=trta, order = c("new drug", "old drug")), risk_diff_low = (risk_diff/100-qnorm(0.975)*se_risk_diff[1])*100, risk_diff_high = (risk_diff/100+qnorm(0.975)*se_risk_diff[1])*100, risk_diff_pval = 2*(1-pnorm(abs(risk_diff)/100/se_risk_diff[1])), risk_diff_pval_adj = p.adjust(risk_diff_pval, method = "holm"), # Risk ratio risk_ratio = ratio_calc(P, names=trta, order = c("new drug", "old drug")), risk_ratio_low = risk_ratio*exp(-qnorm(0.975)*se_risk_ratio[1]), risk_ratio_high = risk_ratio*exp( qnorm(0.975)*se_risk_ratio[1]), risk_ratio_pval = 2*(1-pnorm(abs(log(risk_ratio))/se_risk_ratio[1])), risk_ratio_pval_adj = p.adjust(risk_ratio_pval, method = "holm"), risk_harm = ifelse(risk_diff>1, "new drug", "old drug"), # Rate diff rate_diff = diff_calc(R, names=trta, order = c("new drug", "old drug")), rate_diff_low = (rate_diff/100-qnorm(0.975)*se_rate_diff[1])*100, rate_diff_high = (rate_diff/100+qnorm(0.975)*se_rate_diff[1])*100, rate_diff_pval = 2*(1-pnorm(abs(rate_diff)/100/se_rate_diff[1])), rate_diff_pval_adj = p.adjust(rate_diff_pval, method = "holm"), # Rate ratio rate_ratio = ratio_calc(R, names=trta, order = c("new drug", "old drug")), rate_ratio_low = rate_ratio*exp(-qnorm(0.975)*se_rate_ratio[1]), rate_ratio_high = rate_ratio*exp( qnorm(0.975)*se_rate_ratio[1]), rate_ratio_pval = 2*(1-pnorm(abs(log(rate_ratio))/se_rate_ratio[1])), rate_ratio_pval_adj = p.adjust(rate_ratio_pval, method = "holm"), rate_harm = ifelse(rate_ratio>1, "new drug", "old drug")) # Create plot for rate ratioi rate_ratio_plot <- comparison_cmh %>% filter(aedecod %in% to_plot$aedecod) %>% mutate(aedecod = factor(aedecod, levels = levels)) %>% ggplot(aes(color=rate_harm)) + geom_linerange(aes(aedecod, ymin = rate_ratio_low, ymax = rate_ratio_high), position=position_dodge(w=.5), size=1.5) + geom_hline(yintercept=1, linetype=2) + coord_flip() + theme_minimal() + geom_point(aes(aedecod, rate_ratio), position=position_dodge(w=.5), size =2)+ labs( title = "Corrected Rate Ratio")+ xlab(NULL) + ylab("Rate ratio") + scale_y_log10()+ scale_color_manual(values=structure(c("#001965", "#808080"), names = c("new drug", "old drug"))) # Function for extracting labels g_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] return(legend)} # Get the legend from risk plot legend <- g_legend(risk_plot + theme(legend.position="bottom")) # Plot the three plot together grid.arrange(arrangeGrob(risk_plot + theme(legend.position="none"), rate_plot + theme(legend.position="none", axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()), rate_ratio_plot + theme(axis.title.y=element_blank(), legend.position="none", axis.text.y=element_blank(), axis.ticks.y=element_blank()), nrow=1, widths = c(0.5, 0.25, 0.25)), legend, nrow=2, heights=c(10, 1))