Second part R course

Data for the second part

The aim of the (hypothetical) trial was to investigate the effect of three different treatments in terms of thickness and redness of the skin in psoriasis patients. There were 10 subjects per treatment arm and the effectiveness of the drug was measured in terms of redness and thickness of the skin. Thickness of the skin in the lesion was measured with a special device, whereas redness was evaluated by an investigator: a score ranging from 0 to 30. The data has a form of ADaM data ADXD with paramcd having two values: “Redness”, “Thickness” and aval contains the values of these parameters. ADY variable contains days and AVISIT contains visit days in a character format.

Import and plotting the data

library(readr)

## Warning: package 'readr' was built under R version 3.4.2

dat <- read_csv("C:/Users/dkxdk/OneDrive - Leo Pharma A S/stat/R course/dat_mmrm2.csv")

## Parsed with column specification:
## cols(
##   param = col_character(),
##   paramcd = col_character(),
##   aval = col_double(),
##   ADY = col_integer(),
##   avisit = col_character(),
##   subjid = col_integer(),
##   fasfl = col_character(),
##   trt01p = col_character()
## )

str(dat)

## Classes 'tbl_df', 'tbl' and 'data.frame':    360 obs. of  8 variables:
##  $ param  : chr  "Thickness" "Thickness" "Thickness" "Thickness" ...
##  $ paramcd: chr  "THICKNESS" "THICKNESS" "THICKNESS" "THICKNESS" ...
##  $ aval   : num  491 406 392 328 539 ...
##  $ ADY    : int  1 4 6 8 1 4 6 8 1 4 ...
##  $ avisit : chr  "Day 1" "Day 4" "Day 6" "Day 8" ...
##  $ subjid : int  21 21 21 21 22 22 22 22 23 23 ...
##  $ fasfl  : chr  "Y" "Y" "Y" "Y" ...
##  $ trt01p : chr  "drug 1%" "drug 1%" "drug 1%" "drug 1%" ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 8
##   .. ..$ param  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ paramcd: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ aval   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ ADY    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ avisit : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ subjid : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ fasfl  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ trt01p : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

Now we check the data structure and the variable formats.

str(dat)

## Classes 'tbl_df', 'tbl' and 'data.frame':    360 obs. of  8 variables:
##  $ param  : chr  "Thickness" "Thickness" "Thickness" "Thickness" ...
##  $ paramcd: chr  "THICKNESS" "THICKNESS" "THICKNESS" "THICKNESS" ...
##  $ aval   : num  491 406 392 328 539 ...
##  $ ADY    : int  1 4 6 8 1 4 6 8 1 4 ...
##  $ avisit : chr  "Day 1" "Day 4" "Day 6" "Day 8" ...
##  $ subjid : int  21 21 21 21 22 22 22 22 23 23 ...
##  $ fasfl  : chr  "Y" "Y" "Y" "Y" ...
##  $ trt01p : chr  "drug 1%" "drug 1%" "drug 1%" "drug 1%" ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 8
##   .. ..$ param  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ paramcd: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ aval   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ ADY    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ avisit : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ subjid : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ fasfl  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ trt01p : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

We are going to look at the redness of the skin. First we subset the data.

dat_red <- dat[dat$param == "Redness",]
head(dat_red)

## # A tibble: 6 x 8
##     param paramcd     aval   ADY avisit subjid fasfl  trt01p
##     <chr>   <chr>    <dbl> <int>  <chr>  <int> <chr>   <chr>
## 1 Redness REDNESS 17.91924     1  Day 1      1     Y drug 3%
## 2 Redness REDNESS 17.73705     2  Day 2      1     Y drug 3%
## 3 Redness REDNESS 15.75879     3  Day 3      1     Y drug 3%
## 4 Redness REDNESS 17.19799     4  Day 4      1     Y drug 3%
## 5 Redness REDNESS 15.46497     5  Day 5      1     Y drug 3%
## 6 Redness REDNESS 15.28378     6  Day 6      1     Y drug 3%

THen we plot redness scores versus days for each subject, colouring by treatment group :

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.4.4

dat_red$subjid<- as.factor(dat_red$subjid)
dat_red$trt01p <- as.factor(dat_red$trt01p)
ggplot(dat_red, aes(x=ADY, y=aval, group=subjid, colour=trt01p)) +
  geom_line()

It is also of interest to plot treatment group average time profiles:

require(plyr)

## Loading required package: plyr

## Warning: package 'plyr' was built under R version 3.4.4

dat_red$avisit <- as.factor(dat_red$avisit)
mns <- ddply(dat_red, ~ trt01p + avisit + ADY, summarize,
             redness = mean(aval))
ggplot(mns, aes(x=ADY, y=redness, group=trt01p, colour=trt01p)) +
  geom_point() + geom_line()

Plot data points with a fitted curve:

ggplot(dat_red, aes(x=ADY, y=aval, colour = trt01p, group = trt01p)) +
  geom_point(shape=1) +    # Use hollow circles
  geom_smooth(method = "lm", formula = y ~ splines::ns(x, 2), se = FALSE)

It seems like there is a difference in treatment groups. It is also clear that there is a between subjects variation.

Modelling of the data

First we fit data with a linear mixed model with one random subject effect by using lme4 package. The covariance matrix \(V\) has the following form:

\[ V_{y_{i_1}, y_{i_2}} = \begin{cases} 0 & \quad \text{if } subj_{i1} \neq subj_{i2} \text{ and } i_1 \neq i_2\\ \sigma_{subj}^2 & \quad \text{if } subj_{i1} = subj_{i2} \text{ and } i_1 \neq i_2 \\ \sigma_{subj}^2+ \sigma^2 & \quad \text{if } i_1 = i_2 \end{cases} \]

In this model two measurements on the subject are correlated, but equally correlated (no matter how far observations are taken). This type of model can be fit in R in different ways. Using the nlme or lme4 packages. With lme4:

library(lme4)

## Loading required package: Matrix

model1 <- lmer(aval ~ avisit*trt01p + (1 | subjid), data = dat_red)
summary(model1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: aval ~ avisit * trt01p + (1 | subjid)
##    Data: dat_red
## 
## REML criterion at convergence: 853.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.22636 -0.64102 -0.03963  0.61653  2.47603 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subjid   (Intercept) 1.202    1.096   
##  Residual             1.884    1.373   
## Number of obs: 240, groups:  subjid, 30
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)               19.49493    0.55553   35.09
## avisitDay 2               -0.68020    0.61390   -1.11
## avisitDay 3               -1.72135    0.61390   -2.80
## avisitDay 4               -1.66418    0.61390   -2.71
## avisitDay 5               -2.12188    0.61390   -3.46
## avisitDay 6               -1.88220    0.61390   -3.07
## avisitDay 7               -1.62350    0.61390   -2.64
## avisitDay 8               -3.38599    0.61390   -5.52
## trt01pdrug 2%             -0.22719    0.78564   -0.29
## trt01pdrug 3%              0.03475    0.78564    0.04
## avisitDay 2:trt01pdrug 2% -1.24566    0.86819   -1.43
## avisitDay 3:trt01pdrug 2% -0.63570    0.86819   -0.73
## avisitDay 4:trt01pdrug 2% -1.99746    0.86819   -2.30
## avisitDay 5:trt01pdrug 2% -2.40608    0.86819   -2.77
## avisitDay 6:trt01pdrug 2% -2.46675    0.86819   -2.84
## avisitDay 7:trt01pdrug 2% -2.71482    0.86819   -3.13
## avisitDay 8:trt01pdrug 2% -4.32495    0.86819   -4.98
## avisitDay 2:trt01pdrug 3% -0.76171    0.86819   -0.88
## avisitDay 3:trt01pdrug 3% -1.17534    0.86819   -1.35
## avisitDay 4:trt01pdrug 3% -1.91081    0.86819   -2.20
## avisitDay 5:trt01pdrug 3% -3.14694    0.86819   -3.62
## avisitDay 6:trt01pdrug 3% -1.78482    0.86819   -2.06
## avisitDay 7:trt01pdrug 3% -3.05396    0.86819   -3.52
## avisitDay 8:trt01pdrug 3% -3.06099    0.86819   -3.53

## 
## Correlation matrix not shown by default, as p = 24 > 12.
## Use print(x, correlation=TRUE)  or
##   vcov(x)     if you need it

anova(model1)

## Analysis of Variance Table
##               Df Sum Sq Mean Sq F value
## avisit         7 647.02  92.431 49.0508
## trt01p         2  36.37  18.187  9.6512
## avisit:trt01p 14  85.17   6.083  3.2283

Now we will try to fit mixed model with repeated measurements:

\[redness \sim N(\mu, V)\] with

\[\mu_i = \mu + \alpha(treat_i) + \beta(day_i) + \gamma(treat_i, day_i)\] and covariance \(V\) having the following form:

\[ V_{i_1, i_2} = \begin{cases} 0 & \quad \text{if } subj_{i1} \neq subj_{i2} \text{ and } i_1 \neq i_2\\ \nu^2 + \tau^2*exp(\frac{-(day_{i_1} - day_{i_2})^2}{\rho^2}) & \quad \text{if } subj_{i1} = subj_{i2} \text{ and } i_1 \neq i_2 \\ \nu^2 + \tau^2 + \sigma^2 & \quad \text{if } i_1 = i_2 \end{cases} \]

In this model two observations very close to each other have covariance \(\nu^2 + \tau^2\), two observations very far have covariance \(\nu^2\)

This type of model can be fit with nlme package

library(nlme)

## Warning: package 'nlme' was built under R version 3.4.4

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:lme4':
## 
##     lmList

?corClasses

## starting httpd help server ...

##  done

str(dat_red)

## Classes 'tbl_df', 'tbl' and 'data.frame':    240 obs. of  8 variables:
##  $ param  : chr  "Redness" "Redness" "Redness" "Redness" ...
##  $ paramcd: chr  "REDNESS" "REDNESS" "REDNESS" "REDNESS" ...
##  $ aval   : num  17.9 17.7 15.8 17.2 15.5 ...
##  $ ADY    : int  1 2 3 4 5 6 7 8 1 2 ...
##  $ avisit : Factor w/ 8 levels "Day 1","Day 2",..: 1 2 3 4 5 6 7 8 1 2 ...
##  $ subjid : Factor w/ 30 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ fasfl  : chr  "Y" "Y" "Y" "Y" ...
##  $ trt01p : Factor w/ 3 levels "drug 1%","drug 2%",..: 3 3 3 3 3 3 3 3 3 3 ...

model2 <- lme(aval ~ avisit*trt01p,
              random= ~ 1|subjid,
              correlation=corGaus(form= ~ ADY|subjid, nugget=TRUE),
              data=dat_red)

model2

## Linear mixed-effects model fit by REML
##   Data: dat_red 
##   Log-restricted-likelihood: -399.1451
##   Fixed: aval ~ avisit * trt01p 
##               (Intercept)               avisitDay 2 
##               19.49493395               -0.68019970 
##               avisitDay 3               avisitDay 4 
##               -1.72135278               -1.66418045 
##               avisitDay 5               avisitDay 6 
##               -2.12187918               -1.88219555 
##               avisitDay 7               avisitDay 8 
##               -1.62350126               -3.38598872 
##             trt01pdrug 2%             trt01pdrug 3% 
##               -0.22718582                0.03474644 
## avisitDay 2:trt01pdrug 2% avisitDay 3:trt01pdrug 2% 
##               -1.24565963               -0.63569630 
## avisitDay 4:trt01pdrug 2% avisitDay 5:trt01pdrug 2% 
##               -1.99745680               -2.40608341 
## avisitDay 6:trt01pdrug 2% avisitDay 7:trt01pdrug 2% 
##               -2.46674762               -2.71482070 
## avisitDay 8:trt01pdrug 2% avisitDay 2:trt01pdrug 3% 
##               -4.32494765               -0.76171299 
## avisitDay 3:trt01pdrug 3% avisitDay 4:trt01pdrug 3% 
##               -1.17534468               -1.91081325 
## avisitDay 5:trt01pdrug 3% avisitDay 6:trt01pdrug 3% 
##               -3.14694029               -1.78482245 
## avisitDay 7:trt01pdrug 3% avisitDay 8:trt01pdrug 3% 
##               -3.05396441               -3.06099087 
## 
## Random effects:
##  Formula: ~1 | subjid
##         (Intercept) Residual
## StdDev:   0.8457847 1.554617
## 
## Correlation Structure: Gaussian spatial correlation
##  Formula: ~ADY | subjid 
##  Parameter estimate(s):
##     range    nugget 
## 2.1578670 0.2814681 
## Number of Observations: 240
## Number of Groups: 30

anova(model2)

##               numDF denDF  F-value p-value
## (Intercept)       1   189 5570.580  <.0001
## avisit            7   189   37.082  <.0001
## trt01p            2    27    9.660   7e-04
## avisit:trt01p    14   189    3.009   3e-04

#sighat <- extract.lme.cov(model2, data = sim)
#sighat2 <- extract.lme.cov2(model2, data = sim)

# model with unstructured covariance structure does not converge
# model3 <- lme(aval ~ avisit*trt01p,
#               random= ~ 1|subjid,
#               correlation=corSymm(),
#               data=dat_red)

Validation plots

plot(model2)

plot(model2, resid(., type="p") ~ fitted(.) | trt01p)

qqnorm(model2)

Differences of least squares means at Day 8

library(emmeans)

## Warning: package 'emmeans' was built under R version 3.4.4

emm <- emmeans(model2, pairwise ~ trt01p| avisit)
emm$contrasts

## avisit = Day 1:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  0.22718582 0.7914779 27   0.287  0.9557
##  drug 1% - drug 3% -0.03474644 0.7914779 27  -0.044  0.9989
##  drug 2% - drug 3% -0.26193226 0.7914779 27  -0.331  0.9415
## 
## avisit = Day 2:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  1.47284545 0.7914779 27   1.861  0.1695
##  drug 1% - drug 3%  0.72696655 0.7914779 27   0.918  0.6335
##  drug 2% - drug 3% -0.74587890 0.7914779 27  -0.942  0.6188
## 
## avisit = Day 3:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  0.86288212 0.7914779 27   1.090  0.5282
##  drug 1% - drug 3%  1.14059825 0.7914779 27   1.441  0.3347
##  drug 2% - drug 3%  0.27771612 0.7914779 27   0.351  0.9345
## 
## avisit = Day 4:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  2.22464262 0.7914779 27   2.811  0.0239
##  drug 1% - drug 3%  1.87606681 0.7914779 27   2.370  0.0630
##  drug 2% - drug 3% -0.34857581 0.7914779 27  -0.440  0.8990
## 
## avisit = Day 5:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  2.63326923 0.7914779 27   3.327  0.0069
##  drug 1% - drug 3%  3.11219386 0.7914779 27   3.932  0.0015
##  drug 2% - drug 3%  0.47892462 0.7914779 27   0.605  0.8186
## 
## avisit = Day 6:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  2.69393344 0.7914779 27   3.404  0.0057
##  drug 1% - drug 3%  1.75007601 0.7914779 27   2.211  0.0873
##  drug 2% - drug 3% -0.94385743 0.7914779 27  -1.193  0.4677
## 
## avisit = Day 7:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  2.94200653 0.7914779 27   3.717  0.0026
##  drug 1% - drug 3%  3.01921797 0.7914779 27   3.815  0.0020
##  drug 2% - drug 3%  0.07721144 0.7914779 27   0.098  0.9948
## 
## avisit = Day 8:
##  contrast             estimate        SE df t.ratio p.value
##  drug 1% - drug 2%  4.55213347 0.7914779 27   5.751  <.0001
##  drug 1% - drug 3%  3.02624443 0.7914779 27   3.824  0.0020
##  drug 2% - drug 3% -1.52588904 0.7914779 27  -1.928  0.1503
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

emm_options()

## $contrast
## $contrast$infer
## [1] FALSE  TRUE
## 
## 
## $disable.lmerTest
## [1] FALSE
## 
## $disable.pbkrtest
## [1] FALSE
## 
## $emmeans
## $emmeans$infer
## [1]  TRUE FALSE
## 
## 
## $estble.tol
## [1] 1e-08
## 
## $graphics.engine
## [1] "ggplot"
## 
## $lmer.df
## [1] "kenward-roger"
## 
## $lmerTest.limit
## [1] 3000
## 
## $msg.interaction
## [1] TRUE
## 
## $msg.nesting
## [1] TRUE
## 
## $pbkrtest.limit
## [1] 3000
## 
## $ref_grid
## $ref_grid$is.new.rg
## [1] TRUE
## 
## $ref_grid$infer
## [1] FALSE FALSE
## 
## 
## $save.ref_grid
## [1] TRUE
## 
## $simplify.names
## [1] TRUE

Second part R course

Alexandra Kuznetsova

19 nov 2018

Data for the second part

Import and plotting the data

Modelling of the data