## percep.R ## 15 august 2013 library(languageR) library(ggplot2) library(Hmisc) library(lme4) ## colours myPalette <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3") ## load helper functions source('http://www.lel.ed.ac.uk/~jkirby/khmer/helper.R') ## load raw data kuu <- read.csv('http://www.lel.ed.ac.uk/~jkirby/khmer/kuu.csv', header=T) kaa <- read.csv('http://www.lel.ed.ac.uk/~jkirby/khmer/kaa.csv', header=T) ## set custom theme background theme_set(theme_bw()) ############################################### ## Experiment 1: /kruu/ ############################################### ################################## ## EDA: subject-specific responses ################################## kruuResp <- ftable(xtabs ( ~ stimulus + response + condition + subject, data = kuu)) kruuResps <- droplevels(subset(as.data.frame(kruuResp), response=='kruu')) kruuResps$subject <- factor(kruuResps$subject,levels=c('s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20')) kruuResps$condition <- factor(kruuResps$condition, levels=c('f0', 'asp', 'breathy')) quartz(,8,6) kruuResp.plot <- ggplot(kruuResps, aes(stimulus, Freq/10)) + facet_wrap(~subject) + geom_line(aes(group=condition, colour=condition)) + opts(axis.title.x = theme_text(vjust = -0.5, hjust=0.6, size = 11), axis.title.y = theme_text(angle = 90, vjust = 0.125, size = 11)) + ylim(0,1) + scale_x_discrete("f0 stimulus step") + scale_y_continuous("% /kruu/ response") kruuResp.plot <- kruuResp.plot + scale_colour_manual(values=myPalette) ## or scale_colour_grey() print(kruuResp.plot) rm(kruuResps, kruuResp) ##################### ## coding & centering ##################### kuu$stimulus <- as.integer(kuu$stimulus) kuu$response <- factor(kuu$response, levels=c('kuu', 'kruu')) ## re-order response to make y = kruu = 1 kuu$condition <- factor(kuu$condition, levels=c('f0', 'asp', 'breathy')) ## re-order condition for treatment coding with f0 as base level # contrasts(kuu$condition) <- contr.helmert(3) ## Helmert coding ## center predictors, b/c (stimulus = 0) doesn't make any sense here kuu$c.stimulus <- kuu$stimulus - mean(kuu$stimulus) kuu$c.condition <- as.numeric(kuu$condition) - mean(as.numeric(kuu$condition), na.rm=T) ## remove participants who responded seemingly at random kuu <- droplevels(kuu[ ! kuu$subject %in% c('s2', 's3', 's19'), ]) ## remove obviously spurious responses (RTs < 200ms) nrow(subset(kuu, RT <= 200)) kuu <- subset(kuu, RT > 200) ################ ## model fitting ################ ## cf. http://hlplab.wordpress.com/2011/06/25/more-on-random-slopes/ ## null model kuu.model.null <- glmer(response ~ 1 + (1 | subject), family=binomial(link="logit"), data=kuu) ## model with crossed random effects (by-participant slopes and intercepts for stimulus:condition interaction) kuu.model.full <- glmer(response ~ c.stimulus * condition + (1 + c.stimulus * condition | subject), family = binomial(link = "logit"), data = kuu ) ## model with by-participant intercepts only kuu.model.int <- glmer(response ~ c.stimulus * condition + (1 | subject), family = binomial(link = "logit"), data=kuu) anova(kuu.model.full, kuu.model.int) ## the difference in chisq is highly significant ## slope-only models can be dangerous for other reasons (Barr et al, under review) ## intercepts for trial ## model with crossed random effects for subject AND random intercepts for trial ## WARNING: takes ~20min to converge #kuu.model.fuller <- glmer(response~c.stimulus*condition+(1+c.stimulus*condition|subject)+(1|trial), family=binomial(link="logit"), data=kuu) #anova(kuu.model.full,kuu.model.fuller) ## models without random slopes for interaction term kuu.model.trial.int <- glmer(response ~ c.stimulus * condition + trial + (1 + c.stimulus + condition + trial | subject), family = binomial(link = "logit"), data=kuu) kuu.model.trial <- glmer(response ~ c.stimulus + condition + trial + (1 + c.stimulus + condition + trial | subject), family = binomial(link="logit"), data=kuu) anova(kuu.model.int, kuu.model.trial.int) ## adding trial doesn't seem to matter much ## cf. simpler models (all including by-participant random slopes and intercepts) kuu.model.less <- glmer( response ~ c.stimulus * condition + (1 + c.stimulus + condition | subject), family = binomial(link = "logit"), data = kuu ) kuu.model.lesser <- glmer( response ~ c.stimulus + condition + (1 + c.stimulus + condition | subject), family = binomial(link = "logit"), data = kuu ) anova(kuu.model.full, kuu.model.less) anova(kuu.model.less, kuu.model.lesser) anova(kuu.model.full, kuu.model.lesser) ## adding other predictors (e.g. log(RT), age, education) doesn't significantly improve fit kuu.model.edu <- glmer( response ~ c.stimulus + condition + edu + (1 + c.stimulus + condition | subject), family = binomial(link = "logit"), data = kuu ) kuu.model.sex <- glmer( response ~ c.stimulus + condition + sex + (1 + c.stimulus + condition | subject), family = binomial(link = "logit"), data = kuu ) kuu.model.age <- glmer( response ~ c.stimulus + condition + age + (1 + c.stimulus + condition | subject), family = binomial(link = "logit"), data = kuu ) ## more importantly, significance of main covariates (c.stimulus and condition) and estimates of effect size and variance are stable across models ## conclusion: interaction term unnecessary, but we do need to allow by-participant random slopes and intercepts for both step and condition. fm1 <- kuu.model.lesser ################## ## goodness of fit ################## ## transform fitted log odds ratios into probabilities by hand probs = 1/(1+exp(-fitted(fm1))) ## ...or with linkinv probs = binomial()$linkinv(fitted(fm1)) ## then calculate Somers D_{xy} and index of concordance somers2(probs, as.numeric(kuu$response)-1) ## observed proportions vs mean predicted probabilities plotlogistic.fit.fnc(fm1, data=kuu) ################ ## sanity checks ################ ## correlations/collinearity kappa(fm1, exact=T) ## are the distribution of the BLUPs for the subject intercepts normally distributed? shapiro.test(ranef(fm1)$subject[,1]) ## looks good (these can also be plotted as a histogram, etc) ## what to do about high correlation between condition:breathy and the intercept? ## try a model with uncorrelated random slopes kuu$conditionasp <- as.matrix(model.matrix(~condition, kuu))[,2] kuu$conditionbreathy <- as.matrix(model.matrix(~condition, kuu))[,3] kuu.uncorr <- glmer(response ~ c.stimulus + conditionasp + conditionbreathy + (1 | subject) + (0 + conditionasp + conditionbreathy | subject) + (0 + c.stimulus | subject), family = binomial(link = "logit"), data = kuu ) anova(fm1, kuu.uncorr) ## significant difference, but no changes in significance of effects, or effect sizes to speak of ## ditto index of concordance, etc ########################### ## fixed and random effects ########################### ## fixed effects fixedEffectsTable(fm1) ## extract variance and correlations of random-effects terms remat <- summary(fm1)@REmat[,c(2:7)] rownames(remat) <- c(1:nrow(remat)) reffs <- as.data.frame(remat) reffs <- reffs[,c(1, 3:6)] colnames(reffs) <- c('subject random effect', 'st. dev.', 'step', 'asp', 'breathy') reffs[,2] <- round(as.numeric(as.character(reffs[,2])), 3) latex(reffs,title="",file="",rowname=NULL,booktabs=TRUE) ## examine subject random effects ranef(fm1)$subject ranefs <- as.data.frame(ranef(fm1)$subject) ranefs <- ranefs[c(1,12,13,14,15,16,17,2,3,4,5,6,7,8,9,10,11),c(1:4)] ranefs[,1] <- round(ranefs[,1],digits=3) ranefs[,2] <- round(ranefs[,2],digits=3) ranefs[,3] <- round(ranefs[,3],digits=3) ranefs[,4] <- round(ranefs[,4],digits=3) latex(ranefs,file="",title="",table.env=FALSE,booktabs=TRUE) ################ ## visualization ################ ## predict() by hand (modified from http://glmm.wikidot.com/examples, esp owl data) fm1a <- glmer( response ~ stimulus + condition + (1 + stimulus + condition | subject), family = binomial(link = "logit"), data = kuu ) kuudat <- with(kuu, expand.grid(stimulus=c(1:7), condition=levels(condition), response = 0)) mm = model.matrix(terms(fm1a),kuudat) kuudat$response = mm %*% fixef(fm1a) kuudat$Presponse = plogis(mm %*% fixef(fm1a)) ## compute the per-point variances (XVX^T) pvar1 <- diag(mm %*% tcrossprod(vcov(fm1a),mm)) ## add the estimated variance of the random-effects term in the mixed-effects model tvar1 <- pvar1 + VarCorr(fm1a)$subject[1] kuudat <- data.frame(kuudat, plo = kuudat$response-2*sqrt(pvar1), phi = kuudat$response+2*sqrt(pvar1)) ## here we plot the confidence interval on the mean of an "average" subject (i.e. not incorporating between-subject variation) x1.plot<-qplot(stimulus, response, data=kuudat, colour=condition, linetype=condition, ylab="average logit(/kruu/)", xlab="stimulus step on f0 continuum", main="average log-odds of /kruu/ response") + geom_smooth(aes(ymin = plo, ymax = phi), data=kuudat, stat="identity", size=1) + geom_point(size=3) x1.plot<-x1.plot + theme(legend.key.width = unit(0.75, "cm")) + guides(colour = guide_legend(override.aes = list(shape = NA))) ## we can also plot the responses in probability space: x2.plot<-qplot(stimulus, Presponse, data=kuudat, colour=condition, linetype=condition, ylab="average Pr(/kruu/)", xlab="stimulus step on f0 continuum") + geom_smooth(aes(ymin = plogis(plo), ymax = plogis(phi)), data=kuudat, stat="identity", size=1) + geom_point(size=3) + theme(plot.title = element_text(hjust = 0.75, vjust = 2, size = 12), axis.title.x = element_text(vjust = -0.5, hjust=0.6, size = 11), axis.title.y = element_text(angle = 90, vjust = 0.125, size = 11)) + ylim(0,1) + scale_colour_manual(values=myPalette) + scale_x_continuous(breaks=1:7) + scale_linetype_manual(values=c(1, 2, 3)) ## with error bars instead of shading: x2.plot<-qplot(stimulus, Presponse, data=kuudat, colour=condition, linetype=condition, ylab="average Pr(/kruu/)", xlab="stimulus step on f0 continuum") + geom_line(size=1) + geom_errorbar(aes(ymin = plogis(plo), ymax = plogis(phi), width=0.2), data=kuudat, stat="identity", size=1) + geom_point(size=3) + theme(plot.title = element_text(hjust = 0.75, vjust = 2, size = 12), axis.title.x = element_text(vjust = -0.5, hjust=0.6, size = 11), axis.title.y = element_text(angle = 90, vjust = 0.125, size = 11)) + ylim(0,1) + scale_x_continuous(breaks=1:7) + scale_linetype_manual(values=c(1, 2, 3)) + scale_colour_grey(start=0,end=0.6) # + scale_colour_manual(values=myPalette) x2.plot<-x2.plot + theme(legend.key.width = unit(1, "cm")) + guides(colour = guide_legend(override.aes = list(shape = NA))) ## just in probability space, please quartz(,4.5,3) print(x2.plot) ############################################### ## Experiment 2: /kraa/ ############################################### ################################## ## EDA: subject-specific responses ################################## kraaResp <- ftable(xtabs ( ~ stimulus + response + condition + subject, data = kaa)) kraaResps <- droplevels(subset(as.data.frame(kraaResp), response=='kraa')) kraaResps$subject <- factor(kraaResps$subject,levels=c('s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20')) kraaResps$condition <- factor(kraaResps$condition, levels=c('f0', 'asp', 'breathy', 'F1')) kraaResp.plot <- ggplot(kraaResps, aes(stimulus, Freq/10)) + facet_wrap(~subject) + geom_line(aes(group=condition, colour=condition)) + scale_colour_manual(values=myPalette) + opts(axis.title.x = theme_text(vjust = -0.5, hjust=0.6, size = 11), axis.title.y = theme_text(angle = 90, vjust = 0.125, size = 11)) + ylim(0,1) + scale_x_discrete("F1 stimulus step") + scale_y_continuous("% /kraa/ response") quartz(,8,6) print(kraaResp.plot) rm(kraaResps, kraaResp) ##################### ## coding & centering ##################### kaa$stimulus <- as.integer(kaa$stimulus) kaa$condition <- factor(kaa$condition, levels=c('f0', 'asp', 'breathy', 'F1')) ## re-order condition so colours match up kaa$response <- factor(kaa$response, levels=c('kaa', 'kraa')) ## re-order response to make y = kraa = 1 # contrasts(kaa$condition) <- contr.helmert(3) ## Helmert coding kaa$c.stimulus <- kaa$stimulus - mean(kaa$stimulus) ## center stimulus, since (stimulus = 0) doesn't make any sense here ## remove participants who responded seemingly at random kaa <- droplevels(kaa[ ! kaa$subject %in% c('s12', 's13', 's18', 's19'), ]) ## remove responses w/RTs < 200ms nrow(subset(kaa, RT <= 200)) ## 42 responses removed kaa <- subset(kaa, RT > 200) ## re-order condition for proper treatment coding kaa$condition <- factor(kaa$condition, levels=c('F1', 'f0', 'asp', 'breathy')) ################ ## model fitting ################ ## null model kaa.model.null <- glmer(response ~ 1 + (1 | subject), family=binomial(link="logit"), data=kaa) ## full model with interaction term: 'iteration limit reached without convergence'/'function evaluation limit reached without convergence' #kaa.model.full <- glmer(response ~ c.stimulus * condition + (1 + c.stimulus * condition | subject), family = binomial(link = "logit"), data = kaa) ## varying-slope model with interaction term: 'singular convergence' #kaa.model.slopes <- glmer(response ~ c.stimulus * condition + (1 | subject) + (0+c.stimulus:condition|subject) + (0+c.stimulus|subject) + (0+condition|subject), family = binomial(link = "logit"), data = kaa) ## quasi-full models kaa.model.full <- glmer(response ~ c.stimulus * condition + (1 + c.stimulus + condition | subject), family = binomial(link="logit"), data = kaa) kaa.model.no.interact <- glmer(response ~ c.stimulus + condition + (1 + c.stimulus + condition | subject), family = binomial(link="logit"), data = kaa) kaa.model.intercept.only <- glmer(response ~ c.stimulus + condition + (1 | subject), family = binomial(link = "logit"), data = kaa) anova(kaa.model.full, kaa.model.no.interact) ## difference marginally significant at best anova(kaa.model.no.interact, kaa.model.intercept.only) ## ...but additional random effects structure appears justified ## what about intercepts for trial? kaa.model.trial <- glmer(response ~ c.stimulus + condition + trial + (1|subject) + (1|trial), family = binomial(link="logit"), data=kaa) kaa.model.trial.int <- glmer(response ~ c.stimulus * condition + trial + (1 | subject) + (1|trial), family = binomial(link = "logit"), data=kaa) anova(kaa.model.trial, kaa.model.trial.int) ## interaction probably not necessary anova(kaa.model.trial, kaa.model.no.interact) ## adding trial seriously decreases log-likelihood ## adding other predictors , like log(RT) and age, doesn't significantly improve fit either fm2 <- kaa.model.no.interact ################## ## goodness of fit ################## ## index of concordance probs = binomial()$linkinv(fitted(fm2)) somers2(probs, as.numeric(kaa$response)-1) ## observed proportions vs mean predicted probabilities plotlogistic.fit.fnc(fm2, data=kaa) ################ ## sanity checks ################ ## correlation/collinearity kappa(fm2, exact=T) ## are the distribution of the BLUPs for the subject intercepts normally distributed? shapiro.test(ranef(fm2)$subject[,1]) ## looks good (these can also be plotted as a histogram, etc) ## similar correlation issues as with /kuu/ data ## a model with uncorrelated slopes: kaa$conditionf0 <-as.matrix(model.matrix(~condition, kaa))[,2] kaa$conditionasp <-as.matrix(model.matrix(~condition, kaa))[,3] kaa$conditionbreathy<- as.matrix(model.matrix(~condition, kaa))[,4] kaa.uncorr <- glmer(response ~ c.stimulus + conditionf0 + conditionasp + conditionbreathy + (1 | subject) + (0 + conditionf0 + conditionasp + conditionbreathy | subject) + (0 + c.stimulus | subject), family = binomial(link = "logit"), data = kaa ) anova(fm2,kaa.uncorr) ## here the significance of conditionasp does change: in the uncorrelated slopes model, it is not significant. ## also some of the parameter estimates change ########################### ## fixed and random effects ########################### ## fixed effects fixedEffectsTable(fm2) ## extract variance and correlations of random-effects terms remat <- summary(fm2)@REmat[,c(2:8)] rownames(remat) <- c(1:nrow(remat)) reffs <- as.data.frame(remat) reffs <- reffs[,c(1, 3:7)] colnames(reffs) <- c('', 'st. dev.', 'step', 'f0', 'asp', 'breathy') reffs[,2] <- round(as.numeric(as.character(reffs[,2])), 3) latex(reffs,title="",file="",rowname=NULL,booktabs=TRUE) ## subject random effects ranef(fm2)$subject ranefs <- as.data.frame(ranef(fm2)$subject) ranefs <- ranefs[c(1,8,10,11,12,13,14,15,16,2,3,4,5,6,7,9),] ranefs[,1] <- round(ranefs[,1],digits=3) ranefs[,2] <- round(ranefs[,2],digits=3) ranefs[,3] <- round(ranefs[,3],digits=3) ranefs[,4] <- round(ranefs[,4],digits=3) ranefs[,5] <- round(ranefs[,5],digits=3) latex(ranefs,file="",title="",table.env=FALSE,booktabs=TRUE) rm(ranefs) ################ ## visualization ################ ## model for visualization kaa$condition <- factor(kaa$condition, levels=c('f0', 'asp', 'breathy', 'F1')) ## re-order condition so colours match up fm2a <- glmer( response ~ stimulus + condition + (1 + stimulus + condition | subject), family = binomial(link = "logit"), data = kaa ) ## predict() by hand kaadat <- with(kaa, expand.grid(stimulus=c(1:7), condition=levels(condition), response = 0)) mm = model.matrix(terms(fm2a),kaadat) kaadat$response = mm %*% fixef(fm2a) kaadat$Presponse = plogis(mm %*% fixef(fm2a)) ## compute the per-point variances (XVX^T) pvar1 <- diag(mm %*% tcrossprod(vcov(fm2a),mm)) ## add the estimated variance of the random-effects term in the mixed-effects model tvar1 <- pvar1 + VarCorr(fm2a)$subject[1] kaadat <- data.frame(kaadat, plo = kaadat$response-2*sqrt(pvar1), phi = kaadat$response+2*sqrt(pvar1)) ## compute the confidence interval on the mean of an "average" subject (i.e. not incorporating between-subject variation) x3.plot<-qplot(stimulus, response, data=kaadat, colour=condition, ylab="average logit(/krɑɑ/)", xlab="stimulus step on F1 continuum", main="average log-odds of /krɑɑ/ response") + geom_smooth(aes(ymin = plo, ymax = phi), data=kaadat, stat="identity", size=1) + geom_point(size=3) + scale_colour_manual(values=myPalette) ## probability space x4.plot<-qplot(stimulus, Presponse, data=kaadat, linetype=condition, colour=condition, ylab="average Pr(/krɑɑ/)", xlab="stimulus step on F1 continuum") + geom_smooth(aes(ymin = plogis(plo), ymax = plogis(phi)), data=kaadat, stat="identity", size=1) + geom_point(size=3) + scale_colour_manual(values=myPalette) + theme(axis.title.x = theme_text(vjust = -0.5, hjust=0.6, size = 11), axis.title.y = theme_text(angle = 90, vjust = 0.125, size = 11)) + ylim(0, 1) + scale_x_continuous(breaks=1:7) + scale_linetype_manual(values=c(1, 2, 3, 4)) ## with error bars instead of shading x4.plot<-qplot(stimulus, Presponse, data=kaadat, colour=condition, linetype=condition, ylab="average Pr(/krɑɑ/)", xlab="stimulus step on F1/F2 continuum") + geom_line(size=1) + geom_errorbar(aes(ymin = plogis(plo), ymax = plogis(phi), width=0.2), data=kaadat, stat="identity", size=1) + geom_point(size=3) + theme(plot.title = element_text(hjust = 0.75, vjust = 2, size = 12), axis.title.x = element_text(vjust = -0.5, hjust=0.6, size = 11), axis.title.y = element_text(angle = 90, vjust = 0.125, size = 11)) + ylim(0,1) + scale_x_continuous(breaks=1:7) + scale_linetype_manual(values=c(1, 2, 3, 4)) + scale_colour_grey(start=0,end=0.6) #+ scale_colour_manual(values=myPalette) x4.plot <- x4.plot + theme(legend.key.width = unit(1, "cm")) + guides(colour = guide_legend(override.aes = list(shape = NA))) ## just in probability space, please quartz(,4.5,3) print(x4.plot) ################################## ## Appendix: binned residual plots ## (Gelman & Hill, 2007) ################################## ## by hand, we can overplot and save space (needs a legend): #binnedplot(fitted(kuu.model.full), resid(kuu.model.full), nclass=30, xlab="Estimated Pr(kruu)") ## nclass = 30, w/about 100 items per bin #aa <- data.frame(binned.resids(subset(kuu, condition=='f0')$stimulus, resid(kuu.model.full), 30)$binned) #bb <- data.frame(binned.resids(subset(kuu, condition=='asp')$stimulus, resid(kuu.model.full), 30)$binned) #cc <- data.frame(binned.resids(subset(kuu, condition=='breathy')$stimulus, resid(kuu.model.full), 30)$binned) #plot(range(bb$xbar), range(bb$ybar, bb$X2se, -bb$X2se), main='Residuals vs. input', xlab='Stimulus step on f0 continuum', ylab='Average residual', type='n') #abline(0, 0, lty = 2) #lines(aa$xbar, aa$X2se) #lines(aa$xbar, -aa$X2se) #points(aa$xbar, aa$ybar, pch = 19) #lines(bb$xbar, bb$X2se, lty=2) #lines(bb$xbar, -bb$X2se, lty=2) #points(bb$xbar, bb$ybar, pch = 20) #lines(cc$xbar, cc$X2se, lty=3) #lines(cc$xbar, -cc$X2se, lty=3) #points(cc$xbar, cc$ybar, pch = 21) #par(mfcol=c(1,1))