## plots & analysis for kirby2014transitions (ISSP) ## last tested 11 may 2014 with 3.1.0 (2014-04-10) -- "Spring Dance" require(ggplot2) require(plyr) require(reshape2) require(lme4) require(grid) require(prob) require(lmerTest) ##overloads lmer ## helper function std.error <- function(x, na.rm=T) { sqrt(var(x, na.rm=na.rm)/length(x[complete.cases(x)])) } ## set Tufte theme theme_set(theme_bw()) tstyle = theme(legend.position="top", axis.title.x = element_text(vjust = -0.75, hjust=0.5, size = 14), axis.title.y = element_text(angle = 90, vjust = 0.125, size = 14), legend.text = element_text(size=14), legend.title = element_text(size=14), axis.text.x=element_text(size=14), axis.text.y=element_text(size=14), strip.text.x=element_text(size=14)) ## load raw data dur <- read.csv('issp_dur.csv', header=T) ## set of items that potentially *could* have a transitional element: i.e. things w/onset clusters ev <- droplevels(subset(dur, id=='ev')) ## remove a few outliers where t<0 ev <- subset(ev, t>0) ## length of initial cluster ev$clen <- ev$t-ev$m ## proportion of total length ev$cprop <- ev$clen/ev$t ## reordering ev$c1<-factor(ev$c1, levels=c('p','t','k','m','l')) ev$c2<-factor(ev$c2, levels= c('p','t','k','b','d','m','n','ŋ','l','r','s','h')) ev$c2.manner<-factor(ev$c2.manner, levels=c('vless stop','fricative','rhotic','liquid', 'nas','vcd stop','glottal')) ev$condition<-factor(ev$condition, levels=c('slow','fast')) ev$c1.place<-factor(ev$c1.place, levels=c('labial','alveolar','velar')) ev$c2.place<-factor(ev$c2.place, levels=c('labial','alveolar','velar','glottal')) ev$porder<-factor(ev$porder, levels=c('front-back','back-front')) ########################################### ## 3.1: Distribution of voiced transitions ########################################### ## Table 1 round(xtabs(~c1+c2+condition,data=subset(ev,v>0)) / xtabs(~c1+c2+condition,data=ev),2) ## by subject round(xtabs(~condition+subject,data=subset(ev,v>0)) / xtabs(~condition+subject,data=ev),2) ## models ## Class 3 rate.class3<-glmer(hasv~c1+c2+condition+(1|subject)+(1|item),data=subset(ev,huff=='Class 3'),family=binomial(link="logit")) ## Class 2 /n ŋ l/ rate.class2<-glmer(hasv~c1+c2+condition+(1|subject)+(1|item),data=subset(ev,huff=='Class 2'&c2%in%c('n','l','ŋ')),family=binomial(link="logit")) ###################################### ## 3.2: Duration of voiced transitions ###################################### ## plots clong <- melt(data=ev, id.var=c("subject", "condition", "hasv"), measure.vars=c("clen")) clen.mean <- ddply(clong, .(subject, condition, hasv), summarise, "mean" = mean(value)) clen.error <- ddply(clong, .(subject, condition, hasv), summarise, "sd" = sd(value), "se" = std.error(value), "ci" = std.error(value) * qt(0.95/2 + .5, nrow(clong)-1)) clen.sum <- na.omit(merge(clen.mean, clen.error, by=c("subject", "condition", "hasv"), all=T)) clen.sum$subject <- factor(clen.sum$subject, levels=c('s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10')) clen.sum$condition <- factor(clen.sum$condition, levels=c('slow', 'fast')) names(clen.sum)[3]<-"hasV" quartz(,7.6,5) ggplot(clen.sum, aes(x = condition, y = mean, fill = hasV)) + geom_bar(position = position_dodge(width=0.9), stat="identity") + geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci), position=position_dodge(width=0.9), width=0.75) + facet_wrap(~subject,nrow=2) + ylab("mean cluster length (ms)") + tstyle + scale_fill_grey(start=0.4,end=0.8) ## over subjects clong <- melt(data=ev, id.var=c("condition", "hasv"), measure.vars=c("clen")) clen.mean <- ddply(clong, .(condition, hasv), summarise, "mean" = mean(value)) clen.error <- ddply(clong, .(condition, hasv), summarise, "sd" = sd(value), "se" = std.error(value), "ci" = std.error(value) * qt(0.95/2 + .5, nrow(clong)-1)) clen.sum <- na.omit(merge(clen.mean, clen.error, by=c("condition", "hasv"), all=T)) clen.sum$condition <- factor(clen.sum$condition, levels=c('slow', 'fast')) names(clen.sum)[2]<-"hasV" ggplot(clen.sum, aes(x = condition, y = mean, fill = hasV)) + geom_bar(position = position_dodge(width=0.9), stat="identity") + geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci), position=position_dodge(width=0.9),width=0.75) + ylab("mean cluster length (ms)") + tstyle + scale_fill_grey(start=0.4, end=0.8) ## over subjects by class clong <- melt(data=ev, id.var=c("condition", "hasv", "huff"), measure.vars=c("clen")) clen.mean <- ddply(clong, .(condition, hasv, huff), summarise, "mean" = mean(value)) clen.error <- ddply(clong, .(condition, hasv, huff), summarise, "sd" = sd(value), "se" = std.error(value), "ci" = std.error(value) * qt(0.95/2 + .5, nrow(clong)-1)) clen.sum <- na.omit(merge(clen.mean, clen.error, by=c("condition", "hasv", "huff"), all=T)) clen.sum$condition <- factor(clen.sum$condition, levels=c('slow', 'fast')) names(clen.sum)[2]<-"hasV" ggplot(clen.sum, aes(x = condition, y = mean, fill = hasV)) + geom_bar(position = position_dodge(width=0.9), stat="identity") + geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci), position=position_dodge(width=0.9),width=0.75) + facet_wrap(~huff) + ylab("mean cluster length (ms)") + tstyle + scale_fill_grey(start=0.4, end=0.8) ## duration model m0 <- lmer(clen~hasv*condition+c1+c2+(1+condition|subject)+(1+condition|item), data=ev) ####################################################### ## 3.3: comparison with lexical vowels ####################################################### vcomp<-droplevels(subset(dur, t>0 & ((id=='ev' & hasv=='Y')|id=='mono'))) ## compute length of CC-sequence vcomp$clen<-0 vcomp[vcomp$id=='ev',]$clen <- vcomp[vcomp$id=='ev',]$t-vcomp[vcomp$id=='ev',]$m vcomp[vcomp$id=='mono',]$clen <- vcomp[vcomp$id=='mono',]$t levels(vcomp$id)<-c(levels(vcomp$id), 'CvN') vcomp[vcomp$id=='mono'&vcomp$c2.manner=='nas',]$id<-'CvN' vcomp$cc<-rep('OO') vcomp[(vcomp$c1.manner=='vless stop' & vcomp$c2.manner=='nas'),]$cc<-'OS' vcomp[(vcomp$c1.manner=='vless stop' & vcomp$c2.manner=='liquid'),]$cc<-'OS' vcomp[vcomp$c1.manner=='nas',]$cc<-'SO' vcomp[vcomp$c1.manner=='lateral',]$cc<-'SO' vcomp[(vcomp$c1.manner=='nas' & vcomp$c2.manner=='nas'),]$cc<-'SS' vcomp[(vcomp$c1.manner=='nas' & vcomp$c2.manner=='liquid'),]$cc<-'SS' vcomp<-droplevels(subset(vcomp,c2.manner!='rhotic')) oo<-droplevels(subset(vcomp,cc=='OO')) os<-droplevels(subset(vcomp,cc=='OS')) ## models ## obstruent-obstruent oo.model<-lmer(clen~id*condition+(1|subject)+(1|item),data=oo) ## obstruent-sonorant os.model<-lmer(clen~id*condition+(1|subject)+(1|item),data=os) ###################################### # 3.4: formant transitions of \exists ###################################### fmt<-read.csv('issp_vout.csv') fmt$F1<-as.numeric(as.character(fmt$F1)) fmt$F2<-as.numeric(as.character(fmt$F2)) fmt<-fmt[,c(1:8,12:14)] dmt<-dur[,c(1:5,19:33)] ## include hasv column to check that it's done what we want it to... fdur<-merge(fmt,dmt) fdur<-droplevels(subset(fdur,id=='ev')) ## now what does format transition look like by c1 and c2? ## averaging over subjects simps <- na.omit(droplevels(subset(fdur, huff!='Class 1' & c2.place!='glottal' & !(c2 %in% c('r','l','s')) & !(subject %in% c('s5','s6'))))) flong <- melt(data=simps, id.var=c("condition", "t", "c1.place", "c2.place"), measure.vars=c("F1", "F2")) flong$t<-as.factor(flong$t) f2.mean <- ddply(subset(flong,variable=='F2'), .(condition, t, c1.place, c2.place), summarise, "mean" = mean(value)) f2.error <- ddply(subset(flong,variable=='F2'), .(condition, t, c1.place, c2.place), summarise, "sd" = sd(value), "se" = std.error(value), "ci" = std.error(value) * qt(0.95/2 + .5, nrow(subset(flong,variable=='F2'))-1)) f2.sum <- na.omit(merge(f2.mean, f2.error, by=c("condition", "t", "c1.place", "c2.place"), all=T)) f2.sum$condition <- factor(f2.sum$condition, levels=c('slow', 'fast')) f2.plot.avg<-ggplot(f2.sum, aes(x=t, y=mean, colour=c1.place, linetype=condition, group=c1.place:condition)) + facet_wrap(~c2.place) f2.plot.avg + geom_point(aes(shape=c1.place)) + geom_line() + geom_errorbar(width=0.01, aes(condition=mean, ymin=mean-se, ymax=mean+se)) + xlab("position in v") + ylab("F2 (Hz)") + tstyle ## models ## does F2 at t=2 lie on the straight line between F2 at t=1 and F2 at t=3? t1 = droplevels(subset(fdur, t==1 & c2.place!='glottal')) t2 = droplevels(subset(fdur, t==2 & c2.place!='glottal')) t3 = droplevels(subset(fdur, t==3 & c2.place!='glottal')) t2$seg.1 <- t1$F2-t2$F2 t2$seg.2 <- t2$F2-t3$F2 ## remove crazy things qplot(x=F2,y=t2$seg.1,data=t2,geom="point",colour=c2.place) + facet_wrap(~c1.place) qplot(x=F2,y=t2$seg.2,data=t2,geom="point",colour=c2.place) + facet_wrap(~c1.place) summary(lmer(seg.1~1+(1|subject)+(1|item), data=subset(t2, c1.place==c2.place))) summary(lmer(seg.2~1+(1|subject)+(1|item), data=subset(t2, c1.place==c2.place))) ####################################################### ## 3.5: Voiceless transitions ####################################################### ptk<-droplevels(subset(ev, o/t < 0.4&o<125&c1%in%c('p','t','k')&!(condition=='fast'&c2.manner=='vless stop'&hasv=='Y'))) levels(ptk$c2.manner) <- c('/pt/', '/ps/', '/pr/', '/pl/', '/pn/', '/pd/', '/ph/') ptk$c2.manner <- factor(ptk$c2.manner, levels=c('/ph/', '/pl/', '/pn/', '/ps/', '/pt/', '/pd/', '/pr/')) ptk$ctype <- ptk$c2.manner levels(ptk$ctype)<-c(levels(ptk$ctype), '/pn/ (3)') ptk$ctype[ptk$ctype=='/pn/'&ptk$huff=='Class 3']<-'/pn/ (3)' levels(ptk$ctype)<-c(levels(ptk$ctype), '/pn/ (2)') ptk$ctype[ptk$ctype=='/pn/']<-'/pn/ (2)' ptk<-droplevels(ptk) ptk$ctype <- factor(ptk$ctype, levels=c('/ph/', '/pl/', '/pn/ (2)', '/ps/', '/pt/', '/pn/ (3)', '/pd/', '/pr/')) ptk$dgroup <- rep('short') ptk[ptk$ctype %in% c('/ph/', '/pl/', '/pn/ (2)'),]$dgroup <- rep('long') ptk$dgroup <- as.factor(ptk$dgroup) ptk$cgroup <- rep('other') ptk[ptk$ctype=='/ph/',]$cgroup <- rep('ph') ptk[ptk$ctype=='/pr/',]$cgroup <- rep('pr') ptk$cgroup<-factor(ptk$cgroup,levels=c('ph','other','pr')) ## Figure 4 qplot(c2.manner,o,colour=huff,fill=huff,data=ptk,geom='boxplot') + facet_wrap(~condition, scales="free_x") + xlab("cluster type") + ylab("C1 release duration (ms)") + theme_bw() + theme(legend.position="top",panel.margin=unit(1,"lines")) + scale_colour_grey(start=0.5,end=0.2) + scale_fill_grey(start=0.1,end=1) + labs(colour="Huffman class", fill="Huffman class") ## models ## backwards difference coding my.backward.diff<-matrix(c(-7/8,1/8,1/8,1/8,1/8,1/8,1/8,1/8, -6/8,-6/8,2/8,2/8,2/8,2/8,2/8,2/8, -5/8,-5/8,-5/8,3/8,3/8,3/8,3/8,3/8, -4/8,-4/8,-4/8,-4/8,4/8,4/8,4/8,4/8, -3/8,-3/8,-3/8,-3/8,-3/8,5/8,5/8,5/8, -2/8,-2/8,-2/8,-2/8,-2/8,-2/8,6/8,6/8, -1/8,-1/8,-1/8,-1/8,-1/8,-1/8,-1/8,7/8), ncol=7) contrasts(ptk$ctype)<-my.backward.diff ## ctype ordered so coefficient comparison matches order in Figure 4 dslow<-lmer(o~hasv+dgroup+(1|subject)+(1|item),data=subset(ptk,condition=='slow')) dfast<-lmer(o~hasv+dgroup+(1|subject)+(1|item),data=subset(ptk,condition=='fast')) ####################################################### ## 3.6: Cluster length as proportion of syllable ####################################################### ## Figure 5 qplot(c2.manner,clen,colour=huff,fill=huff,data=ptk,geom='boxplot') + facet_wrap(~condition, scales="free_x") + xlab("cluster type") + ylab("CC cluster duration (ms)") + theme_bw() + theme(legend.position="top",panel.margin=unit(1,"lines")) + scale_colour_grey(start=0.5,end=0.2) + scale_fill_grey(start=0.1,end=1) + labs(colour="Huffman class", fill="Huffman class") ## cluster length ev$cgroup<-rep('/pt/') ev[ev$c1%in%c('p','t','k')&ev$c2%in%c('b','d'),]$cgroup<-'/pd/' ev[ev$c1%in%c('p','t','k')&ev$c2%in%c('n','ŋ','l'),]$cgroup<-'/pn/' ev[ev$c1%in%c('p','t','k')&ev$c2%in%c('n','ŋ','l'),]$cgroup<-'/pn/' ev[ev$c1%in%c('p','t','k')&ev$c2%in%c('h'),]$cgroup<-'/ph/' ev[ev$c1%in%c('p','t','k')&ev$c2%in%c('r'),]$cgroup<-'/pr/' ev[ev$c1%in%c('p','t','k')&ev$c2%in%c('l'),]$cgroup<-'/pl/' ev[ev$c1%in%c('l')&ev$c2%in%c('p'),]$cgroup<-'/lp/' ev[ev$c1%in%c('m')&ev$c2%in%c('p','t'),]$cgroup<-'/mt/' ev[ev$c1%in%c('m')&ev$c2%in%c('b','d'),]$cgroup<-'/md/' ev[ev$c1%in%c('l')&ev$c2%in%c('b'),]$cgroup<-'/lb/' ev[ev$c1%in%c('m')&ev$c2%in%c('n','l'),]$cgroup<-'/mn/' ev[ev$c1%in%c('m')&ev$c2%in%c('r'),]$cgroup<-'/mr/' ev[ev$c1%in%c('m')&ev$c2%in%c('s'),]$cgroup<-'/ms/' ev[ev$c1%in%c('m','l')&ev$c2%in%c('h'),]$cgroup<-'/mh/' ev$cgroup<-as.factor(ev$cgroup) ev$cgroup<- factor(ev$cgroup, levels=c()) ## difference within class 1 class1<-droplevels(subset(ptk, huff=='Class 1')) contrasts(class1$c2.manner)<-contr.helmert(3) c1<-lmer(clen~c2.manner+hasv*condition+(1|subject)+(1|item),data=class1) ## classes 2 and 3 class23<-droplevels(subset(ptk, huff!='Class 1')) clus0<-lmer(clen~c2.manner+hasv+condition+(1|subject)+(1|item),data=class23) clus1<-lmer(clen~c2.manner+hasv*condition+(1|subject)+(1|item),data=class23) ## upshot: there are phonetic differences that cross-cut the 'class' systems ## yet, class system reflects something about patterns of gestural organisation in this language. ##################################### ## Not in paper: place order effects ##################################### ## appearance of v round(xtabs(~porder+subject+condition,data=subset(ev,v>0)) / xtabs(~porder+subject+condition,data=ev),2) porder.df<-as.data.frame(round(xtabs(~porder+subject+condition,data=subset(ev,v>0)) / xtabs(~porder+subject+condition,data=ev),2)) porder.df$subject<-factor(porder.df$subject,levels=c('s1','s2','s3','s4','s5','s6','s7','s8','s9','s10')) ## by subject ggplot(data=porder.df, aes(x = condition, y = Freq, fill = porder)) + geom_bar(position = position_dodge(), stat="identity") + facet_wrap(~subject, nrow=2) + ylab(expression(paste("proportion of clusters with ", symbol("\044")))) + theme(legend.position="top",panel.grid.major = element_blank()) ## over subjects ggplot(data=porder.df, aes(x = condition, y = Freq, fill = porder)) + geom_bar(position = position_dodge(), stat="identity") + ylab (expression(paste("proportion of clusters with ", symbol("\044")))) + theme(legend.position="top",panel.grid.major = element_blank()) ## model has1<-glmer(hasv~porder*condition+(1+condition|subject)+(1+condition|item),family=binomial(link="logit"),data=ev) ## duration of v ## by subject pdur <- melt(data=subset(ev,v>0), id.var=c("subject", "condition", "porder"), measure.vars=c("v")) pdur.mean <- ddply(pdur, .(subject, condition, porder), summarise, "mean" = mean(value)) pdur.error <- ddply(pdur, .(subject, condition, porder), summarise, "sd" = sd(value), "se" = std.error(value), "ci" = std.error(value) * qt(0.95/2 + .5, nrow(pdur)-1)) pdur.sum <- na.omit(merge(pdur.mean, pdur.error, by=c("subject", "condition", "porder"), all=T)) pdur.sum$subject <- factor(pdur.sum$subject, levels=c('s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10')) pdur.sum$condition <- factor(pdur.sum$condition, levels=c('slow', 'fast')) ggplot(data=pdur.sum, aes(x = condition, y = mean, fill = porder)) + geom_bar(position = position_dodge(), stat="identity") + geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci), position=position_dodge()) + facet_wrap(~subject, nrow=2) + ylab("mean excr v length (ms)") + tstyle + scale_fill_grey(start=0.4, end=0.8) ## over subjects pdur <- melt(data=subset(ev,v>0), id.var=c("condition", "porder"), measure.vars=c("v")) pdur.mean <- ddply(pdur, .(condition, porder), summarise, "mean" = mean(value)) pdur.error <- ddply(pdur, .(condition, porder), summarise, "sd" = sd(value), "se" = std.error(value), "ci" = std.error(value) * qt(0.95/2 + .5, nrow(pdur)-1)) pdur.sum <- na.omit(merge(pdur.mean, pdur.error, by=c("condition", "porder"), all=T)) pdur.sum$condition <- factor(pdur.sum$condition, levels=c('slow', 'fast')) ggplot(data=pdur.sum, aes(x = condition, y = mean, fill = porder)) + geom_bar(position = position_dodge(), stat="identity") + geom_errorbar(aes(ymin=mean-ci, ymax=mean+ci), position=position_dodge()) + ylab("mean excr v length (ms)") + tstyle + scale_fill_grey(start=0.4, end=0.8) ## model m1<-lmer(v~porder*condition+(1+condition|subject)+(1+condition|item),data=subset(ev, v>0)) ## porder is not significant, nor is the interaction term