# R code for Kirby and Yu (2007) 'Lexical and phonotactic effects on
# wordlikeness judgments in Cantonese'
# Version: 19 April 2017
# Tested with: R version 3.2.4 (2016-03-10) "Very Secure Dishes"
# Written by: James Kirby

# This R code, together with the accompanying .txt file, largely reproduces the
# analyses and figures of the 2007 ICPhS paper. Discrepancies, where present,
# have been noted.

# The raw (non-averaged) response data are also available as part of this
# dataset at
# http://lel.ed.ac.uk/~jkirby/cantonese/results1_10.txt

# This code is (largely) designed to replicate what is in the paper. This is not
# an endorsement of the analysis; we would do things rather differently now.
# By making the data and code available, we hope to encourage alternative
# analyses of these data.

library(plyr)
library(gplots)

# SEM: standard error of the mean, sd(x) / sqrt(n).
# x:     numeric vector.
# na.rm: if TRUE, missing values are dropped before computing; the default
#        (FALSE) keeps the original behaviour, where any NA yields NA.
std.error <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}

# load data
all <- read.delim("http://lel.ed.ac.uk/~jkirby/cantonese/average2.txt")

# slightly updated for 2017
all$Code1 <- factor(all$Code1)
all$Code2 <- factor(all$Code2)
all$Code3 <- factor(all$Code3)

# Derive the syllable type from Code2. Every item starts out as 'Lex' and is
# then relabelled according to its Code2 value. Assigning into the column
# vector (rather than into a row subset of the data frame) is a no-op when a
# code is absent, and %in% never yields NA, so missing codes stay 'Lex'.
code2 <- as.character(all$Code2)
all$syl_type <- "Lex"
all$syl_type[code2 %in% "1"] <- "Acc"
all$syl_type[code2 %in% c("2", "3")] <- "Tone"
all$syl_type[code2 %in% c("4", "5", "6")] <- "Lab"
all$syl_type[code2 %in% c("7", "8")] <- "Cor"
all$syl_type[code2 %in% c("9", "10")] <- "Mul"

# Figure 1
# Mean and SEM of Zscorearcsin per syllable type, highest mean first.
means <- ddply(all, .(syl_type), summarise,
               mean = mean(Zscorearcsin),
               sem = std.error(Zscorearcsin))
means <- means[order(means$mean, decreasing = TRUE), ]

# graphics device
# dev.new() opens the platform's default device; quartz() exists only on macOS.
dev.new(width = 6, height = 6)
mybarcol <- "grey20"

# this seems to reverse the error bars for Tone and Acc in Fig 1 of the paper
mp <- barplot2(means$mean,
               names.arg = means$syl_type,
               ylab = "Zscorearcsin",
               xlab = "syllable type",
               plot.ci = TRUE,
               ci.l = (means$mean - means$sem),
               ci.u = (means$mean + means$sem),
               plot.grid = TRUE,
               grid.inc = 9,
               col = "grey",
               col.sub = mybarcol,
               legend.text = c("Lex=Lexical", "Tone=Onset-Tone",
                               "Acc=Accidental", "Lab=Labial",
                               "Mul=Multiple", "Cor=Coronal"))

# Figure 2
# Split the items on Code1: rows with Code1 == 0 go to 'words', all other
# rows to 'non' (nonwords).
non <- subset(all, Code1 != 0)
words <- subset(all, Code1 == 0)

# Scatterplot of Zscorearcsin against weighted neighborhood density, words as
# filled circles and nonwords as open circles. The axis limits are taken from
# the full data set: plot() would otherwise size the axes from 'words' alone
# and points() would silently clip any nonword outside that range.
plot(words$nnd, words$Zscorearcsin,
     pch = 19,
     xlim = range(all$nnd, finite = TRUE),
     ylim = range(all$Zscorearcsin, finite = TRUE),
     xlab = "weighted neighborhood density",
     ylab = "Zscorearcsin")
points(non$nnd, non$Zscorearcsin, pch = 21)
legend(300, -.5, c("words", "nonwords"), pch = c(19, 21))
# Regression line fitted over all items (words and nonwords together).
abline(lm(Zscorearcsin ~ nnd, data = all))

# Statistics in running text
wilcox.test(non$goodness, words$goodness)
# can only assume 39217.5 was a typo??
wilcox.test(subset(all, syl_type == "Acc")$goodness,
            subset(all, syl_type == "Tone")$goodness)
# otoh none of the U statistics match exactly

# full regression
full <- lm(Zscorearcsin ~ nnd + ptpnt_token, data = all)
# matches

# partial regression
nd_only <- lm(Zscorearcsin ~ nnd, data = all)
# dfs match but adj R2 does not

# descriptive stats match
mean(words$nnd)
sd(words$nnd)
mean(non$nnd)
sd(non$nnd)

# groupwise regressions
# words match, but reported R^2 is not actually adjusted
word.model <- lm(Zscorearcsin ~ nnd + ptpnt_token, data = words)

# nonwords match
non.model <- lm(Zscorearcsin ~ nnd + ptpnt_token, data = non)
non.model2 <- lm(Zscorearcsin ~ nnd, data = non)