#gaussfit by Eric Collins #06 May 2009 #input.csv is a single peak and surrounding few bp; #extraneous peaks should be manually replaced with NA's; #bp in rows; samples in cols; rm(list=ls()) cutoff <- 62000 inputfile = "example.csv" library(MASS) peaks <- read.csv(inputfile,header=T) xes <- peaks[,1] peaks[,1] <- NULL cols <- ncol(peaks) rows <- nrow(peaks) newpeaks <- data.frame(row.names=xes) peaks.coefs <- data.frame(row.names=c("a","w","xc","obs.max","pred.max","pct.diff")) flin <-function(x) x/100 linxes <- flin((100*min(xes)):(100*max(xes))) # par(ask=T) for (i in 1:cols) { cutout <- which(peaks[,i]>cutoff) newpeaks[,i] <- replace(peaks[,i],cutout,NA) pts <- list(x=xes,y=newpeaks[,i]) a.est <- max(peaks[,i],na.rm=T) xc.est <- xes[which(max(peaks[,i],na.rm=T)==peaks[,i])] y0 <- min(peaks[,i],na.rm=T) peaks.nls <- nls(paste('y~a*sqrt(2/pi)/w*exp(-2*((x-xc)/w)^2)+',y0),pts, start=list (a=a.est,w=.5,xc=xc.est), trace=TRUE, na.omit) a <- coef(peaks.nls)[1] w <- coef(peaks.nls)[2] xc <- coef(peaks.nls)[3] obs.max <- max(peaks[,i],na.rm=T) pred.max <- sqrt(2/pi)/w*a pct.diff <- (obs.max - pred.max)/obs.max peaks.coefs[,i] <- rbind(a,w,xc,obs.max,pred.max,pct.diff) if (pred.max>obs.max) ylm <- c(-0.04*pred.max,1.04*pred.max) else ylm <- c(-0.04*obs.max,1.04*obs.max) png(filename=paste(inputfile,i,"png",sep="."),width = 480, height = 480, units="px",pointsize=12,bg="white") plot(xes,newpeaks[,i],main=colnames(peaks[i]),yaxs='i',ylim=ylm,xlab="DNA (bp)",ylab="signal intensity") lines(linxes,y0+a*sqrt(2/pi)/w*exp(-2*((linxes-xc)/w)^2)) dev.off() } colnames(newpeaks) <- colnames(peaks) colnames(peaks.coefs) <- colnames(peaks) write.csv(t(peaks.coefs),"output.csv")