How good is fillPeaks ?

August 19, 2011, 12:47:44 PM

fillPeaks() will add intensities for peaks not observed in a certain sample, but in some/most of the other samples of a smple class. The question is:
How good does fillPeaks() estimate the intenisty for my data ?
One way to estimate the quality is to take an xcmsSet grouped without NA values (i.e. one class and minfrac=1), remove the peaks of one sample, and fill them back in.
This way you can create a scatterplot of found peaks vs. filled peaks, and also check for which intensities they occur:

[attachment=1:mvim2ak9]FaahKOscatterFilledFound.png[/attachment:mvim2ak9]
Original vs. filled-in intensities

[attachment=0:mvim2ak9]FaahKOintensityFilledFoundRatio.png[/attachment:mvim2ak9]
Intensity ratios vs. intensity

For our evaluation of Bruker microTOFq data, we found that 94% of the restored intensities are correct, 1% had a deviation of factor 1.1 or more, and 5% had a ratio of less than 0.9 compared to the original peaks.
[attachment=3:mvim2ak9]ScatterFilledFound.png[/attachment:mvim2ak9]Original vs. filled-in intensities
[attachment=2:mvim2ak9]IntensityFilledFoundRatio.png[/attachment:mvim2ak9]

This is the code used:

Code: [Select]

library(faahKO)
 
 ## Extract xcmsSet with KO only
 xss <- split(faahko, f=sampclass(faahko))
 xs <- xss[[2]]
 
 xsg <- group(xs, minfrac=1)
 
 ## remove extra peaks within groups
 peaks <- cbind(peaks(xsg),(1:nrow(peaks(xsg))))
 meds <- as.vector(groupval(xsg,"medret"))
 meds <- meds[which(!is.na(meds))]
 peaks <-peaks[meds,]
 colnames(peaks) <- c(colnames(peaks(xsg)),"index")
 
 ## cleanup groupidx list
 for (a in 1: length(groupidx(xsg))) {
     gxs=NULL
     for (b in 1:length(groupidx(xsg)[[a]])){
         if (any(meds == groupidx(xsg)[[a]][b]))
             gxs<-c(gxs,groupidx(xsg)[[a]][b])
     }
     groupidx(xsg)[[a]] <- gxs
 }
 
 ## change index in groupidx
 for (a in 1: length(groupidx(xsg))) {
     gxs=NULL
     for (b in 1:length(groupidx(xsg)[[a]])){
         groupidx(xsg)[[a]][b] <- which(peaks[,"index"]==groupidx(xsg)[[a]][b])
     }
 }
 
 ## Write "cleaned" peaklist
 peaks(xsg) <- peaks[,(1:(ncol(peaks)-1))]
 
 ## Create backup for later comparison
 xsbackup <- xsg
 
 ## remove peaks from last sample
 lasts <- max(peaks(xsg)[,"sample"])
 lsmin <- min(which(peaks(xsg)[,"sample"]==lasts))
 
 ## remove peaksIDs from groupidx
 for (a in 1: length(groupidx(xsg))){
     groupidx(xsg)[[a]] <- groupidx(xsg)[[a]][which(groupidx(xsg)[[a]]<lsmin)]
 }
 
 ## remove peaks from peaklist
 peaks(xsg) <- peaks(xsg)[1:(lsmin-1),]
 
 ##
 ## Preparations finished, now fillPeaks()
 ##
 xsgf <- fillPeaks(xsg)
 
 ## fillPeaks again ?!
 xsbackup2 <- fillPeaks(xsbackup)
 
 ## compare peaks in both xcmsSets groupwise and samplewise
 gxorig <- groupidx(xsbackup2) ## das fillpeaks-ergebnis des backups
 gxfill <- groupidx(xsgf) ## das fillpeaks-ergebnis des XS ohne sample lasts
 
 ## plot original and filled peaks
 mxo<-NA
 mxf<-NA
 ino<-NA
 inf<-NA
 for (a in 1: length(gxorig))
 {
     opeak <- gxorig[[a]][which(peaks(xsbackup2)[gxorig[[a]],"sample"]==length(sampnames(xs)))]
     fpeak <- gxfill[[a]][which(peaks(xsgf)[gxfill[[a]],"sample"]==length(sampnames(xs)))]
     mxo[a] <- peaks(xsbackup2)[opeak,"mz"]
     mxf[a] <- peaks(xsgf)[fpeak,"mz"]
     ino[a] <- peaks(xsbackup2)[opeak,"into"]
     inf[a] <- peaks(xsgf)[fpeak,"into"]
 }
 
 png(filename = "faahKOscatterFilledFound.png")
 plot(log(ino),log(inf))
 dev.off()
 png(filename = "faahKOintensityFilledFoundRatio.png")
 plot(ino,inf/ino)
 dev.off()

[attachment deleted by admin]