fillPeaks() will add intensities for peaks not observed in a certain sample, but in some/most of the other samples of a smple class. The question is:
How good does fillPeaks() estimate the intenisty for my data ?
One way to estimate the quality is to take an xcmsSet grouped without NA values (i.e. one class and minfrac=1), remove the peaks of one sample, and fill them back in.
This way you can create a scatterplot of found peaks vs. filled peaks, and also check for which intensities they occur:
[attachment=1:mvim2ak9]FaahKOscatterFilledFound.png[/attachment:mvim2ak9]
Original vs. filled-in intensities
[attachment=0:mvim2ak9]FaahKOintensityFilledFoundRatio.png[/attachment:mvim2ak9]
Intensity ratios vs. intensity
For our evaluation of Bruker microTOFq data, we found that 94% of the restored intensities are correct, 1% had a deviation of factor 1.1 or more, and 5% had a ratio of less than 0.9 compared to the original peaks.
[attachment=3:mvim2ak9]ScatterFilledFound.png[/attachment:mvim2ak9]Original vs. filled-in intensities
[attachment=2:mvim2ak9]IntensityFilledFoundRatio.png[/attachment:mvim2ak9]
This is the code used:
library(faahKO)
## Extract xcmsSet with KO only
xss <- split(faahko, f=sampclass(faahko))
xs <- xss[[2]]
xsg <- group(xs, minfrac=1)
## remove extra peaks within groups
peaks <- cbind(peaks(xsg),(1:nrow(peaks(xsg))))
meds <- as.vector(groupval(xsg,"medret"))
meds <- meds[which(!is.na(meds))]
peaks <-peaks[meds,]
colnames(peaks) <- c(colnames(peaks(xsg)),"index")
## cleanup groupidx list
for (a in 1: length(groupidx(xsg))) {
gxs=NULL
for (b in 1:length(groupidx(xsg)[[a]])){
if (any(meds == groupidx(xsg)[[a]][b]))
gxs<-c(gxs,groupidx(xsg)[[a]][b])
}
groupidx(xsg)[[a]] <- gxs
}
## change index in groupidx
for (a in 1: length(groupidx(xsg))) {
gxs=NULL
for (b in 1:length(groupidx(xsg)[[a]])){
groupidx(xsg)[[a]][b] <- which(peaks[,"index"]==groupidx(xsg)[[a]][b])
}
}
## Write "cleaned" peaklist
peaks(xsg) <- peaks[,(1:(ncol(peaks)-1))]
## Create backup for later comparison
xsbackup <- xsg
## remove peaks from last sample
lasts <- max(peaks(xsg)[,"sample"])
lsmin <- min(which(peaks(xsg)[,"sample"]==lasts))
## remove peaksIDs from groupidx
for (a in 1: length(groupidx(xsg))){
groupidx(xsg)[[a]] <- groupidx(xsg)[[a]][which(groupidx(xsg)[[a]]<lsmin)]
}
## remove peaks from peaklist
peaks(xsg) <- peaks(xsg)[1:(lsmin-1),]
##
## Preparations finished, now fillPeaks()
##
xsgf <- fillPeaks(xsg)
## fillPeaks again ?!
xsbackup2 <- fillPeaks(xsbackup)
## compare peaks in both xcmsSets groupwise and samplewise
gxorig <- groupidx(xsbackup2) ## das fillpeaks-ergebnis des backups
gxfill <- groupidx(xsgf) ## das fillpeaks-ergebnis des XS ohne sample lasts
## plot original and filled peaks
mxo<-NA
mxf<-NA
ino<-NA
inf<-NA
for (a in 1: length(gxorig))
{
opeak <- gxorig[[a]][which(peaks(xsbackup2)[gxorig[[a]],"sample"]==length(sampnames(xs)))]
fpeak <- gxfill[[a]][which(peaks(xsgf)[gxfill[[a]],"sample"]==length(sampnames(xs)))]
mxo[a] <- peaks(xsbackup2)[opeak,"mz"]
mxf[a] <- peaks(xsgf)[fpeak,"mz"]
ino[a] <- peaks(xsbackup2)[opeak,"into"]
inf[a] <- peaks(xsgf)[fpeak,"into"]
}
png(filename = "faahKOscatterFilledFound.png")
plot(log(ino),log(inf))
dev.off()
png(filename = "faahKOintensityFilledFoundRatio.png")
plot(ino,inf/ino)
dev.off()
[attachment deleted by admin]