1
MS-DIAL / Re: Not detecting/integrating both partially-separated peaks (MS-DIAL v4.48 Windows)
Have you been able to make any progress on modifying the source code?
Many thanks,
Taylan
This section allows you to view all Messages made by this member. Note that you can only see Messages made in areas you currently have access to.
library(igraph)
# Load the combined edge list exported from GNPS and keep the two node-ID columns
dfr <- read.csv("GnpsEdge_0_20202251139_COMBINED.csv")
edgelist <- dfr[, c("ID1", "ID2")]
# Node IDs are converted to character on purpose: with integer vertex IDs the
# components output contains ALL integers from 1 to the largest ID in the
# edge list, i.e. nodes that do not exist in the data are created artificially.
# Transposing first interleaves the columns so the flat vector reads
# ID1, ID2, ID1, ID2, ... (one pair per edge).
edge.pairs <- t(edgelist)
edges <- as.character(as.vector(edge.pairs))
g1 <- graph(edges, directed = FALSE)
comp1 <- components(g1)
# One row per node: the node ID and the connected component it belongs to
comp.membership <- data.frame(
  node = as.numeric(names(comp1$membership)),
  annotation.group = as.numeric(comp1$membership)
)
# Restrict `xdata` to its DDA files while keeping the chromatographic peaks and
# the feature definitions consistent with each other.
j <- grep("NEG_DDA", fileNames(xdata)) # index of DDA files
MS2.file.paths <- fileNames(xdata)[j] # file paths of DDA files
MS2.file.names <- gsub(".*/", "", MS2.file.paths) # names of DDA files
fd <- featureDefinitions(xdata) # extract feature defs as a new object
fv <- featureValues(xdata) # extract feature values as a new object
# filter feature values to include only DDA samples; drop = FALSE keeps a
# matrix even when only one DDA file matches
fv.filtered <- fv[, colnames(fv) %in% MS2.file.names, drop = FALSE]
cp <- chromPeaks(xdata) # extract chromatographic peaks as a new object
# add a temporary column holding the original row position, used for matching
# with 'peakidx' in the feature definitions (suggested by CoreyG)
cp <- cbind(rowid = seq_len(nrow(cp)), cp)
# filter cp to include only DDA samples; drop = FALSE keeps a matrix even when
# a single peak remains
cp.filtered <- cp[cp[, "sample"] %in% j, , drop = FALSE]
# Translate every feature's peak indices from rows of the full peak matrix to
# rows of cp.filtered; peaks that are not in a DDA sample are dropped.
peakidx.filtered <- unname(lapply(fd@listData$peakidx, function(idx) {
  pos <- match(idx, cp.filtered[, "rowid"])
  pos[!is.na(pos)]
}))
fd.filtered <- fd # duplicate original feature definitions
fd.filtered@listData$peakidx <- peakidx.filtered # overwrite peakidx in duplicated fd with the filtered peakidx
# create a new xdata object with only DDA samples; correspondence results are
# removed and will be added back in below
xdata.filtered <- filterFile(xdata, MS2.file.names, keepAdjustedRtime = TRUE)
# Renumber the samples so they start at 1 and follow the order of the DDA files
# in `j`. Matching against `j` (rather than against the samples that happen to
# have peaks) keeps the numbering right when a DDA file has no peaks at all,
# and leaving the rows in place keeps them aligned with peakidx.filtered.
cp.filtered.v2 <- cp.filtered
cp.filtered.v2[, "sample"] <- match(cp.filtered[, "sample"], j)
chromPeaks(xdata.filtered) <- cp.filtered.v2 # GOAL !!!
featureDefinitions(xdata.filtered) <- fd.filtered # GOAL !!!
# export MS1 and MS2 features
# Pull the MS2 spectra of all features, clean them and format them for GNPS
filteredMs2Spectra <- formatSpectraForGNPS(
  clean(featureSpectra(xdata.filtered, return.type = "Spectra"), all = TRUE)
)
writeMgfData(filteredMs2Spectra, paste0(run.name, "_ms2spectra_all.mgf"))
# generate data table (i.e. peak table) in format needed for GNPS/FBMN
setwd(save.dir)
featuresDef <- featureDefinitions(xdata.filtered)
featuresIntensities <- featureValues(xdata.filtered, value = "into")
# by = 0 joins on row names, i.e. on the feature IDs
dataTable <- merge(featuresDef, featuresIntensities, by = 0, all = TRUE)
# the list column 'peakidx' is removed before writing the table
keep.cols <- !(colnames(dataTable) %in% c("peakidx"))
dataTable <- dataTable[, keep.cols]
# UPLOAD TO GNPS for FBMN
write.table(dataTable, paste0(run.name, "_xcms_all.txt"),
            sep = "\t", quote = FALSE, row.names = FALSE)
#export MS2 features only
setwd(save.dir)
# keep one spectrum per feature, chosen with maxTic
filteredMs2Spectra_maxTic <- combineSpectra(
  filteredMs2Spectra,
  fcol = "feature_id",
  method = maxTic
)
# UPLOAD TO GNPS for FBMN
writeMgfData(filteredMs2Spectra_maxTic, paste0(run.name, "_ms2spectra_maxTic.mgf"))
# restrict the peak table to features that have at least one MS2 spectrum
ms2.feature.ids <- filteredMs2Spectra@elementMetadata$feature_id
filteredDataTable <- dataTable[dataTable$Row.names %in% ms2.feature.ids, ]
# NOTE: apparently this is the table that can be used with GNPS/FBMN
write.table(filteredDataTable, paste0(run.name, "_xcms_onlyMS2.txt"),
            sep = "\t", quote = FALSE, row.names = FALSE)
> featureValues(xdata.filtered) <- fv.filtered
Error in featureValues(xdata.filtered) <- fv.filtered :
could not find function "featureValues<-"
featureDefinitions(xdata.filtered) <- fd.filtered
I'm not quite sure how this happens. And while the newly populated feature values have the same dimensions as fv.filtered, they are not identical:
> all(fv.filtered == featureValues(xdata.filtered))
[1] FALSE
# Manually collect, for one file, the MS2 spectra that fall inside each
# chromatographic peak. Adapted from the xcms internals linked below.
pks <- chromPeaks(xdata) # extract chromatographic peaks as a separate object...and got an error message
xdata_filtered <- filterMsLevel(as(xdata, "OnDiskMSnExp"), 2L) # extract MS2 data as a separate object
## Split data per file
file_factor <- factor(pks[, "sample"]) # define each sample as a separate factor
pks <- split.data.frame(pks, f = file_factor) # make a separate peak matrix for each sample
file_index <- as.integer(levels(file_factor)) # file numbers that have peaks
xdata_filtered <- lapply(file_index, filterFile, object = xdata_filtered)
## You then need to loop through xdata_filtered and pks for the samples you need. Each entry in xdata_filtered becomes 'x'; and 'pks' is the corresponding entry in the pks list.
n <- 1 # index in xdata_filtered and pks
method <- "closest_mz"
# File number written into each returned spectrum. It must be an integer value:
# assigning the bare name `fromFile` hands the generic function of that name to
# the slot and fails with 'is(value, "integer") is not TRUE'.
from_file <- file_index[n]
sps <- spectra(xdata_filtered[[n]])
pmz <- precursorMz(xdata_filtered[[n]])
rtm <- rtime(xdata_filtered[[n]])
#https://github.com/sneumann/xcms/blob/557b936967271690140e19224be707d87ea63168/R/functions-XCMSnExp.R#L1877
## Make sure you define all the required parameters i.e. method = 'closest_mz'
peaks_n <- pks[[n]]
res <- vector(mode = "list", nrow(peaks_n))
for (i in seq_len(nrow(peaks_n))) {
  if (is.na(peaks_n[i, "mz"])) {
    next
  }
  # MS2 spectra whose precursor m/z and retention time lie inside the peak
  idx <- which(pmz >= peaks_n[i, "mzmin"] & pmz <= peaks_n[i, "mzmax"] &
                 rtm >= peaks_n[i, "rtmin"] & rtm <= peaks_n[i, "rtmax"])
  if (length(idx) > 0) {
    # several candidates: keep a single one unless all were requested
    if (length(idx) > 1 && method != "all") {
      if (method == "closest_rt") {
        idx <- idx[order(abs(rtm[idx] - peaks_n[i, "rt"]))][1]
      }
      if (method == "closest_mz") {
        idx <- idx[order(abs(pmz[idx] - peaks_n[i, "mz"]))][1]
      }
      if (method == "signal") {
        sps_sub <- sps[idx]
        ints <- vapply(sps_sub, function(z) sum(intensity(z)),
                       numeric(1))
        idx <- idx[order(abs(ints - peaks_n[i, "maxo"]))][1]
      }
    }
    res[[i]] <- lapply(sps[idx], function(z) {
      z@fromFile <- from_file
      z
    })
  }
}
Error in (function (cl, name, valueClass) :
assignment of an object of class “standardGeneric” is not valid for @‘fromFile’ in an object of class “Spectrum2”; is(value, "integer") is not TRUE
If that is sounding like too much stuffing around, you can save featureDefinitions, featureValues and chromPeaks. Filter the file to just the DDA samples. Then you can edit these to be internally consistent and write them back to xdata.
# Locate the DDA files among the files of `xdata`
j <- grep("NEG_DDA", fileNames(xdata))
MS2.file.paths <- fileNames(xdata)[j]
MS2.file.names <- gsub(".*/", "", MS2.file.paths)
# extracting parts of xdata & filtering them ------------------
fd <- featureDefinitions(xdata)
fv <- featureValues(xdata)
is.dda.column <- colnames(fv) %in% MS2.file.names
fv.filtered <- fv[, is.dda.column]
cp <- chromPeaks(xdata)
sample.column <- which(colnames(cp) == "sample")
cp.filtered <- cp[which(cp[, sample.column] %in% j), ]
# filtering files in xdata -----------------------------------------
xdata.filtered <- filterFile(xdata, MS2.file.names, keepAdjustedRtime = TRUE)
# changing sample numbers in filtered, extracted chromatographic peak object to match those in filtered xdata object
# if the sample numbers do not match, an error is generated
file_factor <- factor(cp.filtered[, "sample"])
cp.filtered.split <- split.data.frame(cp.filtered, f = file_factor)
# renumber the samples 1, 2, ... in the order of the factor levels
for (i in 1:length(cp.filtered.split)) {
  cp.filtered.split[[i]][, "sample"] <- i
}
# stack the per-sample peak matrices back into one matrix
cp.filtered.v2 <- do.call(rbind, unname(cp.filtered.split))
# incorporate filtered peaks back into filtered xdata
chromPeaks(xdata.filtered) <- cp.filtered.v2
# `fd` is still the unfiltered feature definitions here
featureDefinitions(xdata.filtered) <- fd
Error in validObject(object) :
invalid class “XCMSnExp” object: Some of the indices in column 'peakidx' of element 'featureDefinitions' do not match rows of the 'chromPeaks' matrix!
# Original peak numbers of the peaks kept in the filtered object, taken from
# the "CP..." row names.
# NOTE(review): assumes the number in each row name equals that peak's row
# position in the unfiltered chromPeaks matrix - confirm for your data.
peak.list <- as.numeric(gsub("CP", "", row.names(chromPeaks(xdata.filtered))))
# 'peakidx' holds row positions in the chromPeaks matrix. Dropping the peaks
# that were filtered out is therefore not enough: the remaining indices must
# also be translated to row positions in chromPeaks(xdata.filtered), otherwise
# the features point at the wrong rows. match() does both in one step
# (peaks that are absent come back as NA and are removed).
fd@listData$peakidx <- lapply(fd@listData$peakidx, function(idx) {
  pos <- match(idx, peak.list)
  pos[!is.na(pos)]
})
featureDefinitions(xdata.filtered) <- fd