org.fhcrc.cpl.viewer.ms2.commandline
Class PostProcessPepXMLCLM

java.lang.Object
  extended by org.fhcrc.cpl.toolbox.commandline.BaseCommandLineModuleImpl
      extended by org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl
          extended by org.fhcrc.cpl.viewer.ms2.commandline.PostProcessPepXMLCLM
All Implemented Interfaces:
CommandLineModule

public class PostProcessPepXMLCLM
extends BaseViewerCommandLineModuleImpl
implements CommandLineModule

Post-processes a pepXML file.


Field Summary
protected static org.apache.log4j.Logger _log
           
protected  boolean adjustQuantZeroAreas
           
protected  java.lang.String badProteinPrefix
           
protected  boolean excludeProteinPrefixQuantOnly
           
protected  java.util.Map<java.io.File,java.lang.Float> fileMedianLogRatioMap
           
protected  java.util.Map<java.io.File,java.util.Map<java.lang.Integer,java.lang.Float>> fileNumCysteinesMedianLogRatioMap
           
protected  boolean filterByProteinPrefix
           
protected  java.lang.String goodProteinPrefix
           
protected  java.util.Set<java.lang.String> heavyPeptidesAllRuns
           
protected  java.lang.String[] labelExplanations
           
protected  java.lang.String[] labelStrings
           
protected  int labelType
           
protected  java.util.Set<java.lang.String> lightPeptidesAllRuns
           
protected  float maxExpect
           
protected  DeltaMassArgumentDefinition.DeltaMassWithType maxFracDeltaMass
           
protected  float maxQuantExpect
           
protected  boolean medianCenter
           
protected  boolean medianCenterAllRunsTogether
           
protected  boolean medianCenterByNumCysteines
           
protected  float minPeptideProphet
           
protected  float minPeptideProphetForMedian
           
protected  float minQuantPeptideProphet
           
protected  int minRatiosForMedianCenter
           
protected  java.io.File outDir
           
protected  java.io.File outFile
           
protected  java.util.Set<java.lang.String> peptidesToStrip
           
protected  java.io.File[] pepXmlFiles
           
protected  int percentileForQuantZeroAreaAdjustment
           
protected  java.util.Set<java.lang.String> proteinsToKeep
           
protected  java.util.Set<java.lang.String> proteinsToStrip
           
protected  boolean requirePepXmlExtension
           
protected  boolean showCharts
           
protected  boolean stripLightIDs
           
protected  boolean stripQuantMissingLightOrHeavyAcrossAll
           
protected  boolean stripQuantMissingLightOrHeavyWithinRun
           
protected  boolean stripQuantNotInHeavyAcrossAll
           
protected  boolean stripQuantSingleScans
           
protected  boolean stripQuantZeroAreas
           
 
Fields inherited from class org.fhcrc.cpl.toolbox.commandline.BaseCommandLineModuleImpl
mArgumentDefs, mArgumentValues, mArgumentValueStrings, mCommandName, mHelpMessage, mShortDescription, mUsageMessage
 
Fields inherited from interface org.fhcrc.cpl.toolbox.commandline.CommandLineModule
MODULE_HELP_AUTOMATIC, MODULE_USAGE_AUTOMATIC, UNNAMED_ARG_SERIES_SEPARATOR
 
Constructor Summary
PostProcessPepXMLCLM()
           
 
Method Summary
protected  void addLightHeavyPeptides(FeatureSet featureSet)
           
protected  void adjustQuantZeroAreas(FeatureSet featureSet)
           
 void assignArgumentValues()
          the first step in invoking your module.
protected  void calcLogMedianRatiosAllFiles()
          TODO: fold this in with loadLightHeavyPeptides. This is some really weird stuff, right here.
protected  java.lang.String calcOutputFilename(java.lang.String inputFilename)
           
protected  int countCysteines(java.lang.String peptide)
           
protected  java.util.Map<java.lang.String,java.util.Map<java.lang.Integer,java.util.List<Feature>>> createPeptideChargeFeatureListMap(FeatureSet featureSet)
           
 void execute()
          do the actual work
protected  void filterByProteinPrefix(FeatureSet featureSet)
          Does filtering by protein prefix.
protected  void filterOnQualityScores(FeatureSet featureSet)
           
protected  void handleFeatureFile(java.io.File featureFile, java.io.File outputFile)
           
protected  void init()
           
protected  void loadLightHeavyPeptidesAcrossAll()
           
protected  double log2(double input)
          Log base 2
protected  java.util.List<java.lang.Float> logMedianCenterOn0(java.util.List<java.lang.Float> allInputList, java.util.List<java.lang.Float> inputListForMedianCalc, java.lang.String chartTitleSuffix)
          Median-centers the natural log on 0; the result is returned rather than applied in place.
protected  void processFeatureSet(FeatureSet featureSet)
           
protected  java.util.List<java.lang.String> readOneStringPerLine(java.io.File file)
          Read each line as a String, stopping at first whitespace
protected  void stripQuantWithoutLightOrHeavyIDWithinSet(FeatureSet featureSet)
           
 
Methods inherited from class org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl
createUnnamedFeatureFileArgumentDefinition, createUnnamedSeriesFeatureFileArgumentDefinition, getFastaFileArgumentValue, getFeatureSetArgumentValue, getModificationListArgumentValue
 
Methods inherited from class org.fhcrc.cpl.toolbox.commandline.BaseCommandLineModuleImpl
addArgumentDefinition, addArgumentDefinitions, addArgumentDefinitions, assertArgumentAbsent, assertArgumentAbsent, assertArgumentPresent, assertArgumentPresent, createArgsTableHTML, createUnnamedFileArgumentDefinition, createUnnamedSeriesFileArgumentDefinition, digestArguments, getAdvancedArgumentDefinitions, getArgumentDefinition, getArgumentDefinitions, getArgumentDefinitionsSortedForDisplay, getArgumentValue, getArgumentValues, getArgumentValueStrings, getBasicArgumentDefinitions, getBooleanArgumentValue, getCommandName, getDeltaMassArgumentValue, getDoubleArgumentValue, getDoubleArrayArgumentValue, getFileArgumentValue, getFileArrayArgumentValue, getFloatArgumentValue, getFullHelp, getHelpMessage, getHtmlHelpFragment, getIntegerArgumentValue, getPrintWriter, getShortDescription, getStringArgumentValue, getUnnamedArgumentValue, getUnnamedFileArgumentValue, getUnnamedSeriesArgumentValues, getUnnamedSeriesFileArgumentValues, getUsage, hasAdvancedArguments, hasArgumentValue, hasUnnamedArgumentValue, hasUnnamedSeriesArgumentValue, invoke, makeHtmlSafe, sortArgDefsForDisplay, toString
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 
Methods inherited from interface org.fhcrc.cpl.toolbox.commandline.CommandLineModule
digestArguments, getAdvancedArgumentDefinitions, getArgumentDefinition, getArgumentDefinitions, getArgumentDefinitionsSortedForDisplay, getArgumentValueStrings, getBasicArgumentDefinitions, getCommandName, getFullHelp, getHelpMessage, getHtmlHelpFragment, getShortDescription, getUsage, invoke
 

Field Detail

_log

protected static org.apache.log4j.Logger _log

pepXmlFiles

protected java.io.File[] pepXmlFiles

medianCenter

protected boolean medianCenter

medianCenterByNumCysteines

protected boolean medianCenterByNumCysteines

medianCenterAllRunsTogether

protected boolean medianCenterAllRunsTogether

stripQuantMissingLightOrHeavyWithinRun

protected boolean stripQuantMissingLightOrHeavyWithinRun

stripQuantMissingLightOrHeavyAcrossAll

protected boolean stripQuantMissingLightOrHeavyAcrossAll

stripQuantNotInHeavyAcrossAll

protected boolean stripQuantNotInHeavyAcrossAll

stripLightIDs

protected boolean stripLightIDs

adjustQuantZeroAreas

protected boolean adjustQuantZeroAreas

stripQuantZeroAreas

protected boolean stripQuantZeroAreas

stripQuantSingleScans

protected boolean stripQuantSingleScans

fileMedianLogRatioMap

protected java.util.Map<java.io.File,java.lang.Float> fileMedianLogRatioMap

fileNumCysteinesMedianLogRatioMap

protected java.util.Map<java.io.File,java.util.Map<java.lang.Integer,java.lang.Float>> fileNumCysteinesMedianLogRatioMap

filterByProteinPrefix

protected boolean filterByProteinPrefix

percentileForQuantZeroAreaAdjustment

protected int percentileForQuantZeroAreaAdjustment

badProteinPrefix

protected java.lang.String badProteinPrefix

goodProteinPrefix

protected java.lang.String goodProteinPrefix

excludeProteinPrefixQuantOnly

protected boolean excludeProteinPrefixQuantOnly

outFile

protected java.io.File outFile

outDir

protected java.io.File outDir

peptidesToStrip

protected java.util.Set<java.lang.String> peptidesToStrip

proteinsToStrip

protected java.util.Set<java.lang.String> proteinsToStrip

proteinsToKeep

protected java.util.Set<java.lang.String> proteinsToKeep

showCharts

protected boolean showCharts

minRatiosForMedianCenter

protected int minRatiosForMedianCenter

minPeptideProphetForMedian

protected float minPeptideProphetForMedian

minPeptideProphet

protected float minPeptideProphet

minQuantPeptideProphet

protected float minQuantPeptideProphet

maxExpect

protected float maxExpect

maxQuantExpect

protected float maxQuantExpect

requirePepXmlExtension

protected boolean requirePepXmlExtension

maxFracDeltaMass

protected DeltaMassArgumentDefinition.DeltaMassWithType maxFracDeltaMass

labelType

protected int labelType

lightPeptidesAllRuns

protected java.util.Set<java.lang.String> lightPeptidesAllRuns

heavyPeptidesAllRuns

protected java.util.Set<java.lang.String> heavyPeptidesAllRuns

labelStrings

protected java.lang.String[] labelStrings

labelExplanations

protected java.lang.String[] labelExplanations
Constructor Detail

PostProcessPepXMLCLM

public PostProcessPepXMLCLM()
Method Detail

init

protected void init()

assignArgumentValues

public void assignArgumentValues()
                          throws ArgumentValidationException
Description copied from interface: CommandLineModule
the first step in invoking your module. The values assigned to the various arguments by the user are passed to your module for storage and additional validation. Any communication with the user about their argument values should be done by this method.

Specified by:
assignArgumentValues in interface CommandLineModule
Throws:
ArgumentValidationException

readOneStringPerLine

protected java.util.List<java.lang.String> readOneStringPerLine(java.io.File file)
                                                         throws ArgumentValidationException
Read each line as a String, stopping at first whitespace

Parameters:
file -
Returns:
Throws:
ArgumentValidationException

execute

public void execute()
             throws CommandLineModuleExecutionException
do the actual work

Specified by:
execute in interface CommandLineModule
Throws:
CommandLineModuleExecutionException

calcLogMedianRatiosAllFiles

protected void calcLogMedianRatiosAllFiles()
                                    throws CommandLineModuleExecutionException
TODO: fold this in with loadLightHeavyPeptides. This is some really weird stuff, right here. This method handles calculation of median log ratios, regardless of whether you're doing it separately by file or all together, by number of cysteines or not. It creates a map data structure that gets used down below, with a key for each file. If we're doing global centering, the value gets repeated for each key. This is a bit silly, but it makes things simpler below.

Throws:
CommandLineModuleExecutionException

loadLightHeavyPeptidesAcrossAll

protected void loadLightHeavyPeptidesAcrossAll()
                                        throws CommandLineModuleExecutionException
Throws:
CommandLineModuleExecutionException

calcOutputFilename

protected java.lang.String calcOutputFilename(java.lang.String inputFilename)

handleFeatureFile

protected void handleFeatureFile(java.io.File featureFile,
                                 java.io.File outputFile)
                          throws CommandLineModuleExecutionException
Parameters:
featureFile -
outputFile -
Throws:
CommandLineModuleExecutionException

filterOnQualityScores

protected void filterOnQualityScores(FeatureSet featureSet)

countCysteines

protected int countCysteines(java.lang.String peptide)

processFeatureSet

protected void processFeatureSet(FeatureSet featureSet)

adjustQuantZeroAreas

protected void adjustQuantZeroAreas(FeatureSet featureSet)

filterByProteinPrefix

protected void filterByProteinPrefix(FeatureSet featureSet)
Does filtering by protein prefix. If excludeProteinPrefixQuantOnly is true, then this "filtering" just means removing quantitation.

Parameters:
featureSet -

createPeptideChargeFeatureListMap

protected java.util.Map<java.lang.String,java.util.Map<java.lang.Integer,java.util.List<Feature>>> createPeptideChargeFeatureListMap(FeatureSet featureSet)

addLightHeavyPeptides

protected void addLightHeavyPeptides(FeatureSet featureSet)

stripQuantWithoutLightOrHeavyIDWithinSet

protected void stripQuantWithoutLightOrHeavyIDWithinSet(FeatureSet featureSet)

log2

protected double log2(double input)
Log base 2

Parameters:
input -
Returns:

logMedianCenterOn0

protected java.util.List<java.lang.Float> logMedianCenterOn0(java.util.List<java.lang.Float> allInputList,
                                                             java.util.List<java.lang.Float> inputListForMedianCalc,
                                                             java.lang.String chartTitleSuffix)
Median-centers the natural log on 0; the result is returned rather than applied in place.

Parameters:
allInputList -
inputListForMedianCalc - this list gets modified (munged) by the method
Returns:


Fred Hutchinson Cancer Research Center