org.fhcrc.cpl.viewer.amt
Class AmtMatchProbabilityAssigner

java.lang.Object
  extended by org.fhcrc.cpl.viewer.amt.AmtMatchProbabilityAssigner

public class AmtMatchProbabilityAssigner
extends java.lang.Object

This class assigns a probability to every AMT match, based on the distribution of target and decoy mass and H errors. First, all matches (including multiple matches to the same feature) are mined for their mass and H error values. Those values are fed to an Expectation-Maximization (EM) algorithm, implemented in R, which estimates parameters for a mixture distribution. We assume the true distribution to be a uniform distribution of false matches mixed with a bivariate normal distribution of true matches. We also feed the EM algorithm an initial estimate of the proportion of matches in the bivariate normal distribution, which is based on a match to a decoy database. The EM algorithm returns the distribution parameters, as well as a probability for each match indicating how likely it is that the match belongs to the bivariate normal distribution of true matches. Numerous charts are provided so that the user can judge how well the algorithm has modeled the distribution.


Field Summary
protected  boolean converged
           
protected  java.util.List<java.lang.Double> decoyAreas
           
protected  java.util.List<java.lang.Integer> decoyWindowFeatureNumbers
           
static float DEFAULT_EM_MAX_DELTA_P_FOR_STABLE
           
static int DEFAULT_EM_MAX_ITERATIONS_STABLE_FOR_CONVERGENCE
           
static int DEFAULT_MAX_EM_ITERATIONS
           
static float DEFAULT_MAX_SECONDBEST_PROBABILITY
           
static int DEFAULT_MIN_EM_ITERATIONS
           
static float DEFAULT_MIN_MATCH_PROBABILITY
           
static float DEFAULT_MIN_SECONDBEST_PROBABILITY_DIFFERENCE
           
protected  float expectedTrue
           
protected  float initialProportionTrue
           
protected  boolean keepStatistics
           
static float KS_CUTOFF_FOR_WARN
           
protected  float ks_score_x
           
protected  float ks_score_y
           
static int MAX_R_PROB_ASSIGNMENT_MILLIS
           
static int MAX_TREE_DEPTH
           
protected  float maxDeltaElution
           
protected  float maxDeltaMass
           
protected  int maxEMIterations
           
protected  int maxRProbAssignmentMillis
           
protected  float maxSecondBestProbability
           
protected static float maxXScaff
           
protected static float maxYScaff
           
protected  float meanProbability
           
protected  float minDeltaElution
           
protected  float minDeltaMass
           
protected  int minEMIterations
           
protected  float minMatchProbability
           
protected  float minSecondBestProbabilityDifference
           
protected static float minXScaff
           
protected static float minYScaff
           
protected  float mu_x
           
protected  float mu_y
           
static int NUM_DECOY_POINTS
           
protected  int num_iterations
           
static int NUM_TARGET_POINTS
           
protected  float proportion
           
protected  float quantileBetaX
           
protected  float quantileBetaY
           
protected  float quantileCorrX
           
protected  float quantileCorrY
           
static int SCALING_FACTOR
           
protected  float sigma_x
           
protected  float sigma_y
           
protected  java.util.List<java.lang.Double> targetAreas
           
protected  java.util.List<java.lang.Integer> targetWindowFeatureNumbers
           
protected  float totalElutionRange
           
protected  float totalMassRange
           
 
Constructor Summary
AmtMatchProbabilityAssigner(float minDeltaMass, float maxDeltaMass, float minDeltaElution, float maxDeltaElution, float minMatchProbability)
          Set the loose matching parameters.
 
Method Summary
 java.util.List<Feature> assignMatchesAndProbabilities(FeatureSetMatcher.FeatureMatchingResult targetMatchingResult, FeatureSetMatcher.FeatureMatchingResult decoyMatchingResult, boolean showCharts)
          Use the initial (loose-tolerance) matching data against both the target and decoy databases to determine a probability for each loose match.
protected  double calculateLocalNormalizedDensity(double xpos, double ypos, double expectedNumTrue, double binSizeX, double binSizeY)
          Use the normal CDF to calculate the density of a small area of the distribution.
 float[] calculateProbabilitiesEM(java.util.List<java.lang.Float> targetMassErrorDataList, java.util.List<java.lang.Float> targetHErrorDataList, float proportionTrue, boolean showCharts)
          Use the Expectation Maximization algorithm to determine match probabilities by modeling the true hits as a normal distribution, and the false hits as a uniform distribution, superimposed in the target distribution.
 float getExpectedTrue()
           
 float getInitialProportionTrue()
           
 float getKsScoreX()
           
 float getKsScoreY()
           
 int getMaxEMIterations()
           
 int getMaxRProbAssignmentMillis()
           
 float getMaxSecondBestProbability()
           
 int getMinEMIterations()
           
 float getMinSecondBestProbabilityDifference()
           
 float getMuX()
           
 float getMuY()
           
 int getNumIterations()
           
 float getQuantileBetaX()
           
 float getQuantileBetaY()
           
 float getQuantileCorrX()
           
 float getQuantileCorrY()
           
 float getSigmaX()
           
 float getSigmaY()
           
 boolean isConverged()
           
protected  boolean isInScaffRange(double x, double y)
           
 void setInitialProportionTrue(float initialProportionTrue)
           
 void setMaxEMIterations(int maxEMIterations)
           
 void setMaxRProbAssignmentMillis(int maxRProbAssignmentMillis)
           
 void setMaxSecondBestProbability(float maxSecondBestProbability)
           
 void setMinEMIterations(int minEMIterations)
           
 void setMinSecondBestProbabilityDifference(float minSecondBestProbabilityDifference)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

minXScaff

protected static float minXScaff

maxXScaff

protected static float maxXScaff

minYScaff

protected static float minYScaff

maxYScaff

protected static float maxYScaff

MAX_R_PROB_ASSIGNMENT_MILLIS

public static final int MAX_R_PROB_ASSIGNMENT_MILLIS
See Also:
Constant Field Values

maxRProbAssignmentMillis

protected int maxRProbAssignmentMillis

minDeltaMass

protected float minDeltaMass

maxDeltaMass

protected float maxDeltaMass

minDeltaElution

protected float minDeltaElution

maxDeltaElution

protected float maxDeltaElution

totalMassRange

protected float totalMassRange

totalElutionRange

protected float totalElutionRange

DEFAULT_MIN_EM_ITERATIONS

public static final int DEFAULT_MIN_EM_ITERATIONS
See Also:
Constant Field Values

DEFAULT_MAX_EM_ITERATIONS

public static final int DEFAULT_MAX_EM_ITERATIONS
See Also:
Constant Field Values

DEFAULT_EM_MAX_DELTA_P_FOR_STABLE

public static final float DEFAULT_EM_MAX_DELTA_P_FOR_STABLE
See Also:
Constant Field Values

DEFAULT_EM_MAX_ITERATIONS_STABLE_FOR_CONVERGENCE

public static final int DEFAULT_EM_MAX_ITERATIONS_STABLE_FOR_CONVERGENCE
See Also:
Constant Field Values

minEMIterations

protected int minEMIterations

maxEMIterations

protected int maxEMIterations

minMatchProbability

protected float minMatchProbability

KS_CUTOFF_FOR_WARN

public static final float KS_CUTOFF_FOR_WARN
See Also:
Constant Field Values

DEFAULT_MIN_MATCH_PROBABILITY

public static final float DEFAULT_MIN_MATCH_PROBABILITY
See Also:
Constant Field Values

maxSecondBestProbability

protected float maxSecondBestProbability

minSecondBestProbabilityDifference

protected float minSecondBestProbabilityDifference

MAX_TREE_DEPTH

public static final int MAX_TREE_DEPTH
See Also:
Constant Field Values

SCALING_FACTOR

public static final int SCALING_FACTOR
See Also:
Constant Field Values

NUM_TARGET_POINTS

public static final int NUM_TARGET_POINTS
See Also:
Constant Field Values

NUM_DECOY_POINTS

public static final int NUM_DECOY_POINTS
See Also:
Constant Field Values

DEFAULT_MAX_SECONDBEST_PROBABILITY

public static final float DEFAULT_MAX_SECONDBEST_PROBABILITY
See Also:
Constant Field Values

DEFAULT_MIN_SECONDBEST_PROBABILITY_DIFFERENCE

public static final float DEFAULT_MIN_SECONDBEST_PROBABILITY_DIFFERENCE
See Also:
Constant Field Values

keepStatistics

protected boolean keepStatistics

targetAreas

protected java.util.List<java.lang.Double> targetAreas

decoyAreas

protected java.util.List<java.lang.Double> decoyAreas

targetWindowFeatureNumbers

protected java.util.List<java.lang.Integer> targetWindowFeatureNumbers

decoyWindowFeatureNumbers

protected java.util.List<java.lang.Integer> decoyWindowFeatureNumbers

initialProportionTrue

protected float initialProportionTrue

meanProbability

protected float meanProbability

expectedTrue

protected float expectedTrue

ks_score_x

protected float ks_score_x

ks_score_y

protected float ks_score_y

quantileCorrX

protected float quantileCorrX

quantileCorrY

protected float quantileCorrY

quantileBetaX

protected float quantileBetaX

quantileBetaY

protected float quantileBetaY

mu_x

protected float mu_x

mu_y

protected float mu_y

sigma_x

protected float sigma_x

sigma_y

protected float sigma_y

num_iterations

protected int num_iterations

converged

protected boolean converged

proportion

protected float proportion
Constructor Detail

AmtMatchProbabilityAssigner

public AmtMatchProbabilityAssigner(float minDeltaMass,
                                   float maxDeltaMass,
                                   float minDeltaElution,
                                   float maxDeltaElution,
                                   float minMatchProbability)
Set the loose matching parameters. Possibly the target and decoy matching results should be specified here, too.

Parameters:
minDeltaMass -
maxDeltaMass -
minDeltaElution -
maxDeltaElution -
minMatchProbability -
Method Detail

isInScaffRange

protected boolean isInScaffRange(double x,
                                 double y)

assignMatchesAndProbabilities

public java.util.List<Feature> assignMatchesAndProbabilities(FeatureSetMatcher.FeatureMatchingResult targetMatchingResult,
                                                             FeatureSetMatcher.FeatureMatchingResult decoyMatchingResult,
                                                             boolean showCharts)
                                                      throws java.io.IOException
Use the initial (loose-tolerance) matching data against both the target and decoy databases to determine a probability for each loose match. Probabilities get assigned directly to the MS1 features. This method also makes the peptide assignments: one peptide per MS1 feature, with ties broken using the probability of the match. The probability for each match is (estimated target density - estimated decoy density) / estimated target density.

Parameters:
targetMatchingResult -
decoyMatchingResult -
showCharts -
Throws:
java.io.IOException

calculateProbabilitiesEM

public float[] calculateProbabilitiesEM(java.util.List<java.lang.Float> targetMassErrorDataList,
                                        java.util.List<java.lang.Float> targetHErrorDataList,
                                        float proportionTrue,
                                        boolean showCharts)
                                 throws java.io.IOException
Use the Expectation Maximization algorithm to determine match probabilities by modeling the true hits as a normal distribution, and the false hits as a uniform distribution, superimposed in the target distribution.

Parameters:
targetMassErrorDataList -
targetHErrorDataList -
proportionTrue -
showCharts -
Returns:
the match probabilities determined by the EM algorithm
Throws:
java.io.IOException

calculateLocalNormalizedDensity

protected double calculateLocalNormalizedDensity(double xpos,
                                                 double ypos,
                                                 double expectedNumTrue,
                                                 double binSizeX,
                                                 double binSizeY)
Use the normal CDF to calculate the density of a small area of the distribution.

Parameters:
xpos -
ypos -
expectedNumTrue -
binSizeX -
binSizeY -
Returns:

getInitialProportionTrue

public float getInitialProportionTrue()

setInitialProportionTrue

public void setInitialProportionTrue(float initialProportionTrue)

getExpectedTrue

public float getExpectedTrue()

getKsScoreX

public float getKsScoreX()

getKsScoreY

public float getKsScoreY()

getQuantileCorrX

public float getQuantileCorrX()

getQuantileCorrY

public float getQuantileCorrY()

getQuantileBetaX

public float getQuantileBetaX()

getQuantileBetaY

public float getQuantileBetaY()

getMaxRProbAssignmentMillis

public int getMaxRProbAssignmentMillis()

setMaxRProbAssignmentMillis

public void setMaxRProbAssignmentMillis(int maxRProbAssignmentMillis)

getMuX

public float getMuX()

getMuY

public float getMuY()

getSigmaX

public float getSigmaX()

getSigmaY

public float getSigmaY()

getNumIterations

public int getNumIterations()

getMaxSecondBestProbability

public float getMaxSecondBestProbability()

setMaxSecondBestProbability

public void setMaxSecondBestProbability(float maxSecondBestProbability)

getMinSecondBestProbabilityDifference

public float getMinSecondBestProbabilityDifference()

setMinSecondBestProbabilityDifference

public void setMinSecondBestProbabilityDifference(float minSecondBestProbabilityDifference)

getMaxEMIterations

public int getMaxEMIterations()

setMaxEMIterations

public void setMaxEMIterations(int maxEMIterations)

getMinEMIterations

public int getMinEMIterations()

setMinEMIterations

public void setMinEMIterations(int minEMIterations)

isConverged

public boolean isConverged()


Fred Hutchinson Cancer Research Center