org.fhcrc.cpl.viewer.amt
Class AmtDatabase

java.lang.Object
  extended by org.fhcrc.cpl.viewer.amt.AmtDatabase
All Implemented Interfaces:
java.lang.Cloneable

public class AmtDatabase
extends java.lang.Object
implements java.lang.Cloneable

A representation of a full AMT database. Stores everything we'd want to read from or write to amtXml files, and allows us to manipulate the database. An AMT database contains a hierarchical structure, exactly equivalent to the structure defined in the amtXml schema. Briefly:
- An AMT database contains a number of runs.
- An AMT database also contains a number of known amino acid modifications, which are referenced in the runs and in the modification states (see below).
- An AMT database also contains a number of peptide entries.
- Each peptide entry contains multiple modification states.
- Each modification state contains multiple observations.
- Each observation knows its hydrophobicity, quality score, and the run it came from.
Retention times for observations can come from MS/MS observations, or from matched MS1 feature times (preferred). This choice is controlled by AmtDatabaseBuilder and isn't tracked in the database, so callers must keep track of it themselves. There are obvious connections between AMT databases and peptides, and Features and FeatureSets, but all references to Features are kept out of this class.


Field Summary
static java.lang.String DEFAULT_HYDROPHOBICITY_ALGORITHM_NAME
           
static double DEFAULT_HYDROPHOBICITY_ALGORITHM_VERSION
           
static double DEFAULT_MODIFICATION_MASS_ROUNDING_FACTOR
           
static int DEFAULT_PRECISION
           
protected  java.util.List<MS2Modification> mAminoacidModifications
           
protected  java.util.HashMap<MS2Modification,java.lang.Integer> mAminoacidModificationSequenceMap
           
protected  java.io.File mAmtDBSourceFile
           
protected  java.util.HashMap<java.lang.String,AmtPeptideEntry> mAmtPeptideEntryMap
           
protected  java.util.List<AmtRunEntry> mAmtRunEntries
           
protected  java.util.HashMap<AmtRunEntry,java.lang.Integer> mAmtRunSequenceMap
           
protected  java.lang.String mHydrophobicityAlgorithmName
           
protected  double mHydrophobicityAlgorithmVersion
           
static float MODIFICATION_EQUALITY_MASS_TOLERANCE
           
 
Constructor Summary
AmtDatabase()
           
 
Method Summary
 void addAminoacidModification(MS2Modification newMod)
          Add an amino acid modification.
 void addObservation(java.lang.String peptideSequence, java.util.List<MS2Modification>[] ms2Modifications, double qualityScore, double hydrophobicity, AmtRunEntry runEntry, int spectralCount, double timeInRun)
          Add an observation.
 void addObservation(java.lang.String peptideSequence, java.util.List<MS2Modification>[] ms2Modifications, double qualityScore, double hydrophobicity, AmtRunEntry runEntry, java.util.Map<java.lang.String,java.lang.Integer> spectralCountsMap, double timeInRun)
          add an observation, having already resolved the modifications
 void addObservationsFromAnotherDatabase(AmtDatabase otherDatabase)
          For each entry in another AmtDatabase, add all that entry's observations to this database.
 void addObservationsFromEntry(AmtPeptideEntry newEntry)
           
 void addObservationsFromEntry(AmtPeptideEntry newEntry, java.util.Map<MS2Modification,MS2Modification> oldNewModMap)
          Given an entry, add all the observations from all modification states in that entry to the database.
 void addOrOverrideEntriesWithAnotherDatabase(AmtDatabase otherDatabase)
          For each entry in another AmtDatabase, add it to this database, overriding an existing entry if one exists.
protected  void addOrOverrideEntry(AmtPeptideEntry overridingEntry)
          Add a peptide entry, blowing away the existing entry if it was there.
 java.util.Map<MS2Modification,MS2Modification> addRunEntry(AmtRunEntry newRunEntry)
          Add a run.
 double calculateMeanDifferenceFromPredictedHydro()
          Calculate the mean difference of all median peptide hydrophobicity (H) observations from prediction
 double calculateStandardDeviationDifferenceFromPredictedHydro()
          Calculate the standard deviation of all deviations of median peptide hydrophobicity (H) observations from prediction
 boolean contains(java.lang.String peptideSequence)
           
 MS2Modification findExistingEquivalentModification(MS2Modification newMS2Mod)
           
 MS2Modification getAminoacidModificationBySequence(int sequence)
          Get the mod with the specified sequence.
 MS2Modification[] getAminoacidModifications()
          Return an array containing all amino acid modifications
 java.io.File getAmtDBSourceFile()
           
 AmtPeptideEntry[] getEntries()
          This isn't free, because internally we store these as a HashMap
 AmtPeptideEntry getEntry(java.lang.String peptideSequence)
           
 java.lang.String getHydrophobicityAlgorithmName()
           
 double getHydrophobicityAlgorithmVersion()
           
 double getMaxTimeInRun(AmtRunEntry runEntry)
          Return the maximum time-in-run value for any observation in this run
 double getMinTimeInRun(AmtRunEntry runEntry)
          Return the minimum time-in-run value for any observation in this run
 AmtPeptideEntry.AmtPeptideObservation[] getObservationsForRun(AmtRunEntry runEntry)
          this REALLY isn't free
 AmtPeptideEntry[] getPeptideEntriesForRun(AmtRunEntry runEntry)
           
 java.lang.String[] getPeptides()
           
 AmtRunEntry getRunBySequence(int sequence)
          Get the run with the specified sequence.
 AmtRunEntry[] getRuns()
          Return an array containing all runs
 int getSequenceForAminoacidModification(MS2Modification mod)
           
 int getSequenceForRun(AmtRunEntry runEntry)
           
protected  void init()
          Initialize hashtables, etc
 int numAminoacidModifications()
          count 'em
 int numEntries()
          count 'em
 int numRuns()
          count 'em
 void removeEntry(java.lang.String peptideSequence)
          Remove an entry for a given sequence
 java.util.List<MS2Modification>[] resolveMods(java.lang.String peptideSequence, java.util.List<ModifiedAminoAcid>[] modifiedAminoAcids, AmtRunEntry runEntry)
          Resolve a peptide's modified amino acids into the MS2Modifications they represent within the given run.
 void resolveModsAndAddObservation(java.lang.String peptideSequence, java.util.List<ModifiedAminoAcid>[] modifiedAminoAcids, double qualityScore, double hydrophobicity, AmtRunEntry runEntry, java.util.Map<java.lang.String,java.lang.Integer> spectralCountsMap, double timeInRun)
          Add an observation.
 MS2Modification resolveMS2VariableModification(java.lang.String residue, float massDiff, AmtRunEntry runEntry)
          Given an instance of a modification, figure out which MS2Modification within this run it represents.
 void saveToTsvSpreadsheet(java.io.File tsvFile)
          Save to a tsv file in some hokey format
 void setAmtDBSourceFile(java.io.File mAmtDBSourceFile)
           
 void setHydrophobicityAlgorithmName(java.lang.String hydrophobicityAlgorithmName)
           
 void setHydrophobicityAlgorithmVersion(double hydrophobicityAlgorithmVersion)
           
 java.lang.String toString()
          VERY basic summary information
 java.lang.Object waistDeepCopy()
          Structure of database will be copied.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

DEFAULT_PRECISION

public static int DEFAULT_PRECISION

mAmtPeptideEntryMap

protected java.util.HashMap<java.lang.String,AmtPeptideEntry> mAmtPeptideEntryMap

mAmtRunEntries

protected java.util.List<AmtRunEntry> mAmtRunEntries

MODIFICATION_EQUALITY_MASS_TOLERANCE

public static final float MODIFICATION_EQUALITY_MASS_TOLERANCE
See Also:
Constant Field Values

mAminoacidModifications

protected java.util.List<MS2Modification> mAminoacidModifications

mAmtRunSequenceMap

protected java.util.HashMap<AmtRunEntry,java.lang.Integer> mAmtRunSequenceMap

mAminoacidModificationSequenceMap

protected java.util.HashMap<MS2Modification,java.lang.Integer> mAminoacidModificationSequenceMap

mAmtDBSourceFile

protected java.io.File mAmtDBSourceFile

DEFAULT_MODIFICATION_MASS_ROUNDING_FACTOR

public static final double DEFAULT_MODIFICATION_MASS_ROUNDING_FACTOR
See Also:
Constant Field Values

DEFAULT_HYDROPHOBICITY_ALGORITHM_NAME

public static final java.lang.String DEFAULT_HYDROPHOBICITY_ALGORITHM_NAME
See Also:
Constant Field Values

DEFAULT_HYDROPHOBICITY_ALGORITHM_VERSION

public static final double DEFAULT_HYDROPHOBICITY_ALGORITHM_VERSION
See Also:
Constant Field Values

mHydrophobicityAlgorithmName

protected java.lang.String mHydrophobicityAlgorithmName

mHydrophobicityAlgorithmVersion

protected double mHydrophobicityAlgorithmVersion
Constructor Detail

AmtDatabase

public AmtDatabase()
Method Detail

init

protected void init()
Initialize hashtables, etc


toString

public java.lang.String toString()
VERY basic summary information

Overrides:
toString in class java.lang.Object
Returns:

waistDeepCopy

public java.lang.Object waistDeepCopy()
Structure of database will be copied. Individual entries, however, will be the same, so don't mess with them. Same with run entries. Hence, not a deep copy, not completely shallow. Waist-deep.

Returns:

findExistingEquivalentModification

public MS2Modification findExistingEquivalentModification(MS2Modification newMS2Mod)

resolveMS2VariableModification

public MS2Modification resolveMS2VariableModification(java.lang.String residue,
                                                      float massDiff,
                                                      AmtRunEntry runEntry)
Given an instance of a modification, figure out which MS2Modification within this run it represents. If none, return null

Returns:

resolveModsAndAddObservation

public void resolveModsAndAddObservation(java.lang.String peptideSequence,
                                         java.util.List<ModifiedAminoAcid>[] modifiedAminoAcids,
                                         double qualityScore,
                                         double hydrophobicity,
                                         AmtRunEntry runEntry,
                                         java.util.Map<java.lang.String,java.lang.Integer> spectralCountsMap,
                                         double timeInRun)
Add an observation. Requires all the things that an observation needs to know about, including the AmtRunEntry that it's associated with -- and that run entry must be a valid entry in THIS database

Parameters:
peptideSequence -
modifiedAminoAcids -
qualityScore -
hydrophobicity -
runEntry -

addObservation

public void addObservation(java.lang.String peptideSequence,
                           java.util.List<MS2Modification>[] ms2Modifications,
                           double qualityScore,
                           double hydrophobicity,
                           AmtRunEntry runEntry,
                           java.util.Map<java.lang.String,java.lang.Integer> spectralCountsMap,
                           double timeInRun)
add an observation, having already resolved the modifications

Parameters:
peptideSequence -
ms2Modifications -
qualityScore -
hydrophobicity -
runEntry -
spectralCountsMap -
timeInRun -

resolveMods

public java.util.List<MS2Modification>[] resolveMods(java.lang.String peptideSequence,
                                                     java.util.List<ModifiedAminoAcid>[] modifiedAminoAcids,
                                                     AmtRunEntry runEntry)
Resolve a peptide's modified amino acids into the MS2Modifications they represent within the given run. Requires the AmtRunEntry that the peptide is associated with -- and that run entry must be a valid entry in THIS database

Parameters:
peptideSequence -
modifiedAminoAcids -
runEntry -

addObservation

public void addObservation(java.lang.String peptideSequence,
                           java.util.List<MS2Modification>[] ms2Modifications,
                           double qualityScore,
                           double hydrophobicity,
                           AmtRunEntry runEntry,
                           int spectralCount,
                           double timeInRun)
Add an observation. Requires all the things that an observation needs to know about, including the AmtRunEntry that it's associated with -- and that run entry must be a valid entry in THIS database

Parameters:
peptideSequence -
qualityScore -
hydrophobicity -
runEntry -

addObservationsFromEntry

public void addObservationsFromEntry(AmtPeptideEntry newEntry)

addObservationsFromEntry

public void addObservationsFromEntry(AmtPeptideEntry newEntry,
                                     java.util.Map<MS2Modification,MS2Modification> oldNewModMap)
Given an entry, add all the observations from all modification states in that entry to the database. If there's no existing entry for this peptide, create one

Parameters:
newEntry -

addObservationsFromAnotherDatabase

public void addObservationsFromAnotherDatabase(AmtDatabase otherDatabase)
For each entry in another AmtDatabase, add all that entry's observations to this database. If this involves creating new entries here, so be it. If entries already exist, augment them with the new data

Parameters:
otherDatabase -

addOrOverrideEntriesWithAnotherDatabase

public void addOrOverrideEntriesWithAnotherDatabase(AmtDatabase otherDatabase)
For each entry in another AmtDatabase, add it to this database, overriding an existing entry if one exists. All data from existing overridden entries will be lost. No checking is done to determine if we have any orphan runs.

Parameters:
otherDatabase -

saveToTsvSpreadsheet

public void saveToTsvSpreadsheet(java.io.File tsvFile)
                          throws java.io.FileNotFoundException
Save to a tsv file in some hokey format

Parameters:
tsvFile -
Throws:
java.io.FileNotFoundException

getEntries

public AmtPeptideEntry[] getEntries()
This isn't free, because internally we store these as a HashMap

Returns:

getPeptideEntriesForRun

public AmtPeptideEntry[] getPeptideEntriesForRun(AmtRunEntry runEntry)

getObservationsForRun

public AmtPeptideEntry.AmtPeptideObservation[] getObservationsForRun(AmtRunEntry runEntry)
this REALLY isn't free

Parameters:
runEntry -
Returns:

getMinTimeInRun

public double getMinTimeInRun(AmtRunEntry runEntry)
Return the minimum time-in-run value for any observation in this run

Parameters:
runEntry -
Returns:

getMaxTimeInRun

public double getMaxTimeInRun(AmtRunEntry runEntry)
Return the maximum time-in-run value for any observation in this run

Parameters:
runEntry -
Returns:

getPeptides

public java.lang.String[] getPeptides()

getEntry

public AmtPeptideEntry getEntry(java.lang.String peptideSequence)

contains

public boolean contains(java.lang.String peptideSequence)

addOrOverrideEntry

protected void addOrOverrideEntry(AmtPeptideEntry overridingEntry)
Add a peptide entry, blowing away the existing entry if it was there. Make no attempt to reconcile runs.

Parameters:
overridingEntry -

removeEntry

public void removeEntry(java.lang.String peptideSequence)
Remove an entry for a given sequence

Parameters:
peptideSequence -

numEntries

public int numEntries()
count 'em

Returns:

numAminoacidModifications

public int numAminoacidModifications()
count 'em

Returns:

addAminoacidModification

public void addAminoacidModification(MS2Modification newMod)
Add an amino acid modification. The sequence of this mod will be the new size of the ArrayList after addition


getAminoacidModificationBySequence

public MS2Modification getAminoacidModificationBySequence(int sequence)
Get the mod with the specified sequence. Note: sequence is one-based and ArrayLists are zero-based, so we subtract one when referencing the ArrayList.

Parameters:
sequence - (one-based)
Returns:

getAminoacidModifications

public MS2Modification[] getAminoacidModifications()
Return an array containing all amino acid modifications

Returns:

getSequenceForAminoacidModification

public int getSequenceForAminoacidModification(MS2Modification mod)
Returns:
the sequence of this modification, or -1 if it's not in the database

numRuns

public int numRuns()
count 'em

Returns:

addRunEntry

public java.util.Map<MS2Modification,MS2Modification> addRunEntry(AmtRunEntry newRunEntry)
Add a run. The sequence of this run will be the new size of the ArrayList after addition

Parameters:
newRunEntry -

getRunBySequence

public AmtRunEntry getRunBySequence(int sequence)
Get the run with the specified sequence. Note: sequence is one-based and ArrayLists are zero-based, so we subtract one when referencing the ArrayList.

Parameters:
sequence - (one-based)
Returns:

getRuns

public AmtRunEntry[] getRuns()
Return an array containing all runs

Returns:

getSequenceForRun

public int getSequenceForRun(AmtRunEntry runEntry)
Parameters:
runEntry -
Returns:
the sequence of this run entry, or -1 if it's not in the database

calculateMeanDifferenceFromPredictedHydro

public double calculateMeanDifferenceFromPredictedHydro()
Calculate the mean difference of all median peptide hydrophobicity (H) observations from prediction

Returns:

calculateStandardDeviationDifferenceFromPredictedHydro

public double calculateStandardDeviationDifferenceFromPredictedHydro()
Calculate the standard deviation of all deviations of median peptide hydrophobicity (H) observations from prediction

Returns:

getHydrophobicityAlgorithmName

public java.lang.String getHydrophobicityAlgorithmName()

setHydrophobicityAlgorithmName

public void setHydrophobicityAlgorithmName(java.lang.String hydrophobicityAlgorithmName)

getHydrophobicityAlgorithmVersion

public double getHydrophobicityAlgorithmVersion()

setHydrophobicityAlgorithmVersion

public void setHydrophobicityAlgorithmVersion(double hydrophobicityAlgorithmVersion)

getAmtDBSourceFile

public java.io.File getAmtDBSourceFile()

setAmtDBSourceFile

public void setAmtDBSourceFile(java.io.File mAmtDBSourceFile)


Fred Hutchinson Cancer Research Center