Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
614 changes: 147 additions & 467 deletions mzLib/FlashLFQ/FlashLFQResults.cs

Large diffs are not rendered by default.

70 changes: 0 additions & 70 deletions mzLib/FlashLFQ/IntensityNormalizationEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,16 @@ namespace FlashLFQ
{
public class IntensityNormalizationEngine
{
private const int numPeptidesDesiredFromEachFraction = 500;
private const int numPeptidesDesiredInMatrix = 5000;
private readonly FlashLfqResults results;
private readonly bool integrate;
private readonly bool silent;
private readonly bool quantifyAmbiguousPeptides;
private readonly int maxThreads;

/// <summary>
/// Creates the engine and copies the supplied settings into its fields.
/// </summary>
/// <param name="results">Stored in the <c>results</c> field.</param>
/// <param name="integrate">Stored in the <c>integrate</c> field.</param>
/// <param name="silent">Stored in the <c>silent</c> field.</param>
/// <param name="maxThreads">Stored in the <c>maxThreads</c> field.</param>
/// <param name="quantifyAmbiguousPeptides">Accepted but not stored by this constructor.</param>
public IntensityNormalizationEngine(FlashLfqResults results, bool integrate, bool silent, int maxThreads, bool quantifyAmbiguousPeptides = false)
{
    // NOTE(review): quantifyAmbiguousPeptides is never copied to a field here — confirm that is intentional.
    (this.results, this.integrate, this.silent, this.maxThreads) = (results, integrate, silent, maxThreads);
}

/// <summary>
Expand Down Expand Up @@ -317,72 +313,6 @@ private void NormalizeBioreps()
}
}

/// <summary>
/// This method takes a list of peptides and creates a subset list of peptides to normalize with, to avoid
/// excessive computation time in normalization functions.
/// </summary>
//private List<Peptide> SubsetData(List<Peptide> initialList, List<SpectraFileInfo> spectraFiles)
//{
// List<SpectraFileInfo>[] bothBioreps = new List<SpectraFileInfo>[2];
// var temp1 = spectraFiles.GroupBy(p => p.Condition).ToList();
// if (temp1.Count() == 1)
// {
// // normalizing bioreps within a condition
// var temp2 = spectraFiles.GroupBy(p => p.BiologicalReplicate).ToList();
// bothBioreps[0] = temp2[0].ToList();
// bothBioreps[1] = temp2[1].ToList();
// }
// else
// {
// // normalizing bioreps between conditions
// bothBioreps[0] = temp1[0].ToList();
// bothBioreps[1] = temp1[1].ToList();
// }

// HashSet<Peptide> subsetList = new HashSet<Peptide>();
// int maxFractionIndex = bothBioreps.SelectMany(p => p).Max(v => v.Fraction);

// foreach (var biorep in bothBioreps)
// {
// List<int> fractions = biorep.Select(p => p.Fraction).Distinct().ToList();

// int numToAddPerFraction = numPeptidesDesiredInMatrix / fractions.Count;
// if (numToAddPerFraction < numPeptidesDesiredFromEachFraction)
// {
// numToAddPerFraction = numPeptidesDesiredFromEachFraction;
// }

// int[] peptidesAddedPerFraction = new int[fractions.Count];
// Queue<Peptide>[] peptidesObservedInEachFraction = new Queue<Peptide>[fractions.Count];

// foreach (var file in biorep)
// {
// if (peptidesObservedInEachFraction[file.Fraction] == null)
// {
// peptidesObservedInEachFraction[file.Fraction] = new Queue<Peptide>(initialList.Where(p => p.GetIntensity(file) > 0)
// .OrderByDescending(p => p.GetIntensity(file)));
// }
// }

// foreach (var fraction in fractions)
// {
// while (peptidesAddedPerFraction[fraction] < numToAddPerFraction && peptidesObservedInEachFraction[fraction].Any())
// {
// var peptide = peptidesObservedInEachFraction[fraction].Dequeue();

// // don't need to check if the return list already contains the peptide because it's a HashSet (no duplicates are allowed)
// subsetList.Add(peptide);

// // this peptide is in the return list regardless of whether or not it was actually just added;
// // we just want to guarantee this fraction has 500 peptides in the return list to normalize with
// peptidesAddedPerFraction[fraction]++;
// }
// }
// }

// return subsetList.ToList();
//}

/// <summary>
/// Calculates normalization factors for fractionated data using a bounded Nelder-Mead optimization algorithm.
/// Called by NormalizeFractions().
Expand Down
40 changes: 40 additions & 0 deletions mzLib/FlashLFQ/Interfaces/IQuantifiablePeptide.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using System.Collections.Generic;

namespace FlashLFQ
{
/// <summary>
/// Contract for a peptide that can be quantified. Implementations expose the peptide's
/// sequence, the protein groups it belongs to, and per-file intensity and detection type.
/// </summary>
public interface IQuantifiablePeptide
{
/// <summary>
/// The peptide sequence
/// </summary>
string Sequence { get; }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably use BaseSequence or FullSequence so it maps better with our existing objects.


/// <summary>
/// Whether this peptide should be used for protein quantification
/// </summary>
bool UseForProteinQuant { get; }

/// <summary>
/// The protein groups this peptide belongs to
/// </summary>
IEnumerable<IQuantifiableProteinGroup> ProteinGroups { get; }

/// <summary>
/// Gets the intensity for a specific file
/// </summary>
/// <param name="fileInfo">The spectra file whose intensity is requested.</param>
/// <returns>The intensity of this peptide for <paramref name="fileInfo"/>.</returns>
double GetIntensity(IQuantifiableSpectraFile fileInfo);

/// <summary>
/// Gets the detection type for a specific file
/// </summary>
/// <param name="fileInfo">The spectra file whose detection type is requested.</param>
/// <returns>The detection type of this peptide for <paramref name="fileInfo"/>.</returns>
DetectionType GetDetectionType(IQuantifiableSpectraFile fileInfo);

/// <summary>
/// Determines if this peptide has unambiguous quantification
/// </summary>
/// <returns><c>true</c> when the peptide's quantification is unambiguous; otherwise <c>false</c>.</returns>
bool UnambiguousPeptideQuant();
}
}
25 changes: 25 additions & 0 deletions mzLib/FlashLFQ/Interfaces/IQuantifiableProteinGroup.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
using System.Collections.Generic;

namespace FlashLFQ
{
/// <summary>
/// Contract for a protein group that can be quantified: it has a name and a
/// per-file intensity that can be read and written.
/// </summary>
public interface IQuantifiableProteinGroup
{
/// <summary>
/// The protein group name/identifier
/// </summary>
string ProteinGroupName { get; }

/// <summary>
/// Gets the intensity for a specific file
/// </summary>
/// <param name="fileInfo">The spectra file whose intensity is requested.</param>
/// <returns>The intensity of this protein group for <paramref name="fileInfo"/>.</returns>
double GetIntensity(IQuantifiableSpectraFile fileInfo);

/// <summary>
/// Sets the intensity for a specific file
/// </summary>
/// <param name="fileInfo">The spectra file the intensity applies to.</param>
/// <param name="intensity">The intensity value to store for <paramref name="fileInfo"/>.</param>
void SetIntensity(IQuantifiableSpectraFile fileInfo, double intensity);
}
}
33 changes: 33 additions & 0 deletions mzLib/FlashLFQ/Interfaces/IQuantifiableSpectraFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
namespace FlashLFQ
{
/// <summary>
/// Contract for the description of one spectra file: its filename (without extension)
/// plus its condition, biological replicate, technical replicate, and fraction.
/// </summary>
public interface IQuantifiableSpectraFile
{
/// <summary>
/// The filename without extension
/// </summary>
string FilenameWithoutExtension { get; }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cap on Name


/// <summary>
/// The experimental condition
/// </summary>
string Condition { get; }

/// <summary>
/// The biological replicate number
/// </summary>
int BiologicalReplicate { get; }

/// <summary>
/// The technical replicate number
/// </summary>
int TechnicalReplicate { get; }

/// <summary>
/// The fraction number
/// </summary>
int Fraction { get; }
}
}
27 changes: 26 additions & 1 deletion mzLib/FlashLFQ/Peptide.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace FlashLFQ
{
public class Peptide
public class Peptide : IQuantifiablePeptide
{
public readonly string Sequence;
public readonly string BaseSequence;
Expand Down Expand Up @@ -289,5 +289,30 @@ public bool UnambiguousPeptideQuant()
{
return Intensities.Values.Any(p => p > 0) && DetectionTypes.Values.Any(p => p != DetectionType.MSMSAmbiguousPeakfinding);
}

// Explicit IQuantifiablePeptide properties: each getter forwards to this class's own member of the same name.
string IQuantifiablePeptide.Sequence
{
    get { return Sequence; }
}

bool IQuantifiablePeptide.UseForProteinQuant
{
    get { return UseForProteinQuant; }
}

// The protein groups are exposed through Cast so callers see them as IQuantifiableProteinGroup.
IEnumerable<IQuantifiableProteinGroup> IQuantifiablePeptide.ProteinGroups
{
    get { return ProteinGroups.Cast<IQuantifiableProteinGroup>(); }
}

/// <summary>
/// Interface entry point for reading an intensity. Forwards to this class's
/// <c>GetIntensity</c> once the argument is known to be a <c>SpectraFileInfo</c>.
/// </summary>
/// <param name="fileInfo">The file to look up; must be a <c>SpectraFileInfo</c>.</param>
/// <returns>The value returned by <c>GetIntensity</c> for that file.</returns>
/// <exception cref="System.ArgumentException"><paramref name="fileInfo"/> is not a <c>SpectraFileInfo</c> (this includes null).</exception>
double IQuantifiablePeptide.GetIntensity(IQuantifiableSpectraFile fileInfo)
{
    return fileInfo is SpectraFileInfo concreteFile
        ? GetIntensity(concreteFile)
        : throw new System.ArgumentException("File info must be of type SpectraFileInfo");
}

/// <summary>
/// Interface entry point for reading a detection type. Forwards to this class's
/// <c>GetDetectionType</c> once the argument is known to be a <c>SpectraFileInfo</c>.
/// </summary>
/// <param name="fileInfo">The file to look up; must be a <c>SpectraFileInfo</c>.</param>
/// <returns>The value returned by <c>GetDetectionType</c> for that file.</returns>
/// <exception cref="System.ArgumentException"><paramref name="fileInfo"/> is not a <c>SpectraFileInfo</c> (this includes null).</exception>
DetectionType IQuantifiablePeptide.GetDetectionType(IQuantifiableSpectraFile fileInfo)
{
    return fileInfo is SpectraFileInfo concreteFile
        ? GetDetectionType(concreteFile)
        : throw new System.ArgumentException("File info must be of type SpectraFileInfo");
}

// Explicit interface implementation: returns whatever the class's own UnambiguousPeptideQuant() returns.
bool IQuantifiablePeptide.UnambiguousPeptideQuant()
{
    return UnambiguousPeptideQuant();
}
}
}
26 changes: 25 additions & 1 deletion mzLib/FlashLFQ/ProteinGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace FlashLFQ
{
public class ProteinGroup
public class ProteinGroup : IQuantifiableProteinGroup
{
public readonly string ProteinGroupName;
public readonly string GeneName;
Expand Down Expand Up @@ -111,5 +111,29 @@ public override int GetHashCode()
{
return ProteinGroupName.GetHashCode();
}

// Explicit IQuantifiableProteinGroup implementation: the interface's name property returns this class's ProteinGroupName.
string IQuantifiableProteinGroup.ProteinGroupName
{
    get { return ProteinGroupName; }
}

/// <summary>
/// Interface entry point for reading an intensity. Forwards to this class's
/// <c>GetIntensity</c> once the argument is known to be a <c>SpectraFileInfo</c>.
/// </summary>
/// <param name="fileInfo">The file to look up; must be a <c>SpectraFileInfo</c>.</param>
/// <returns>The value returned by <c>GetIntensity</c> for that file.</returns>
/// <exception cref="System.ArgumentException"><paramref name="fileInfo"/> is not a <c>SpectraFileInfo</c> (this includes null).</exception>
double IQuantifiableProteinGroup.GetIntensity(IQuantifiableSpectraFile fileInfo)
{
    return fileInfo is SpectraFileInfo concreteFile
        ? GetIntensity(concreteFile)
        : throw new System.ArgumentException("File info must be of type SpectraFileInfo");
}

/// <summary>
/// Interface entry point for writing an intensity. Forwards to this class's
/// <c>SetIntensity</c> once the argument is known to be a <c>SpectraFileInfo</c>.
/// </summary>
/// <param name="fileInfo">The file the intensity applies to; must be a <c>SpectraFileInfo</c>.</param>
/// <param name="intensity">The value passed through to <c>SetIntensity</c>.</param>
/// <exception cref="System.ArgumentException"><paramref name="fileInfo"/> is not a <c>SpectraFileInfo</c> (this includes null).</exception>
void IQuantifiableProteinGroup.SetIntensity(IQuantifiableSpectraFile fileInfo, double intensity)
{
    // Guard first: anything other than a SpectraFileInfo is rejected before any state is touched.
    if (!(fileInfo is SpectraFileInfo concreteFile))
    {
        throw new System.ArgumentException("File info must be of type SpectraFileInfo");
    }

    SetIntensity(concreteFile, intensity);
}
}
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love the dependency injection and inversion of control here

Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using Easy.Common.Extensions;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.Linq;

namespace FlashLFQ
{
/// <summary>
/// Defines the strategy interface for protein quantification algorithms
/// </summary>
/// <typeparam name="TPeptide">Peptide type; must implement <see cref="IQuantifiablePeptide"/>.</typeparam>
/// <typeparam name="TProteinGroup">Protein group type; must implement <see cref="IQuantifiableProteinGroup"/>.</typeparam>
/// <typeparam name="TSpectraFile">Spectra file type; must implement <see cref="IQuantifiableSpectraFile"/>.</typeparam>
public interface IProteinQuantificationStrategy<TPeptide, TProteinGroup, TSpectraFile>
where TPeptide : IQuantifiablePeptide
where TProteinGroup : IQuantifiableProteinGroup
where TSpectraFile : IQuantifiableSpectraFile
{
/// <summary>
/// Quantifies proteins based on their constituent peptides
/// </summary>
/// <param name="proteinGroupToPeptides">Maps each protein group to the peptides assigned to it.</param>
/// <param name="spectraFiles">The spectra files to quantify across.</param>
/// <param name="useSharedPeptides">Flag indicating whether shared peptides are in use; how it is interpreted is up to the implementation.</param>
void QuantifyProteins(
Dictionary<TProteinGroup, List<TPeptide>> proteinGroupToPeptides,
List<TSpectraFile> spectraFiles,
bool useSharedPeptides);
}

/// <summary>
/// Generic protein quantification engine that uses a strategy pattern
/// to support different quantification algorithms (Top3, MedianPolish, etc.).
/// The engine zeroes the protein intensities, groups the eligible peptides by
/// protein group, and hands that grouping to the configured strategy.
/// </summary>
/// <typeparam name="TPeptide">Peptide type; must implement <see cref="IQuantifiablePeptide"/>.</typeparam>
/// <typeparam name="TProteinGroup">Protein group type; must implement <see cref="IQuantifiableProteinGroup"/>.</typeparam>
/// <typeparam name="TSpectraFile">Spectra file type; must implement <see cref="IQuantifiableSpectraFile"/>.</typeparam>
public class GenericProteinQuantificationEngine<TPeptide, TProteinGroup, TSpectraFile>
    where TPeptide : IQuantifiablePeptide
    where TProteinGroup : IQuantifiableProteinGroup
    where TSpectraFile : IQuantifiableSpectraFile
{
    private readonly IProteinQuantificationStrategy<TPeptide, TProteinGroup, TSpectraFile> _strategy;
    private readonly Dictionary<string, TPeptide> _peptideModifiedSequences;
    private readonly Dictionary<string, TProteinGroup> _proteinGroups;
    private readonly List<TSpectraFile> _spectraFiles;

    /// <summary>
    /// Creates an engine bound to a quantification strategy and the data it operates on.
    /// </summary>
    /// <param name="strategy">The algorithm invoked by <see cref="Run"/>.</param>
    /// <param name="peptideModifiedSequences">The peptides to consider; only the dictionary values are read.</param>
    /// <param name="proteinGroups">The protein groups whose intensities are reset at the start of <see cref="Run"/>.</param>
    /// <param name="spectraFiles">The spectra files to reset intensities for and to pass to the strategy.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public GenericProteinQuantificationEngine(
        IProteinQuantificationStrategy<TPeptide, TProteinGroup, TSpectraFile> strategy,
        Dictionary<string, TPeptide> peptideModifiedSequences,
        Dictionary<string, TProteinGroup> proteinGroups,
        List<TSpectraFile> spectraFiles)
    {
        _strategy = strategy ?? throw new ArgumentNullException(nameof(strategy));
        _peptideModifiedSequences = peptideModifiedSequences ?? throw new ArgumentNullException(nameof(peptideModifiedSequences));
        _proteinGroups = proteinGroups ?? throw new ArgumentNullException(nameof(proteinGroups));
        _spectraFiles = spectraFiles ?? throw new ArgumentNullException(nameof(spectraFiles));
    }

    /// <summary>
    /// Runs the protein quantification using the configured strategy
    /// </summary>
    /// <param name="useSharedPeptides">
    /// When false, peptides that report more than one protein group are left out of the grouping.
    /// The flag is also forwarded to the strategy unchanged.
    /// </param>
    public void Run(bool useSharedPeptides = false)
    {
        ResetProteinIntensities();

        Dictionary<TProteinGroup, List<TPeptide>> proteinGroupToPeptides =
            GroupPeptidesByProteinGroup(useSharedPeptides);

        // Execute the quantification strategy
        _strategy.QuantifyProteins(proteinGroupToPeptides, _spectraFiles, useSharedPeptides);
    }

    // Sets the intensity of every known protein group to 0 for every spectra file.
    private void ResetProteinIntensities()
    {
        foreach (TProteinGroup proteinGroup in _proteinGroups.Values)
        {
            foreach (TSpectraFile file in _spectraFiles)
            {
                proteinGroup.SetIntensity(file, 0);
            }
        }
    }

    // Builds the protein-group -> peptides map from the peptides that pass the eligibility checks.
    private Dictionary<TProteinGroup, List<TPeptide>> GroupPeptidesByProteinGroup(bool useSharedPeptides)
    {
        var proteinGroupToPeptides = new Dictionary<TProteinGroup, List<TPeptide>>();

        foreach (TPeptide peptide in _peptideModifiedSequences.Values)
        {
            if (!peptide.UnambiguousPeptideQuant() || !peptide.UseForProteinQuant)
            {
                continue;
            }

            // ProteinGroups is only an IEnumerable and may be a lazy sequence, so take one snapshot:
            // counting it and then looping over it would otherwise enumerate the source twice.
            List<IQuantifiableProteinGroup> groupsForPeptide = peptide.ProteinGroups.ToList();

            // Handle shared peptides based on useSharedPeptides parameter
            bool isSharedPeptide = groupsForPeptide.Count > 1;
            if (!useSharedPeptides && isSharedPeptide)
            {
                continue;
            }

            foreach (IQuantifiableProteinGroup group in groupsForPeptide)
            {
                // The cast throws InvalidCastException if a peptide reports a group that is not a TProteinGroup.
                TProteinGroup proteinGroup = (TProteinGroup)group;

                if (!proteinGroupToPeptides.TryGetValue(proteinGroup, out List<TPeptide> peptidesForThisProtein))
                {
                    peptidesForThisProtein = new List<TPeptide>();
                    proteinGroupToPeptides.Add(proteinGroup, peptidesForThisProtein);
                }

                peptidesForThisProtein.Add(peptide);
            }
        }

        return proteinGroupToPeptides;
    }
}
}
Loading
Loading