Histogram.cs 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Threading.Tasks;
  6. namespace ScottPlot
  7. {
  /// <summary>
  /// Stub kept at the former location of the histogram class.
  /// The <see cref="ObsoleteAttribute"/> is created with its error flag set to true,
  /// so code referencing this type is rejected by the compiler with the message below.
  /// </summary>
  [Obsolete("ScottPlot.Histogram is now ScottPlot.Statistics.Histogram", true)]
  public class Histogram
  {
      /// <summary>
      /// Always throws <see cref="NotImplementedException"/>; none of the arguments are inspected.
      /// The parameter list mirrors the constructor of ScottPlot.Statistics.Histogram.
      /// </summary>
      /// <exception cref="NotImplementedException">thrown unconditionally</exception>
      public Histogram(double[] values, double? min = null, double? max = null, double? binSize = null, double? binCount = null, bool ignoreOutOfBounds = true)
          => throw new NotImplementedException("ScottPlot.Histogram is now ScottPlot.Statistics.Histogram");
  }
  16. }
  17. namespace ScottPlot.Statistics
  18. {
  // TODO: This class needs refactoring to improve names.
  // Use numpy.histogram as a reference: https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
  /// <summary>
  /// Histogram of a set of values using identically sized, evenly spaced bins.
  /// Every result is computed once in the constructor and stored in a readonly field.
  /// </summary>
  public class Histogram
  {
      /// <summary>
      /// Lower edges of bins used to create the histogram
      /// </summary>
      public readonly double[] bins;

      /// <summary>
      /// Total number of values in each bin.
      /// When out-of-bounds values are not ignored, the first and last bins also
      /// collect every value that falls below or above the binned range.
      /// </summary>
      public readonly double[] counts;

      /// <summary>
      /// Fractional number of values in each bin (each count divided by the sum of all counts).
      /// The total of all values in this array is 1.0.
      /// If no value was counted at all, every element is NaN (0 / 0).
      /// </summary>
      public readonly double[] countsFrac;

      /// <summary>
      /// Running total of <see cref="counts"/>.
      /// The last element equals the total number of counted values.
      /// </summary>
      public readonly double[] cumulativeCounts;

      /// <summary>
      /// Result of Population.GetDistribution(bins, true).
      /// Documented originally as the probability density (fraction) for each bin based on the
      /// mean and standard deviation of the population, summing to 1.0.
      /// NOTE(review): Population is defined outside this file - confirm against its implementation.
      /// </summary>
      public readonly double[] probability;

      /// <summary>
      /// Result of Population.GetDistribution(bins, false).
      /// Documented originally as the probability density curve normalized to its peak, so its maximum value is 1.0.
      /// NOTE(review): Population is defined outside this file - confirm against its implementation.
      /// </summary>
      public readonly double[] countsFracCurve;

      /// <summary>
      /// Running total of <see cref="countsFrac"/>.
      /// The array ends near 1.0 when at least one value was counted.
      /// </summary>
      public readonly double[] cumulativeFrac;

      /// <summary>
      /// Distance between each bin
      /// </summary>
      public readonly double binSize;

      /// <summary>
      /// Population mean
      /// </summary>
      public readonly double mean;

      /// <summary>
      /// Population standard deviation
      /// </summary>
      public readonly double stdev;

      /// <summary>
      /// Compute the histogram of a set of data.
      /// Bins are identically sized and evenly spaced.
      /// </summary>
      /// <param name="values">input data</param>
      /// <param name="min">manually-defined lower edge of first bin (defaults to Population.minus3stDev)</param>
      /// <param name="max">manually-defined upper edge of last bin (defaults to Population.plus3stDev)</param>
      /// <param name="binSize">manually-defined width of each bin</param>
      /// <param name="binCount">resize bins as needed so this number of bins is achieved (100 if neither this nor binSize is given)</param>
      /// <param name="ignoreOutOfBounds">
      /// if True, values below min or above max will be ignored;
      /// if False, one extra bin is added on each side and out-of-range values are counted in those edge bins
      /// </param>
      /// <exception cref="ArgumentException">
      /// min is not below max, both binCount and binSize are given,
      /// binCount or binSize is not positive, or binSize is too large to fit a single bin in the range
      /// </exception>
      public Histogram(double[] values, double? min = null, double? max = null, double? binSize = null, double? binCount = null, bool ignoreOutOfBounds = true)
      {
          var population = new Population(values);
          mean = population.mean;
          stdev = population.stDev;

          // limits that were not supplied fall back to the ones reported by Population
          min = min ?? population.minus3stDev;
          max = max ?? population.plus3stDev;
          double lower = min.Value;
          double upper = max.Value;

          if (lower >= upper)
              throw new ArgumentException($"max ({upper}) must be greater than min ({lower})");

          if (binCount.HasValue && binSize.HasValue)
              throw new ArgumentException("binCount and binSize cannot both be given");

          if (binCount.HasValue && !(binCount.Value > 0))
              throw new ArgumentException($"binCount ({binCount.Value}) must be greater than zero");

          const double defaultBinCount = 100;
          double span = upper - lower;
          double width = binSize ?? span / (binCount ?? defaultBinCount);

          if (!ignoreOutOfBounds)
          {
              // add an extra bin on each side of the histogram
              lower -= width;
              upper += width;
          }

          // BinBySize rejects a width that is zero, negative, or NaN
          bins = BinBySize(width, lower, upper);
          if (bins.Length == 0)
              throw new ArgumentException($"binSize ({width}) is too large to fit a bin between min and max");

          // a single-bin histogram has no second edge to measure, so the requested width is used
          this.binSize = bins.Length > 1 ? bins[1] - bins[0] : width;

          counts = GetHistogram(values, bins, this.binSize, ignoreOutOfBounds);
          cumulativeCounts = GetCumulative(counts);
          countsFrac = GetNormalized(counts);
          cumulativeFrac = GetCumulative(countsFrac);
          countsFracCurve = population.GetDistribution(bins, false);
          probability = population.GetDistribution(bins, true);
      }

      /// <summary>
      /// Return a new array holding each value divided by the sum of all values.
      /// A sum of zero yields NaN elements (0 / 0).
      /// </summary>
      private static double[] GetNormalized(double[] values)
      {
          // the sum does not change while iterating, so it is computed once
          double total = values.Sum();

          double[] fractions = new double[values.Length];
          for (int i = 0; i < fractions.Length; i++)
              fractions[i] = values[i] / total;
          return fractions;
      }

      /// <summary>
      /// Return the running total of the given values (element i is the sum of values[0..i]).
      /// An empty input produces an empty array.
      /// </summary>
      private static double[] GetCumulative(double[] values)
      {
          double[] cumulative = new double[values.Length];
          double runningTotal = 0;
          for (int i = 0; i < cumulative.Length; i++)
          {
              runningTotal += values[i];
              cumulative[i] = runningTotal;
          }
          return cumulative;
      }

      /// <summary>
      /// Create the lower edges of evenly spaced bins covering the range from min to max.
      /// The number of bins is (max - min) / binSize rounded down; a quotient that misses a
      /// whole number only by floating point error is treated as that whole number.
      /// </summary>
      /// <param name="binSize">width of each bin (must be greater than zero)</param>
      /// <param name="min">lower edge of the first bin</param>
      /// <param name="max">upper limit of the binned range</param>
      /// <returns>lower edge of every bin; empty if not even one bin fits in the range</returns>
      /// <exception cref="ArgumentException">binSize is not greater than zero, or max is below min</exception>
      public static double[] BinBySize(double binSize, double min, double max)
      {
          // written as a negated comparison so that NaN is rejected too
          if (!(binSize > 0))
              throw new ArgumentException($"binSize ({binSize}) must be greater than zero");

          if (max < min)
              throw new ArgumentException($"max ({max}) cannot be less than min ({min})");

          double span = max - min;
          double exactCount = span / binSize;

          // e.g. 0.3 / 0.1 evaluates to 2.9999999999999996; plain truncation would drop the third bin
          double nearest = Math.Round(exactCount);
          double tolerance = 1e-9 * Math.Max(1.0, Math.Abs(nearest));
          int binCount = Math.Abs(exactCount - nearest) <= tolerance
              ? (int)nearest
              : (int)exactCount;

          double[] edges = new double[binCount];
          for (int i = 0; i < edges.Length; i++)
              edges[i] = i * binSize + min;
          return edges;
      }

      /// <summary>
      /// Count how many values fall inside each bin.
      /// </summary>
      /// <param name="values">data to count</param>
      /// <param name="bins">lower edge of each bin (at least one element)</param>
      /// <param name="binSize">width of every bin</param>
      /// <param name="ignoreOutOfBounds">
      /// if true, values outside the bins are dropped;
      /// if false, they are added to the first bin (below range) or the last bin (above range)
      /// </param>
      /// <returns>number of values counted in each bin</returns>
      private static double[] GetHistogram(double[] values, double[] bins, double binSize, bool ignoreOutOfBounds)
      {
          double[] counts = new double[bins.Length];
          int lastIndex = counts.Length - 1;

          foreach (double value in values)
          {
              // Floor (not an integer cast, which truncates toward zero) keeps values
              // just below the first edge out of bin 0
              double position = Math.Floor((value - bins[0]) / binSize);

              // NaN cannot be placed in any bin
              if (double.IsNaN(position))
                  continue;

              if (position < 0)
              {
                  if (!ignoreOutOfBounds)
                      counts[0] += 1;
              }
              else if (position > lastIndex)
              {
                  if (!ignoreOutOfBounds)
                      counts[lastIndex] += 1;
              }
              else
              {
                  counts[(int)position] += 1;
              }
          }

          return counts;
      }
  }
  160. }