Regression.cs 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. using System;
  2. using System.Linq;
  3. namespace ScottPlot.Statistics
  4. {
  5. public class LinearRegressionLine
  6. {
  7. public readonly double slope;
  8. public readonly double offset;
  9. public readonly double rSquared;
  10. private readonly int pointCount;
  11. private readonly double[] xs;
  12. private readonly double[] ys;
  13. public LinearRegressionLine(double[] xs, double[] ys)
  14. {
  15. if ((xs.Length != ys.Length) || (xs.Length < 2))
  16. {
  17. throw new ArgumentException("xs and ys must be the same length and have at least 2 points");
  18. }
  19. pointCount = ys.Length;
  20. this.xs = xs;
  21. this.ys = ys;
  22. (slope, offset, rSquared) = GetCoefficients(xs, ys);
  23. }
  24. public LinearRegressionLine(double[] ys, double firstX, double xSpacing)
  25. {
  26. // this constructor doesn't require an X array to be passed in at all
  27. pointCount = ys.Length;
  28. double[] xs = new double[pointCount];
  29. for (int i = 0; i < pointCount; i++)
  30. {
  31. xs[i] = firstX + xSpacing * i;
  32. }
  33. this.xs = xs;
  34. this.ys = ys;
  35. (slope, offset, rSquared) = GetCoefficients(xs, ys);
  36. }
  37. public override string ToString()
  38. {
  39. return $"Linear fit for {pointCount} points: Y = {slope}x + {offset} (R² = {rSquared})";
  40. }
  41. private static (double, double, double) GetCoefficients(double[] xs, double[] ys)
  42. {
  43. double sumXYResidual = 0;
  44. double sumXSquareResidual = 0;
  45. double meanX = xs.Average();
  46. double meanY = ys.Average();
  47. for (int i = 0; i < xs.Length; i++)
  48. {
  49. sumXYResidual += (xs[i] - meanX) * (ys[i] - meanY);
  50. sumXSquareResidual += (xs[i] - meanX) * (xs[i] - meanX);
  51. }
  52. // Note: least-squares regression line always passes through (x̅,y̅)
  53. double slope = sumXYResidual / sumXSquareResidual;
  54. double offset = meanY - (slope * meanX);
  55. // calcualte R squared (https://en.wikipedia.org/wiki/Coefficient_of_determination)
  56. double ssTot = 0;
  57. double ssRes = 0;
  58. for (int i = 0; i < ys.Length; i++)
  59. {
  60. double thisY = ys[i];
  61. double distanceFromMeanSquared = Math.Pow(thisY - meanY, 2);
  62. ssTot += distanceFromMeanSquared;
  63. double modelY = slope * xs[i] + offset;
  64. double distanceFromModelSquared = Math.Pow(thisY - modelY, 2);
  65. ssRes += distanceFromModelSquared;
  66. }
  67. double rSquared = 1 - ssRes / ssTot;
  68. return (slope, offset, rSquared);
  69. }
  70. public double GetValueAt(double x)
  71. {
  72. return offset + slope * x;
  73. }
  74. public double[] GetValues()
  75. {
  76. double[] values = new double[pointCount];
  77. for (int i = 0; i < pointCount; i++)
  78. {
  79. values[i] = GetValueAt(xs[i]);
  80. }
  81. return values;
  82. }
  83. public double[] GetResiduals()
  84. {
  85. // the residual is the difference between the actual and predicted value
  86. double[] residuals = new double[ys.Length];
  87. for (int i = 0; i < ys.Length; i++)
  88. {
  89. residuals[i] = ys[i] - GetValueAt(xs[i]);
  90. }
  91. return residuals;
  92. }
  93. }
  94. }