1. /* ===========================================================
  2. * JFreeChart : a free chart library for the Java(tm) platform
  3. * ===========================================================
  4. *
  5. * (C) Copyright 2000-2005, by Object Refinery Limited and Contributors.
  6. *
  7. * Project Info: http://www.jfree.org/jfreechart/index.html
  8. *
  9. * This library is free software; you can redistribute it and/or modify it under the terms
  10. * of the GNU Lesser General Public License as published by the Free Software Foundation;
  11. * either version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
  14. * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. * See the GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License along with this
  18. * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  19. * Boston, MA 02111-1307, USA.
  20. *
  21. * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
  22. * in the United States and other countries.]
  23. *
  24. * ---------------
  25. * Statistics.java
  26. * ---------------
  27. * (C) Copyright 2000-2005, by Matthew Wright and Contributors.
  28. *
  29. * Original Author: Matthew Wright;
  30. * Contributor(s): David Gilbert (for Object Refinery Limited);
  31. *
  32. * $Id: Statistics.java,v 1.3 2005/01/14 17:30:59 mungady Exp $
  33. *
  34. * Changes (from 08-Nov-2001)
  35. * --------------------------
  36. * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
  37. * Moved from JFreeChart to package com.jrefinery.data.* in JCommon class
  38. * library (DG);
  39. * 24-Jun-2002 : Removed unnecessary local variable (DG);
  40. * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
  41. * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
  42. * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
  43. * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 release (DG);
  44. *
  45. */
  46. package org.jfree.data.statistics;
  47. import java.util.ArrayList;
  48. import java.util.Collection;
  49. import java.util.Collections;
  50. import java.util.Iterator;
  51. import java.util.List;
  52. /**
  53. * A utility class that provides some simple statistical functions.
  54. */
  55. public abstract class Statistics {
  56. /**
  57. * Returns the mean of an array of numbers.
  58. *
  59. * @param values the values (<code>null</code> permitted, returns <code>Double.NaN</code>).
  60. *
  61. * @return The mean.
  62. */
  63. public static double calculateMean(Number[] values) {
  64. double result = Double.NaN;
  65. if (values != null && values.length > 0) {
  66. double sum = 0.0;
  67. int counter = 0;
  68. for (; counter < values.length; counter++) {
  69. sum = sum + values[counter].doubleValue();
  70. }
  71. result = (sum / counter);
  72. }
  73. return result;
  74. }
  75. /**
  76. * Returns the mean of a collection of <code>Number</code> objects.
  77. *
  78. * @param values the values (<code>null</code> permitted, returns <code>Double.NaN</code>).
  79. *
  80. * @return The mean.
  81. */
  82. public static double calculateMean(Collection values) {
  83. double result = Double.NaN;
  84. int count = 0;
  85. double total = 0.0;
  86. Iterator iterator = values.iterator();
  87. while (iterator.hasNext()) {
  88. Object object = iterator.next();
  89. if (object != null && object instanceof Number) {
  90. Number number = (Number) object;
  91. total = total + number.doubleValue();
  92. count = count + 1;
  93. }
  94. }
  95. if (count > 0) {
  96. result = total / count;
  97. }
  98. return result;
  99. }
  100. /**
  101. * Calculates the median for a list of values (<code>Number</code> objects). The list
  102. * of values will be sorted first.
  103. *
  104. * @param values the values.
  105. *
  106. * @return The median.
  107. */
  108. public static double calculateMedian(List values) {
  109. return calculateMedian(values, true);
  110. }
  111. /**
  112. * Calculates the median for a list of values (<code>Number</code> objects) that are assumed
  113. * to be in ascending order.
  114. *
  115. * @param values the values.
  116. * @param copyAndSort a flag that controls whether the list of values is copied and sorted.
  117. *
  118. * @return The median.
  119. */
  120. public static double calculateMedian(List values, boolean copyAndSort) {
  121. double result = Double.NaN;
  122. if (values != null) {
  123. if (copyAndSort) {
  124. int itemCount = values.size();
  125. List copy = new ArrayList(itemCount);
  126. for (int i = 0; i < itemCount; i++) {
  127. copy.add(i, values.get(i));
  128. }
  129. Collections.sort(copy);
  130. values = copy;
  131. }
  132. int count = values.size();
  133. if (count > 0) {
  134. if (count % 2 == 1) {
  135. if (count > 1) {
  136. Number value = (Number) values.get((count - 1) / 2);
  137. result = value.doubleValue();
  138. }
  139. else {
  140. Number value = (Number) values.get(0);
  141. result = value.doubleValue();
  142. }
  143. }
  144. else {
  145. Number value1 = (Number) values.get(count / 2 - 1);
  146. Number value2 = (Number) values.get(count / 2);
  147. result = (value1.doubleValue() + value2.doubleValue()) / 2.0;
  148. }
  149. }
  150. }
  151. return result;
  152. }
  153. /**
  154. * Calculates the median for a sublist within a list of values (<code>Number</code> objects).
  155. *
  156. * @param values the values (in any order).
  157. * @param start the start index.
  158. * @param end the end index.
  159. *
  160. * @return The median.
  161. */
  162. public static double calculateMedian(List values, int start, int end) {
  163. return calculateMedian(values, start, end, true);
  164. }
  165. /**
  166. * Calculates the median for a sublist within a list of values (<code>Number</code> objects).
  167. * The entire list will be sorted if the <code>ascending</code< argument is <code>false</code>.
  168. *
  169. * @param values the values.
  170. * @param start the start index.
  171. * @param end the end index.
  172. * @param copyAndSort a flag that that controls whether the list of values is copied and
  173. * sorted.
  174. *
  175. * @return The median.
  176. */
  177. public static double calculateMedian(List values, int start, int end,
  178. boolean copyAndSort) {
  179. double result = Double.NaN;
  180. if (copyAndSort) {
  181. List working = new ArrayList(end - start + 1);
  182. for (int i = start; i <= end; i++) {
  183. working.add(values.get(i));
  184. }
  185. Collections.sort(working);
  186. result = calculateMedian(working, false);
  187. }
  188. else {
  189. int count = end - start + 1;
  190. if (count > 0) {
  191. if (count % 2 == 1) {
  192. if (count > 1) {
  193. Number value = (Number) values.get(start + (count - 1) / 2);
  194. result = value.doubleValue();
  195. }
  196. else {
  197. Number value = (Number) values.get(start);
  198. result = value.doubleValue();
  199. }
  200. }
  201. else {
  202. Number value1 = (Number) values.get(start + count / 2 - 1);
  203. Number value2 = (Number) values.get(start + count / 2);
  204. result = (value1.doubleValue() + value2.doubleValue()) / 2.0;
  205. }
  206. }
  207. }
  208. return result;
  209. }
  210. /**
  211. * Returns the standard deviation of a set of numbers.
  212. *
  213. * @param data the data.
  214. *
  215. * @return The standard deviation of a set of numbers.
  216. */
  217. public static double getStdDev(Number[] data) {
  218. double avg = calculateMean(data);
  219. double sum = 0.0;
  220. for (int counter = 0; counter < data.length; counter++) {
  221. double diff = data[counter].doubleValue() - avg;
  222. sum = sum + diff * diff;
  223. }
  224. return Math.sqrt(sum / (data.length - 1));
  225. }
  226. /**
  227. * Fits a straight line to a set of (x, y) data, returning the slope and
  228. * intercept.
  229. *
  230. * @param xData the x-data.
  231. * @param yData the y-data.
  232. *
  233. * @return A double array with the intercept in [0] and the slope in [1].
  234. */
  235. public static double[] getLinearFit(Number[] xData, Number[] yData) {
  236. // check arguments...
  237. if (xData.length != yData.length) {
  238. throw new IllegalArgumentException(
  239. "Statistics.getLinearFit(): array lengths must be equal.");
  240. }
  241. double[] result = new double[2];
  242. // slope
  243. result[1] = getSlope(xData, yData);
  244. // intercept
  245. result[0] = calculateMean(yData) - result[1] * calculateMean(xData);
  246. return result;
  247. }
  248. /**
  249. * Finds the slope of a regression line using least squares.
  250. *
  251. * @param xData an array of Numbers (the x values).
  252. * @param yData an array of Numbers (the y values).
  253. *
  254. * @return The slope.
  255. */
  256. public static double getSlope(Number[] xData, Number[] yData) {
  257. // check arguments...
  258. if (xData.length != yData.length) {
  259. throw new IllegalArgumentException(
  260. "Statistics.getSlope(...): array lengths must be equal.");
  261. }
  262. // ********* stat function for linear slope ********
  263. // y = a + bx
  264. // a = ybar - b * xbar
  265. // sum(x * y) - (sum (x) * sum(y)) / n
  266. // b = ------------------------------------
  267. // sum (x^2) - (sum(x)^2 / n
  268. // *************************************************
  269. // sum of x, x^2, x * y, y
  270. double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
  271. int counter;
  272. for (counter = 0; counter < xData.length; counter++) {
  273. sx = sx + xData[counter].doubleValue();
  274. sxx = sxx + Math.pow(xData[counter].doubleValue(), 2);
  275. sxy = sxy + yData[counter].doubleValue() * xData[counter].doubleValue();
  276. sy = sy + yData[counter].doubleValue();
  277. }
  278. return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter);
  279. }
  280. /**
  281. * Calculates the correlation between two datasets. Both arrays should contain the same number
  282. * of items. Null values are treated as zero.
  283. * <P>
  284. * Information about the correlation calculation was obtained from:
  285. *
  286. * http://trochim.human.cornell.edu/kb/statcorr.htm
  287. *
  288. * @param data1 the first dataset.
  289. * @param data2 the second dataset.
  290. *
  291. * @return The correlation.
  292. */
  293. public static double getCorrelation(Number[] data1, Number[] data2) {
  294. if (data1 == null) {
  295. throw new IllegalArgumentException("Null 'data1' argument.");
  296. }
  297. if (data2 == null) {
  298. throw new IllegalArgumentException("Null 'data2' argument.");
  299. }
  300. if (data1.length != data2.length) {
  301. throw new IllegalArgumentException(
  302. "'data1' and 'data2' arrays must have same length."
  303. );
  304. }
  305. int n = data1.length;
  306. double sumX = 0.0;
  307. double sumY = 0.0;
  308. double sumX2 = 0.0;
  309. double sumY2 = 0.0;
  310. double sumXY = 0.0;
  311. for (int i = 0; i < n; i++) {
  312. double x = 0.0;
  313. if (data1[i] != null) {
  314. x = data1[i].doubleValue();
  315. }
  316. double y = 0.0;
  317. if (data2[i] != null) {
  318. y = data2[i].doubleValue();
  319. }
  320. sumX = sumX + x;
  321. sumY = sumY + y;
  322. sumXY = sumXY + (x * y);
  323. sumX2 = sumX2 + (x * x);
  324. sumY2 = sumY2 + (y * y);
  325. }
  326. return (n * sumXY - sumX * sumY)
  327. / Math.pow((n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY), 0.5);
  328. }
  329. /**
  330. * Returns a data set for a moving average on the data set passed in.
  331. *
  332. * @param xData an array of the x data.
  333. * @param yData an array of the y data.
  334. * @param period the number of data points to average
  335. *
  336. * @return a double[][] the length of the data set in the first dimension,
  337. * with two doubles for x and y in the second dimension
  338. */
  339. public static double[][] getMovingAverage(Number[] xData,
  340. Number[] yData,
  341. int period) {
  342. // check arguments...
  343. if (xData.length != yData.length) {
  344. throw new IllegalArgumentException(
  345. "Statistics.getMovingAverage(...): array lengths must be equal."
  346. );
  347. }
  348. if (period > xData.length) {
  349. throw new IllegalArgumentException(
  350. "Statistics.getMovingAverage(...): period can't be longer than dataset."
  351. );
  352. }
  353. double[][] result = new double[xData.length - period][2];
  354. for (int i = 0; i < result.length; i++) {
  355. result[i][0] = xData[i + period].doubleValue();
  356. // holds the moving average sum
  357. double sum = 0.0;
  358. for (int j = 0; j < period; j++) {
  359. sum += yData[i + j].doubleValue();
  360. }
  361. sum = sum / period;
  362. result[i][1] = sum;
  363. }
  364. return result;
  365. }
  366. }