2022-07-05 15:55:47 +02:00
|
|
|
<?php
|
|
|
|
|
2024-02-02 15:16:11 +00:00
|
|
|
declare(strict_types=1);
|
|
|
|
|
2022-07-05 15:55:47 +02:00
|
|
|
namespace DNW\Skills\Numerics;
|
2010-08-28 22:05:41 -04:00
|
|
|
|
2010-10-08 21:44:36 -04:00
|
|
|
/**
|
|
|
|
* Computes Gaussian (bell curve) values.
|
|
|
|
*
|
2023-08-01 13:53:19 +00:00
|
|
|
* @author Jeff Moser <jeff@moserware.com>
|
|
|
|
* @copyright 2010 Jeff Moser
|
2010-10-08 21:44:36 -04:00
|
|
|
*/
|
2024-03-19 14:10:11 +00:00
|
|
|
class GaussianDistribution
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2024-03-19 12:49:14 +00:00
|
|
|
private const DEFAULT_STANDARD_DEVIATION = 1.0;
|
2024-03-19 14:38:55 +00:00
|
|
|
|
2024-03-19 12:49:14 +00:00
|
|
|
private const DEFAULT_MEAN = 0.0;
|
2024-03-19 14:38:55 +00:00
|
|
|
|
2024-02-26 09:36:05 +00:00
|
|
|
/**
|
|
|
|
* Square Root 2π.
|
|
|
|
* Precalculated constant for performance reasons
|
|
|
|
* sqrt(2*pi)
|
2024-02-27 08:08:48 +00:00
|
|
|
*
|
2024-02-26 09:43:53 +00:00
|
|
|
* @link https://www.wolframalpha.com/input?i=sqrt%282*pi%29 Source of value
|
2024-02-26 09:36:05 +00:00
|
|
|
*/
|
2024-02-15 10:25:28 +00:00
|
|
|
private const M_SQRT_2_PI = 2.5066282746310005024157652848110452530069867406099383166299235763;
|
2024-02-21 13:48:37 +00:00
|
|
|
|
2024-02-26 09:36:05 +00:00
|
|
|
/**
|
|
|
|
* Log of Square Root 2π.
|
|
|
|
* Precalculated constant for performance reasons
|
|
|
|
* log(sqrt(2*pi))
|
2024-02-27 08:08:48 +00:00
|
|
|
*
|
2024-02-26 09:43:53 +00:00
|
|
|
* @link https://www.wolframalpha.com/input?i=log%28sqrt%282*pi%29%29 Source of value
|
2024-02-26 09:36:05 +00:00
|
|
|
*/
|
2024-02-15 10:25:28 +00:00
|
|
|
private const M_LOG_SQRT_2_PI = 0.9189385332046727417803297364056176398613974736377834128171515404;
|
2024-02-21 13:48:37 +00:00
|
|
|
|
2010-10-08 21:44:36 -04:00
|
|
|
// precision and precisionMean are used because they make multiplying and dividing simpler
|
2024-03-19 14:10:11 +00:00
|
|
|
// (see the accompanying math paper for more details)
|
2024-03-19 12:49:14 +00:00
|
|
|
private float $precision = 1.0;
|
2022-07-05 15:55:47 +02:00
|
|
|
|
2024-03-19 12:49:14 +00:00
|
|
|
private float $precisionMean = 0.0;
|
2022-07-05 15:55:47 +02:00
|
|
|
|
2024-03-19 12:49:14 +00:00
|
|
|
private float $variance = 1.0;
|
2010-10-08 21:44:36 -04:00
|
|
|
|
2024-03-19 12:49:14 +00:00
|
|
|
public function __construct(private float $mean = self::DEFAULT_MEAN, private float $standardDeviation = self::DEFAULT_STANDARD_DEVIATION)
|
2016-05-24 14:10:39 +02:00
|
|
|
{
|
2024-03-19 14:38:55 +00:00
|
|
|
if ($mean == self::DEFAULT_MEAN && $standardDeviation == self::DEFAULT_STANDARD_DEVIATION) {
|
2024-03-19 12:49:14 +00:00
|
|
|
//Use all the defaults
|
|
|
|
return;
|
|
|
|
}
|
2010-09-30 08:25:31 -04:00
|
|
|
|
2024-03-19 12:49:14 +00:00
|
|
|
$this->variance = BasicMath::square($standardDeviation);
|
2023-08-01 14:02:12 +00:00
|
|
|
if ($this->variance != 0) {
|
|
|
|
$this->precision = 1.0 / $this->variance;
|
|
|
|
$this->precisionMean = $this->precision * $this->mean;
|
2016-05-24 14:10:39 +02:00
|
|
|
} else {
|
2023-08-01 14:02:12 +00:00
|
|
|
$this->precision = \INF;
|
2010-09-30 08:25:31 -04:00
|
|
|
|
2023-08-01 14:02:12 +00:00
|
|
|
$this->precisionMean = $this->mean == 0 ? 0 : \INF;
|
2010-09-30 08:25:31 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
}
|
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public function getMean(): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
return $this->mean;
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public function getVariance(): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
return $this->variance;
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public function getStandardDeviation(): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
return $this->standardDeviation;
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2010-09-18 11:11:44 -04:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public function getPrecision(): float
|
2010-09-18 11:11:44 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
return $this->precision;
|
2010-09-18 11:11:44 -04:00
|
|
|
}
|
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public function getPrecisionMean(): float
|
2010-09-18 11:11:44 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
return $this->precisionMean;
|
2010-09-18 11:11:44 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2024-02-26 09:36:05 +00:00
|
|
|
/**
|
2024-02-27 08:08:48 +00:00
|
|
|
* Great derivation of this is at
|
|
|
|
*
|
|
|
|
* @link http://www.astro.psu.edu/~mce/A451_2/A451/downloads/notes0.pdf
|
2024-02-26 09:36:05 +00:00
|
|
|
*/
|
2023-08-01 12:13:24 +00:00
|
|
|
public function getNormalizationConstant(): float
|
2016-05-24 14:10:39 +02:00
|
|
|
{
|
2024-02-15 10:12:45 +00:00
|
|
|
return 1.0 / (self::M_SQRT_2_PI * $this->standardDeviation);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function fromPrecisionMean(float $precisionMean, float $precision): self
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
|
|
|
$result = new GaussianDistribution();
|
2023-08-01 14:02:12 +00:00
|
|
|
$result->precision = $precision;
|
|
|
|
$result->precisionMean = $precisionMean;
|
2010-09-25 18:25:56 -04:00
|
|
|
|
2016-05-24 14:10:39 +02:00
|
|
|
if ($precision != 0) {
|
2023-08-01 14:02:12 +00:00
|
|
|
$result->variance = 1.0 / $precision;
|
|
|
|
$result->standardDeviation = sqrt($result->variance);
|
|
|
|
$result->mean = $result->precisionMean / $result->precision;
|
2016-05-24 14:10:39 +02:00
|
|
|
} else {
|
2023-08-01 14:02:12 +00:00
|
|
|
$result->variance = \INF;
|
|
|
|
$result->standardDeviation = \INF;
|
|
|
|
$result->mean = \NAN;
|
2010-09-25 18:25:56 -04:00
|
|
|
}
|
2022-07-05 15:55:47 +02:00
|
|
|
|
2010-08-28 22:05:41 -04:00
|
|
|
return $result;
|
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2024-02-26 09:36:05 +00:00
|
|
|
/**
|
|
|
|
* For details, see http://www.tina-vision.net/tina-knoppix/tina-memo/2003-003.pdf
|
|
|
|
* for multiplication, the precision mean ones are easier to write :)
|
|
|
|
*/
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function multiply(GaussianDistribution $left, GaussianDistribution $right): self
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
return GaussianDistribution::fromPrecisionMean($left->precisionMean + $right->precisionMean, $left->precision + $right->precision);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2024-02-26 09:36:05 +00:00
|
|
|
/**
|
|
|
|
* Computes the absolute difference between two Gaussians
|
|
|
|
*/
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function absoluteDifference(GaussianDistribution $left, GaussianDistribution $right): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
|
|
|
return max(
|
2023-08-01 14:02:12 +00:00
|
|
|
abs($left->precisionMean - $right->precisionMean),
|
|
|
|
sqrt(abs($left->precision - $right->precision))
|
2016-05-24 16:31:21 +02:00
|
|
|
);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
|
|
|
|
2024-02-26 09:36:05 +00:00
|
|
|
/**
|
|
|
|
* Computes the absolute difference between two Gaussians
|
|
|
|
*/
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function subtract(GaussianDistribution $left, GaussianDistribution $right): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2010-09-28 08:12:06 -04:00
|
|
|
return GaussianDistribution::absoluteDifference($left, $right);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function logProductNormalization(GaussianDistribution $left, GaussianDistribution $right): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
if (($left->precision == 0) || ($right->precision == 0)) {
|
2010-08-28 22:05:41 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-08-01 14:02:12 +00:00
|
|
|
$varianceSum = $left->variance + $right->variance;
|
|
|
|
$meanDifference = $left->mean - $right->mean;
|
2010-08-28 22:05:41 -04:00
|
|
|
|
2024-02-15 10:12:45 +00:00
|
|
|
return -self::M_LOG_SQRT_2_PI - (log($varianceSum) / 2.0) - (BasicMath::square($meanDifference) / (2.0 * $varianceSum));
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function divide(GaussianDistribution $numerator, GaussianDistribution $denominator): self
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2016-05-24 16:31:21 +02:00
|
|
|
return GaussianDistribution::fromPrecisionMean(
|
2023-08-01 14:02:12 +00:00
|
|
|
$numerator->precisionMean - $denominator->precisionMean,
|
|
|
|
$numerator->precision - $denominator->precision
|
2016-05-24 16:31:21 +02:00
|
|
|
);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function logRatioNormalization(GaussianDistribution $numerator, GaussianDistribution $denominator): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2023-08-01 14:02:12 +00:00
|
|
|
if (($numerator->precision == 0) || ($denominator->precision == 0)) {
|
2010-08-28 22:05:41 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-08-01 14:02:12 +00:00
|
|
|
$varianceDifference = $denominator->variance - $numerator->variance;
|
|
|
|
$meanDifference = $numerator->mean - $denominator->mean;
|
2010-08-28 22:05:41 -04:00
|
|
|
|
2024-02-15 10:12:45 +00:00
|
|
|
return log($denominator->variance) + self::M_LOG_SQRT_2_PI - log($varianceDifference) / 2.0 +
|
2016-05-24 15:12:29 +02:00
|
|
|
BasicMath::square($meanDifference) / (2 * $varianceDifference);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function at(float $x, float $mean = 0.0, float $standardDeviation = 1.0): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
|
|
|
// See http://mathworld.wolfram.com/NormalDistribution.html
|
|
|
|
// 1 -(x-mean)^2 / (2*stdDev^2)
|
|
|
|
// P(x) = ------------------- * e
|
|
|
|
// stdDev * sqrt(2*pi)
|
|
|
|
|
2024-02-15 10:12:45 +00:00
|
|
|
$multiplier = 1.0 / ($standardDeviation * self::M_SQRT_2_PI);
|
2016-05-24 15:12:29 +02:00
|
|
|
$expPart = exp((-1.0 * BasicMath::square($x - $mean)) / (2 * BasicMath::square($standardDeviation)));
|
2022-07-05 15:55:47 +02:00
|
|
|
|
2022-07-05 16:21:06 +02:00
|
|
|
return $multiplier * $expPart;
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
|
|
|
|
2024-03-19 14:38:55 +00:00
|
|
|
public static function cumulativeTo(float $x): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2024-02-15 10:12:45 +00:00
|
|
|
$result = GaussianDistribution::errorFunctionCumulativeTo(-M_SQRT1_2 * $x);
|
2022-07-05 15:55:47 +02:00
|
|
|
|
2016-05-24 14:10:39 +02:00
|
|
|
return 0.5 * $result;
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-02 13:29:14 +00:00
|
|
|
private static function errorFunctionCumulativeTo(float $x): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2022-07-05 15:33:34 +02:00
|
|
|
// Derived from page 265 of Numerical Recipes 3rd Edition
|
2010-08-28 22:05:41 -04:00
|
|
|
$z = abs($x);
|
|
|
|
|
2016-05-24 14:10:39 +02:00
|
|
|
$t = 2.0 / (2.0 + $z);
|
|
|
|
$ty = 4 * $t - 2;
|
2010-08-28 22:05:41 -04:00
|
|
|
|
2022-07-05 15:55:47 +02:00
|
|
|
$coefficients = [
|
2016-05-24 14:10:39 +02:00
|
|
|
-1.3026537197817094,
|
|
|
|
6.4196979235649026e-1,
|
|
|
|
1.9476473204185836e-2,
|
|
|
|
-9.561514786808631e-3,
|
|
|
|
-9.46595344482036e-4,
|
|
|
|
3.66839497852761e-4,
|
|
|
|
4.2523324806907e-5,
|
|
|
|
-2.0278578112534e-5,
|
|
|
|
-1.624290004647e-6,
|
|
|
|
1.303655835580e-6,
|
|
|
|
1.5626441722e-8,
|
|
|
|
-8.5238095915e-8,
|
|
|
|
6.529054439e-9,
|
|
|
|
5.059343495e-9,
|
|
|
|
-9.91364156e-10,
|
|
|
|
-2.27365122e-10,
|
|
|
|
9.6467911e-11,
|
|
|
|
2.394038e-12,
|
|
|
|
-6.886027e-12,
|
|
|
|
8.94487e-13,
|
|
|
|
3.13092e-13,
|
|
|
|
-1.12708e-13,
|
|
|
|
3.81e-16,
|
|
|
|
7.106e-15,
|
|
|
|
-1.523e-15,
|
|
|
|
-9.4e-17,
|
|
|
|
1.21e-16,
|
2024-02-02 15:16:11 +00:00
|
|
|
-2.8e-17,
|
|
|
|
];
|
2010-08-28 22:05:41 -04:00
|
|
|
|
|
|
|
$ncof = count($coefficients);
|
|
|
|
$d = 0.0;
|
|
|
|
$dd = 0.0;
|
|
|
|
|
2024-02-21 13:48:37 +00:00
|
|
|
for ($j = $ncof - 1; $j > 0; --$j) {
|
2010-08-28 22:05:41 -04:00
|
|
|
$tmp = $d;
|
2016-05-24 14:10:39 +02:00
|
|
|
$d = $ty * $d - $dd + $coefficients[$j];
|
2010-08-28 22:05:41 -04:00
|
|
|
$dd = $tmp;
|
|
|
|
}
|
|
|
|
|
2016-05-24 14:10:39 +02:00
|
|
|
$ans = $t * exp(-$z * $z + 0.5 * ($coefficients[0] + $ty * $d) - $dd);
|
2022-07-05 15:55:47 +02:00
|
|
|
|
2010-08-28 22:05:41 -04:00
|
|
|
return ($x >= 0.0) ? $ans : (2.0 - $ans);
|
|
|
|
}
|
2016-05-24 14:10:39 +02:00
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
private static function inverseErrorFunctionCumulativeTo(float $p): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
2022-07-05 15:33:34 +02:00
|
|
|
// From page 265 of numerical recipes
|
2016-05-24 14:10:39 +02:00
|
|
|
if ($p >= 2.0) {
|
2010-08-28 22:05:41 -04:00
|
|
|
return -100;
|
|
|
|
}
|
2024-02-21 13:48:37 +00:00
|
|
|
|
2016-05-24 14:10:39 +02:00
|
|
|
if ($p <= 0.0) {
|
2010-08-28 22:05:41 -04:00
|
|
|
return 100;
|
|
|
|
}
|
|
|
|
|
|
|
|
$pp = ($p < 1.0) ? $p : 2 - $p;
|
2016-05-24 14:10:39 +02:00
|
|
|
$t = sqrt(-2 * log($pp / 2.0)); // Initial guess
|
2024-02-15 10:12:45 +00:00
|
|
|
$x = -M_SQRT1_2 * ((2.30753 + $t * 0.27061) / (1.0 + $t * (0.99229 + $t * 0.04481)) - $t);
|
2010-08-28 22:05:41 -04:00
|
|
|
|
2024-02-21 13:48:37 +00:00
|
|
|
for ($j = 0; $j < 2; ++$j) {
|
2010-08-28 22:05:41 -04:00
|
|
|
$err = GaussianDistribution::errorFunctionCumulativeTo($x) - $pp;
|
2024-02-15 10:12:45 +00:00
|
|
|
$x += $err / (M_2_SQRTPI * exp(-BasicMath::square($x)) - $x * $err); // Halley
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return ($p < 1.0) ? $x : -$x;
|
|
|
|
}
|
|
|
|
|
2023-08-01 12:13:24 +00:00
|
|
|
public static function inverseCumulativeTo(float $x, float $mean = 0.0, float $standardDeviation = 1.0): float
|
2010-08-28 22:05:41 -04:00
|
|
|
{
|
|
|
|
// From numerical recipes, page 320
|
2024-02-15 10:12:45 +00:00
|
|
|
return $mean - M_SQRT2 * $standardDeviation * GaussianDistribution::inverseErrorFunctionCumulativeTo(2 * $x);
|
2010-08-28 22:05:41 -04:00
|
|
|
}
|
2022-07-05 15:55:47 +02:00
|
|
|
}
|