Histogram with standard score normalization

This commit is contained in:
Sebastian Piwell
2016-06-15 10:32:59 -04:00
parent 294773b2cb
commit 78b827d9d6
3 changed files with 60 additions and 40 deletions

View File

@@ -48,6 +48,7 @@ DataProcessor::DataProcessor()
,_useHistogram(false)
,_normValues(glm::vec2(1.0))
,_filterValues(glm::vec2(0.0))
,_histNormValues(glm::vec2(10.f, 10.f))
{
_coordinateVariables = {"x", "y", "z", "phi", "theta"};
}
@@ -90,20 +91,21 @@ float DataProcessor::processDataPoint(float value, int option){
float mean = (1.0 / _numValues[option]) * _sum[option];
float sd = _standardDeviation[option];
float v;
if(_useHistogram){
sd = histogram->equalize(sd);
mean = histogram->equalize(mean);
value = histogram->equalize(value);
v = histogram->equalize(normalizeWithStandardScore(value, mean, sd, _histNormValues))/(float)512;
}else{
v = normalizeWithStandardScore(value, mean, sd, _normValues);
}
float v = normalizeWithStandardScore(value, mean, sd);
// float v = normalizeWithStandardScore(value, mean, sd, _normValues);
return v;
}
float DataProcessor::normalizeWithStandardScore(float value, float mean, float sd){
float DataProcessor::normalizeWithStandardScore(float value, float mean, float sd, glm::vec2 normalizationValues){
float zScoreMin = _normValues.x;
float zScoreMax = _normValues.y;
float zScoreMin = normalizationValues.x; //10.0f;//_normValues.x;
float zScoreMax = normalizationValues.y; //10.0f;//_normValues.y;
float standardScore = ( value - mean ) / sd;
// Clamp interesting values
standardScore = glm::clamp(standardScore, -zScoreMin, zScoreMax);
@@ -130,22 +132,24 @@ void DataProcessor::calculateFilterValues(std::vector<int> selectedOptions){
if(!_histograms.empty()){
for(int option : selectedOptions){
histogram = _histograms[option];
mean = (1.0/_numValues[option])*_sum[option];
standardDeviation = _standardDeviation[option];
filterMid = histogram->highestBinValue(_useHistogram);
filterWidth = mean+histogram->binWidth();
if(_useHistogram){
standardDeviation = histogram->equalize(standardDeviation);
mean = histogram->equalize(mean);
filterWidth = mean+1;
if(!_useHistogram){
mean = (1.0/_numValues[option])*_sum[option];
standardDeviation = _standardDeviation[option];
histogram = _histograms[option];
filterMid = histogram->highestBinValue(_useHistogram);
filterWidth = mean+histogram->binWidth();
filterMid = normalizeWithStandardScore(filterMid, mean, standardDeviation, _normValues);
filterWidth = fabs(0.5-normalizeWithStandardScore(filterWidth, mean, standardDeviation, _normValues));
}else{
Histogram hist = _histograms[option]->equalize();
filterMid = hist.highestBinValue(true);
std::cout << filterMid << std::endl;
filterWidth = 1.f/512.f;
}
filterMid = normalizeWithStandardScore(filterMid, mean, standardDeviation);
filterWidth = fabs(0.5-normalizeWithStandardScore(filterWidth, mean, standardDeviation));
_filterValues += glm::vec2(filterMid, filterWidth);
_filterValues += glm::vec2(filterMid, filterWidth);
}
_filterValues /= numSelected;
@@ -158,12 +162,6 @@ void DataProcessor::add(std::vector<std::vector<float>>& optionValues, std::vect
float mean, value, variance, standardDeviation;
for(int i=0; i<numOptions; i++){
if(!_histograms[i]){
_histograms[i] = std::make_shared<Histogram>(_min[i], _max[i], 512);
}
else{
_histograms[i]->changeRange(_min[i], _max[i]);
}
std::vector<float> values = optionValues[i];
numValues = values.size();
@@ -174,7 +172,6 @@ void DataProcessor::add(std::vector<std::vector<float>>& optionValues, std::vect
for(int j=0; j<numValues; j++){
value = values[j];
variance += pow(value-mean, 2);
_histograms[i]->add(value, 1);
}
standardDeviation = sqrt(variance/ numValues);
@@ -182,9 +179,30 @@ void DataProcessor::add(std::vector<std::vector<float>>& optionValues, std::vect
_sum[i] += sum[i];
_standardDeviation[i] = sqrt(pow(standardDeviation, 2) + pow(_standardDeviation[i], 2));
_numValues[i] += numValues;
float min = normalizeWithStandardScore(_min[i], mean, _standardDeviation[i], _histNormValues);
float max = normalizeWithStandardScore(_max[i], mean, _standardDeviation[i], _histNormValues);
if(!_histograms[i]){
_histograms[i] = std::make_shared<Histogram>(min, max, 512);
}
else{
_histograms[i]->changeRange(min, max);
}
for(int j=0; j<numValues; j++){
value = values[j];
_histograms[i]->add(normalizeWithStandardScore(value, mean, _standardDeviation[i], _histNormValues), 1);
}
_histograms[i]->generateEqualizer();
// _histograms[i]->print();
std::cout << std::endl;
_histograms[i]->print();
std::cout << std::endl;
std::cout << "Eq: ";
Histogram hist = _histograms[i]->equalize();
hist.print();
}
}

View File

@@ -53,7 +53,7 @@ public:
void clear();
protected:
float processDataPoint(float value, int option);
float normalizeWithStandardScore(float value, float mean, float sd);
float normalizeWithStandardScore(float value, float mean, float sd, glm::vec2 normalizationValues = glm::vec2(1.0f, 1.0f));
void initializeVectors(int numOptions);
void calculateFilterValues(std::vector<int> selectedOptions);
@@ -72,6 +72,8 @@ protected:
std::vector<float> _numValues;
std::vector<std::shared_ptr<Histogram>> _histograms;
std::set<std::string> _coordinateVariables;
glm::vec2 _histNormValues;
};
} // namespace openspace

View File

@@ -296,14 +296,16 @@ float Histogram::entropy(){
}
void Histogram::print() const {
std::cout << "number of bins: " << _numBins << std::endl
<< "range: " << _minValue << " - " << _maxValue << std::endl << std::endl;
// std::cout << "number of bins: " << _numBins << std::endl
// << "range: " << _minValue << " - " << _maxValue << std::endl << std::endl;
for (int i = 0; i < _numBins; i++) {
float low = _minValue + float(i) / _numBins * (_maxValue - _minValue);
float high = low + (_maxValue - _minValue) / float(_numBins);
std::cout << i << " [" << low << ", " << high << "]"
<< " " << _data[i] << std::endl;
// std::cout << i << " [" << low << ", " << high << "]"
// << " " << _data[i] << std::endl;
std::cout << _data[i]/(float)_numValues << ", ";
}
std::cout << std::endl;
// std::cout << std::endl << std::endl << std::endl<< "==============" << std::endl;
}
@@ -332,18 +334,16 @@ float Histogram::highestBinValue(bool equalized, int overBins){
highestBin = i;
highestValue = value;
}
// if(_data[i] > _data[highestBin])
// highestBin = i;
}
float low = _minValue + float(highestBin) / _numBins * (_maxValue - _minValue);
float high = low + (_maxValue - _minValue) / float(_numBins);
if(!equalized){
float low = _minValue + float(highestBin) / _numBins * (_maxValue - _minValue);
float high = low + (_maxValue - _minValue) / float(_numBins);
return (high+low)/2.0;
}else{
return equalize((high+low)/2.0);
return highestBin/(float)_numBins;
// return equalize((high+low)/2.0);
}
}