SST  15.1.0
StructuralSimulationToolkit
stathistogram.h
1 // Copyright 2009-2025 NTESS. Under the terms
2 // of Contract DE-NA0003525 with NTESS, the U.S.
3 // Government retains certain rights in this software.
4 //
5 // Copyright (c) 2009-2025, NTESS
6 // All rights reserved.
7 //
8 // This file is part of the SST software package. For license
9 // information, see the LICENSE file in the top level directory of the
10 // distribution.
11 
12 #ifndef SST_CORE_STATAPI_STATHISTOGRAM_H
13 #define SST_CORE_STATAPI_STATHISTOGRAM_H
14 
15 #include "sst/core/sst_types.h"
16 #include "sst/core/statapi/statbase.h"
17 #include "sst/core/statapi/statoutput.h"
18 #include "sst/core/warnmacros.h"
19 
20 namespace SST::Statistics {
21 
22 // NOTE: When calling base class members from a class derived from
23 // a templated base class, the user must use "this->" in
24 // order to call base class members (to avoid a compiler
25 // error) because those member names are nondependent names and the
26 // templated base class is a dependent base class. The
27 // compiler will not look in dependent base classes
28 // when looking up nondependent names.
29 // See: http://www.parashift.com/c++-faq-lite/nondependent-name-lookup-members.html
30 
31 /**
32  \class HistogramStatistic
33  Holder of data grouped into pre-determined width bins.
34  \tparam BinDataType is the type of the data held in each bin (i.e. what data type described the width of the bin)
35 */
36 #define CountType uint64_t
37 #define NumBinsType uint32_t
38 
39 template <class BinDataType>
40 class HistogramStatistic : public Statistic<BinDataType>
41 {
42 public:
43  SST_ELI_DECLARE_STATISTIC_TEMPLATE_DERIVED(
45  BinDataType,
46  "sst",
47  "HistogramStatistic",
48  SST_ELI_ELEMENT_VERSION(1, 0, 0),
49  "Track distribution of statistic across bins",
50  "SST::Statistic<T>")
51 
52  SST_ELI_DOCUMENT_PARAMS(
53  {"minvalue", "The minimum data value to include in the histogram.", "0"},
54  {"binwidth", "The size of each histogram bin.", "5000"},
55  {"numbins", "The number of histogram bins.", "100"},
56  {"dumpbinsonoutput", "Whether to output the data range of each bin as well as its value.", "true"},
57  {"includeoutofbounds", "Whether to keep track of data that falls below or above the histogram bins in separate out-of-bounds bins.", "true"})
58 
59 
61  BaseComponent* comp, const std::string& statName, const std::string& statSubId, Params& statParams) :
62  Statistic<BinDataType>(comp, statName, statSubId, statParams)
63  {
64  // Identify what keys are Allowed in the parameters
65  std::vector<std::string> allowedKeySet;
66  allowedKeySet.push_back("minvalue");
67  allowedKeySet.push_back("binwidth");
68  allowedKeySet.push_back("numbins");
69  allowedKeySet.push_back("dumpbinsonoutput");
70  allowedKeySet.push_back("includeoutofbounds");
71  statParams.pushAllowedKeys(allowedKeySet);
72 
73  // Process the Parameters
74  m_minValue = statParams.find<BinDataType>("minvalue", 0);
75  m_binWidth = statParams.find<NumBinsType>("binwidth", 5000);
76  m_numBins = statParams.find<NumBinsType>("numbins", 100);
77  m_dumpBinsOnOutput = statParams.find<bool>("dumpbinsonoutput", true);
78  m_includeOutOfBounds = statParams.find<bool>("includeoutofbounds", true);
79 
80  // Initialize other properties
81  m_totalSummed = 0;
82  m_totalSummedSqr = 0;
83  m_OOBMinCount = 0;
84  m_OOBMaxCount = 0;
85  m_itemsBinnedCount = 0;
86  this->setCollectionCount(0);
87  }
88 
89  ~HistogramStatistic() {}
90 
93  {} // For serialization ONLY
94 
95  virtual const std::string& getStatTypeName() const override { return stat_type_; }
96 
98  {
100  SST_SER(m_minValue);
101  SST_SER(m_binWidth);
102  SST_SER(m_numBins);
103  SST_SER(m_OOBMinCount);
104  SST_SER(m_OOBMaxCount);
105  SST_SER(m_itemsBinnedCount);
106  SST_SER(m_totalSummed);
107  SST_SER(m_totalSummedSqr);
108  SST_SER(m_binsMap);
109  SST_SER(m_dumpBinsOnOutput);
110  SST_SER(m_includeOutOfBounds);
111  // SST_SER(m_Fields); // Rebuilt by stat output object
112  }
113 
114 protected:
115  /**
116  Adds a new value to the histogram. The correct bin is identified and then incremented. If no bin can be found
117  to hold the value then a new bin is created.
118  */
119  void addData_impl_Ntimes(uint64_t N, BinDataType value) override
120  {
121  // Check to see if the value is above or below the min/max values
122  if ( value < getBinsMinValue() ) {
123  m_OOBMinCount += N;
124  return;
125  }
126  if ( value > getBinsMaxValue() ) {
127  m_OOBMaxCount += N;
128  return;
129  }
130 
131  // This value is to be binned...
132  // Add the "in limits" value to the total summation's
133  m_totalSummed += N * value;
134  m_totalSummedSqr += N * (value * value);
135 
136  // Increment the Binned count (note this <= to the Statistics added Item Count)
137  m_itemsBinnedCount++;
138 
139  // Figure out what the starting bin is and find it in the map
140  // To support signed and unsigned values along with floating point types,
141  // the calculation to find the bin_start value must be done in floating point
142  // then converted to BinDataType
143  double calc1 = (double)value / (double)m_binWidth;
144  double calc2 = floor(calc1); // Find the floor of the value
145  double calc3 = m_binWidth * calc2;
146  BinDataType bin_start = (BinDataType)calc3;
147  // printf("DEBUG: value = %d, junk1 = %f, calc2 = %f, calc3 = %f : bin_start = %d, item count = %ld, \n",
148  // value, calc1, calc2, calc3, bin_start, getStatCollectionCount());
149 
150  HistoMapItr_t bin_itr = m_binsMap.find(bin_start);
151 
152  // Was the bin found?
153  if ( bin_itr == m_binsMap.end() ) {
154  // No, Create the bin and set a value of 1 to it
155  m_binsMap.insert(std::pair<BinDataType, CountType>(bin_start, (CountType)N));
156  }
157  else {
158  // Yes, Increment the specific bin's count
159  bin_itr->second += N;
160  }
161  }
162 
163  void addData_impl(BinDataType value) override { addData_impl_Ntimes(1, value); }
164 
165 private:
166  /** Count how many bins are active in this histogram */
167  NumBinsType getActiveBinCount() { return m_binsMap.size(); }
168 
169  /** Count how many bins are available */
170  NumBinsType getNumBins() { return m_numBins; }
171 
172  /** Get the width of a bin in this histogram */
173  NumBinsType getBinWidth() { return m_binWidth; }
174 
175  /**
176  Get the count of items in the bin by the start value (e.g. give me the count of items in the bin which begins at
177  value X). \return The count of items in the bin else 0.
178  */
179  CountType getBinCountByBinStart(BinDataType binStartValue)
180  {
181  // Find the Bin Start Value in the Bin Map
182  HistoMapItr_t bin_itr = m_binsMap.find(binStartValue);
183 
184  // Check to see if the Start Value was found
185  if ( bin_itr == m_binsMap.end() ) {
186  // No, return no count for this bin
187  return (CountType)0;
188  }
189  else {
190  // Yes, return the bin count
191  return m_binsMap[binStartValue];
192  }
193  }
194 
195  /**
196  Get the smallest start value of a bin in this histogram (i.e. the minimum value possibly represented by this
197  histogram)
198  */
199  BinDataType getBinsMinValue() { return m_minValue; }
200 
201  /**
202  Get the largest possible value represented by this histogram (i.e. the highest value in any of items bins
203  rounded above to the size of the bin)
204  */
205  BinDataType getBinsMaxValue()
206  {
207  // Compute the max value based on the width * num bins offset by minvalue
208  return (m_binWidth * m_numBins) + m_minValue - 1;
209  }
210 
211  /**
212  Get the total number of items collected by the statistic
213  \return The number of items that have been added to the statistic
214  */
215  uint64_t getStatCollectionCount()
216  {
217  // Get the number of items added (but not necessarily binned) to this statistic
218  return this->getCollectionCount();
219  }
220 
221  /**
222  Get the total number of items contained in all bins
223  \return The number of items contained in all bins
224  */
225  CountType getItemsBinnedCount()
226  {
227  // Get the number of items added to this statistic that were binned.
228  return m_itemsBinnedCount;
229  }
230 
231  /**
232  Sum up every item presented for storage in the histogram
233  \return The sum of all values added into the histogram
234  */
235  BinDataType getValuesSummed() { return m_totalSummed; }
236 
237  /**
238  Sum up every squared value entered into the Histogram.
239  \return The sum of all values added after squaring into the Histogram
240  */
241  BinDataType getValuesSquaredSummed() { return m_totalSummedSqr; }
242 
243  void clearStatisticData() override
244  {
245  m_totalSummed = 0;
246  m_totalSummedSqr = 0;
247  m_OOBMinCount = 0;
248  m_OOBMaxCount = 0;
249  m_itemsBinnedCount = 0;
250  m_binsMap.clear();
251  this->setCollectionCount(0);
252  }
253 
254  void registerOutputFields(StatisticFieldsOutput* statOutput) override
255  {
256  // Check to see if we have registered the Startup Fields
257  m_Fields.push_back(statOutput->registerField<BinDataType>("BinsMinValue"));
258  m_Fields.push_back(statOutput->registerField<BinDataType>("BinsMaxValue"));
259  m_Fields.push_back(statOutput->registerField<NumBinsType>("BinWidth"));
260  m_Fields.push_back(statOutput->registerField<NumBinsType>("TotalNumBins"));
261  m_Fields.push_back(statOutput->registerField<BinDataType>("Sum"));
262  m_Fields.push_back(statOutput->registerField<BinDataType>("SumSQ"));
263  m_Fields.push_back(statOutput->registerField<NumBinsType>("NumActiveBins"));
264  m_Fields.push_back(statOutput->registerField<CountType>("NumItemsCollected"));
265  m_Fields.push_back(statOutput->registerField<CountType>("NumItemsBinned"));
266 
267  if ( true == m_includeOutOfBounds ) {
268  m_Fields.push_back(statOutput->registerField<CountType>("NumOutOfBounds-MinValue"));
269  m_Fields.push_back(statOutput->registerField<CountType>("NumOutOfBounds-MaxValue"));
270  }
271 
272  // Do we also need to dump the bin counts on output
273  if ( true == m_dumpBinsOnOutput ) {
274  BinDataType binLL;
275  BinDataType binUL;
276 
277  for ( uint32_t y = 0; y < getNumBins(); y++ ) {
278  // Figure out the upper and lower values for this bin
279  binLL = (y * (uint64_t)getBinWidth()) + getBinsMinValue(); // Force full 64-bit multiply -mpf 10/8/15
280  binUL = binLL + getBinWidth() - 1;
281  // Build the string name for this bin and add it as a field
282  std::stringstream ss;
283  ss << "Bin" << y << ":" << binLL << "-" << binUL;
284  m_Fields.push_back(statOutput->registerField<CountType>(ss.str().c_str()));
285  }
286  }
287  }
288 
289  void outputStatisticFields(StatisticFieldsOutput* statOutput, bool UNUSED(EndOfSimFlag)) override
290  {
291  StatisticOutput::fieldHandle_t x = 0;
292  statOutput->outputField(m_Fields[x++], getBinsMinValue());
293  statOutput->outputField(m_Fields[x++], getBinsMaxValue());
294  statOutput->outputField(m_Fields[x++], getBinWidth());
295  statOutput->outputField(m_Fields[x++], getNumBins());
296  statOutput->outputField(m_Fields[x++], getValuesSummed());
297  statOutput->outputField(m_Fields[x++], getValuesSquaredSummed());
298  statOutput->outputField(m_Fields[x++], getActiveBinCount());
299  statOutput->outputField(m_Fields[x++], getStatCollectionCount());
300  statOutput->outputField(m_Fields[x++], getItemsBinnedCount());
301 
302  if ( true == m_includeOutOfBounds ) {
303  statOutput->outputField(m_Fields[x++], m_OOBMinCount);
304  statOutput->outputField(m_Fields[x++], m_OOBMaxCount);
305  }
306 
307  // Do we also need to dump the bin counts on output
308  if ( true == m_dumpBinsOnOutput ) {
309  BinDataType currentBinValue = getBinsMinValue();
310  for ( uint32_t y = 0; y < getNumBins(); y++ ) {
311  statOutput->outputField(m_Fields[x++], getBinCountByBinStart(currentBinValue));
312  // Increment the currentBinValue to get the next bin
313  currentBinValue += getBinWidth();
314  }
315  }
316  }
317 
318 private:
319  // Bin Map Definition
320  using HistoMap_t = std::map<BinDataType, CountType>;
321 
322  // Iterator over the histogram bins
323  using HistoMapItr_t = typename HistoMap_t::iterator;
324 
325  // The minimum value in the Histogram
326  BinDataType m_minValue;
327 
328  // The width of each Histogram bin
329  NumBinsType m_binWidth;
330 
331  // The number of bins to be supported
332  NumBinsType m_numBins;
333 
334  // Out of bounds bins
335  CountType m_OOBMinCount;
336  CountType m_OOBMaxCount;
337 
338  // Count of Items that have binned, (Different than item count as some
339  // items may be out of bounds and not binned)
340  CountType m_itemsBinnedCount;
341 
342  // The sum of all values added into the Histogram, this is calculated and the sum of all values presented
343  // to be entered into the Histogram not with bin-width multiplied by the (max-min)/2 of the bin.
344  BinDataType m_totalSummed;
345 
346  // The sum of values added to the Histogram squared. Allows calculation of derivative statistic
347  // values such as variance.
348  BinDataType m_totalSummedSqr;
349 
350  // A map of the the bin starts to the bin counts
351  HistoMap_t m_binsMap;
352 
353  // Support
354  std::vector<StatisticOutput::fieldHandle_t> m_Fields;
355  bool m_dumpBinsOnOutput;
356  bool m_includeOutOfBounds;
357 
358  inline static const std::string stat_type_ = "Histogram";
359 };
360 
361 } // namespace SST::Statistics
362 
363 #endif // SST_CORE_STATAPI_STATHISTOGRAM_H
This class is basically a wrapper for objects to declare the order in which their members should be s...
Definition: serializer.h:42
virtual void setCollectionCount(uint64_t new_count)
Set the current collection count to a defined value.
Definition: statbase.cc:92
virtual const std::string & getStatTypeName() const
Return the Statistic type name.
Definition: statbase.h:123
Holder of data grouped into pre-determined width bins.
Definition: stathistogram.h:40
Forms the template defined base class for statistics gathering within SST.
Definition: elementinfo.h:46
fieldHandle_t registerField(const char *fieldName)
Register a field to be output (templated function)
Definition: statoutput.h:267
virtual void outputField(fieldHandle_t fieldHandle, int32_t data)
Output field data.
void pushAllowedKeys(const std::vector< std::string > &keys)
Definition: params.cc:242
Definition: statoutput.h:170
Main component object for the simulation.
Definition: baseComponent.h:64
std::enable_if_t<!std::is_same_v< std::string, T >, T > find(const std::string &k, T default_value, bool &found) const
Find a Parameter value in the set, and return its value as a type T.
Definition: params.h:333
virtual void clearStatisticData()
Inform the Statistic to clear its data.
Definition: statbase.h:61
Parameter store.
Definition: params.h:63
Definition: elementinfo.h:44
virtual void serialize_order(SST::Core::Serialization::serializer &ser) override
Definition: statbase.h:415
uint64_t getCollectionCount() const
Return the current collection count.
Definition: statbase.h:172