SST  14.1.0
StructuralSimulationToolkit
realtime.h
1 // Copyright 2009-2024 NTESS. Under the terms
2 // of Contract DE-NA0003525 with NTESS, the U.S.
3 // Government retains certain rights in this software.
4 //
5 // Copyright (c) 2009-2024, NTESS
6 // All rights reserved.
7 //
8 // This file is part of the SST software package. For license
9 // information, see the LICENSE file in the top level directory of the
10 // distribution.
11 
12 #ifndef SST_CORE_REAL_TIME_ALARM_MANAGER_H
13 #define SST_CORE_REAL_TIME_ALARM_MANAGER_H
14 
15 #include "sst/core/realtimeAction.h"
16 #include "sst/core/serialization/serializable.h"
17 #include "sst/core/sst_types.h"
18 #include "sst/core/threadsafe.h"
19 #include "sst/core/warnmacros.h"
20 
21 #include <map>
22 #include <set>
23 #include <signal.h>
24 #include <time.h>
25 #include <vector>
26 
27 namespace SST {
28 
29 class Output;
30 class RankInfo;
31 class UnitAlgebra;
32 
33 /* Action to cleanly exit the simulation.
 * The ELI registration below gives it the identifiers "sst" / "rt.exit.clean" and
 * describes it as an immediate, but non-emergency, shutdown that is the default
 * action for the '--exit-after' option.
 * NOTE(review): listing lines 34 (the class declaration) and 42 are absent from
 * this extract, so the base class is not visible here.
 */
35 {
36 public:
37  SST_ELI_REGISTER_REALTIMEACTION(
38  ExitCleanRealTimeAction, "sst", "rt.exit.clean", SST_ELI_ELEMENT_VERSION(0, 1, 0),
39  "Signal handler that causes an immediate, but non-emergency shutdown. This is the default action for the "
40  "'--exit-after' option.")
41 
43  virtual void execute() override; /* Declared only; defined outside this header */
44  virtual void begin(time_t scheduled_time) override; /* Declared only; presumably called when the action is scheduled - confirm */
45 };
46 
47 /* Action to immediately exit the simulation.
 * The ELI registration below gives it the identifiers "sst" / "rt.exit.emergency"
 * and describes it as an emergency shutdown that is the default action for
 * SIGTERM and SIGINT.
 * NOTE(review): listing lines 48 (the class declaration) and 55 are absent from
 * this extract.
 */
49 {
50 public:
51  SST_ELI_REGISTER_REALTIMEACTION(
52  ExitEmergencyRealTimeAction, "sst", "rt.exit.emergency", SST_ELI_ELEMENT_VERSION(0, 1, 0),
53  "Signal handler that causes an emergency shutdown. This is the default action for SIGTERM and SIGINT.")
54 
56  virtual void execute() override; /* Declared only; defined outside this header */
57 };
58 
59 /* Action to output core status.
 * The ELI registration below gives it the identifiers "sst" / "rt.status.core"
 * and describes it as printing SST-Core's status; it is the default action for
 * SIGUSR1.
 * NOTE(review): listing lines 60 (the class declaration) and 67 are absent from
 * this extract.
 */
61 {
62 public:
63  SST_ELI_REGISTER_REALTIMEACTION(
64  CoreStatusRealTimeAction, "sst", "rt.status.core", SST_ELI_ELEMENT_VERSION(0, 1, 0),
65  "Signal handler that causes SST-Core to print its status. This is the default action for SIGUSR1.")
66 
68  void execute() override; /* Declared only; defined outside this header */
69 };
70 
71 /* Action to output component status.
 * The ELI registration below gives it the identifiers "sst" / "rt.status.all"
 * and describes it as printing SST-Core's status along with component status;
 * it is the default action for SIGUSR2.
 * NOTE(review): listing lines 72 (the class declaration) and 80 are absent from
 * this extract.
 */
73 {
74 public:
75  SST_ELI_REGISTER_REALTIMEACTION(
76  ComponentStatusRealTimeAction, "sst", "rt.status.all", SST_ELI_ELEMENT_VERSION(0, 1, 0),
77  "Signal handler that causes SST-Core to print its status along with component status. This is the default "
78  "action for SIGUSR2.")
79 
81  void execute() override; /* Declared only; defined outside this header */
82 };
83 
84 /* Action to trigger a checkpoint on a time interval.
 * The ELI registration below gives it the identifiers "sst" / "rt.checkpoint"
 * and describes it as the default action for the '--checkpoint-wall-period'
 * option.
 * NOTE(review): listing lines 85 (the class declaration) and 93 are absent from
 * this extract.
 */
86 {
87 public:
88  SST_ELI_REGISTER_REALTIMEACTION(
89  CheckpointRealTimeAction, "sst", "rt.checkpoint", SST_ELI_ELEMENT_VERSION(0, 1, 0),
90  "Signal handler that causes SST to generate a checkpoint. This is the default action for the "
91  "'--checkpoint-wall-period' option.")
92 
94  virtual void execute() override; /* Declared only; defined outside this header */
95  virtual void begin(time_t scheduled_time) override; /* Declared only; defined outside this header */
96 
 /* Always reports true: this action is one that may initiate a checkpoint. */
97  bool canInitiateCheckpoint() override { return true; }
98 };
99 
100 /* Action to generate a heartbeat message.
 * The ELI registration below gives it the identifiers "sst" / "rt.heartbeat"
 * and describes the message as status plus some resource usage information;
 * it is the default action for the '--heartbeat-wall-period' option.
 * NOTE(review): listing lines 101 (the class declaration) and 109 are absent
 * from this extract.
 * NOTE(review): std::atomic is used below, but <atomic> is not among the
 * includes visible in this header - presumably it arrives via threadsafe.h;
 * confirm.
 */
102 {
103 public:
104  SST_ELI_REGISTER_REALTIMEACTION(
105  HeartbeatRealTimeAction, "sst", "rt.heartbeat", SST_ELI_ELEMENT_VERSION(0, 1, 0),
106  "Signal handler that causes SST to generate a heartbeat message (status and some resource usage information). "
107  "This is the default action for the '--heartbeat-wall-period' option.")
108 
110  virtual void execute() override; /* Declared only; defined outside this header */
111  virtual void begin(time_t scheduled_time) override; /* Declared only; defined outside this header */
112 
113 private:
114  double last_time_; /* Presumably the time of the previous heartbeat; units not shown here - confirm */
115  static std::atomic<uint64_t> thr_max_tv_depth_; /* Static, so one value is shared by all instances; meaning not shown here - TODO confirm */
116  static Core::ThreadSafe::Barrier exchange_barrier_; /* Static barrier shared by all instances; presumably synchronizes threads around the heartbeat - confirm */
117 };
118 
119 /* Wrapper for RealTimeActions that occur on a time interval.
 * Pairs a RealTimeAction pointer with an interval in seconds and tracks when
 * the next alarm for that action is due.
 * NOTE(review): listing line 120 (the class declaration) is absent from this
 * extract.
 */
121 {
122 public:
 /* interval: alarm interval in seconds (see alarm_interval_); action: the wrapped action.
  * NOTE(review): ownership of `action` is not visible from this header. */
123  RealTimeIntervalAction(uint32_t interval, RealTimeAction* action);
124 
125  void begin(time_t begin_time); /* Declared only; defined outside this header */
126 
127  void execute(uint32_t elapsed); /* `elapsed` is presumably seconds since the last alarm - confirm */
128  uint32_t getNextAlarmTime() const; /* Presumably returns next_alarm_time_ - confirm */
129 
130 private:
131  uint32_t alarm_interval_; /* Interval at which to trigger the alarm (seconds) */
132  uint32_t next_alarm_time_; /* Next time this alarm should be triggered */
133  RealTimeAction* action_; /* Action to take when the alarm triggers */
134 };
135 
136 /* This class manages alarms but does not perform any actions itself.
137  * All times are stored in seconds.
 * It holds a list of interval actions (see interval_actions_).
 * NOTE(review): listing lines 139 (the class declaration) and 142 are absent
 * from this extract.
138  */
140 {
141 public:
143  void execute() override; /* Declared only; defined outside this header */
144  void addIntervalAction(uint32_t interval, RealTimeAction* action); /* Presumably appends to interval_actions_; interval in seconds - confirm */
145  virtual void begin(time_t scheduled_time) override; // Start alarms
146 private:
147  std::vector<RealTimeIntervalAction> interval_actions_; /* Interval actions managed by this instance */
148  bool alarm_manager_; /* The instance on thread 0/rank 0 is the manager */
149  bool rank_leader_; /* The instance on thread 0 of each rank participates in MPI exchanges */
150  time_t last_time_; /* Last time a SIGALRM was received */
151  static uint32_t elapsed_; /* Static so that every thread's instance of this class shares the same value */
152  static Core::ThreadSafe::Barrier exchange_barrier_; /* Static barrier shared by all instances; exact use not shown here - confirm */
153 };
154 
155 /** Class to manage real-time events (signals and alarms).
 * Keeps a map from signal number to the RealTimeAction registered for it and
 * declares the static OS signal handlers.
 * NOTE(review): listing line 156 (the class declaration) is absent from this
 * extract; the serialize_order override and ImplementSerializable below suggest
 * a serializable base class - confirm.
 */
157 {
158 public:
159  RealTimeManager(RankInfo num_ranks);
160  RealTimeManager();
161 
162  /** Register actions */
163  void registerSignal(RealTimeAction* action, int signum);
164  void registerInterval(uint32_t interval, RealTimeAction* action);
165 
166  /** Begin monitoring signals */
167  void begin();
168 
169  /** Simulation run loop calls this when a signal has been received
170  * from the OS. One or more of the sig_X_from_os_ vars will be non-zero.
171  *
172  * Serial - this executes the relevant signal handler(s)
173  * Parallel - this saves the signals until the next global sync
174  */
175  void notifySignal();
176 
177  /** This is a request to execute the handler in response to a particular signal */
178  void performSignal(int signum);
179 
180  /* OS signal handling */
181  static void installSignalHandlers();
182  static void SimulationSigEndHandler(int sig);
183  static void SimulationSigUsrHandler(int sig);
184  static void SimulationSigAlrmHandler(int sig);
185 
186  /* SyncManager request to get signals. Also clears local signals */
187  bool getSignals(int& sig_end, int& sig_usr, int& sig_alrm);
188 
189  /**
190  Check whether or not any of the Actions registered with the
191  manager can initiate a checkpoint.
 Returns the current value of can_checkpoint_.
192  */
193  bool canInitiateCheckpoint() { return can_checkpoint_; }
194 
195  void serialize_order(SST::Core::Serialization::serializer& ser) override;
196  ImplementSerializable(SST::RealTimeManager)
197 
198 private:
199  bool serial_exec_; // Whether execution is serial or parallel
200  bool can_checkpoint_ = false; // Set to true if any Actions can trigger checkpoint
201 
202  /* The set of signal handlers for all signals */
203  std::map<int, RealTimeAction*> signal_actions_;
204 
 /* Static flags of type sig_atomic_t; presumably written by the static
  * Simulation*Handler functions above when the OS delivers a signal - confirm. */
205  static sig_atomic_t sig_alrm_from_os_;
206  static sig_atomic_t sig_usr_from_os_;
207  static sig_atomic_t sig_end_from_os_;
208 
 /* Per-instance signal state; presumably the "local signals" that getSignals()
  * reports and clears - confirm. */
209  int sig_alrm_;
210  int sig_usr_;
211  int sig_end_;
212 };
213 
214 } // namespace SST
215 #endif /* SST_CORE_REAL_TIME_ALARM_MANAGER_H */
bool canInitiateCheckpoint()
Check whether or not any of the Actions registered with the manager can initiate a checkpoint...
Definition: realtime.h:193
This class is basically a wrapper for objects to declare the order in which their members should be s...
Definition: serializer.h:43
Definition: realtime.h:139
Definition: realtime.h:34
Definition: realtime.h:85
Definition: action.cc:18
void notifySignal()
Simulation run loop calls this when a signal has been received from the OS.
Definition: realtime.cc:575
Definition: serializable.h:24
An event to trigger at a real-time interval.
Definition: realtimeAction.h:28
Definition: realtime.h:60
Definition: rankInfo.h:21
void begin()
Begin monitoring signals.
Definition: realtime.cc:564
Definition: realtime.h:120
Definition: realtime.h:72
bool canInitiateCheckpoint() override
Lets the core know if this action may trigger a checkpoint so that all the checkpoint infrastructure...
Definition: realtime.h:97
void registerSignal(RealTimeAction *action, int signum)
Register actions.
Definition: realtime.cc:545
Definition: realtime.h:101
Class to manage real-time events (signals and alarms)
Definition: realtime.h:156
Definition: realtime.h:48
void performSignal(int signum)
This is a request to execute the handler in response to a particular signal.
Definition: realtime.cc:629
Definition: threadsafe.h:47