SST  15.1.0
StructuralSimulationToolkit
realtime.h
1 // Copyright 2009-2025 NTESS. Under the terms
2 // of Contract DE-NA0003525 with NTESS, the U.S.
3 // Government retains certain rights in this software.
4 //
5 // Copyright (c) 2009-2025, NTESS
6 // All rights reserved.
7 //
8 // This file is part of the SST software package. For license
9 // information, see the LICENSE file in the top level directory of the
10 // distribution.
11 
12 #ifndef SST_CORE_REAL_TIME_ALARM_MANAGER_H
13 #define SST_CORE_REAL_TIME_ALARM_MANAGER_H
14 
15 #include "sst/core/realtimeAction.h"
16 #include "sst/core/serialization/serializable.h"
17 #include "sst/core/sst_types.h"
18 #include "sst/core/threadsafe.h"
19 #include "sst/core/warnmacros.h"
20 
21 #include <atomic>
22 #include <cstdint>
23 #include <ctime>
24 #include <map>
25 #include <set>
26 #include <signal.h>
27 #include <time.h>
28 #include <vector>
29 
30 namespace SST {
31 
32 class Output;
33 class RankInfo;
34 class UnitAlgebra;
35 
36 /* Action to cleanly exit the simulation.
 * Registered below in library "sst" under the name "rt.exit.clean". Per its registration
 * description it causes an immediate, but non-emergency shutdown and is the default action
 * for the '--exit-after' option.
 * NOTE(review): listing lines 37 and 44 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
38 {
39 public:
40  SST_ELI_REGISTER_REALTIMEACTION(ExitCleanRealTimeAction, "sst", "rt.exit.clean", SST_ELI_ELEMENT_VERSION(0, 1, 0),
41  "Signal handler that causes an immediate, but non-emergency shutdown. This is the default action for the "
42  "'--exit-after' option.")
43 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
45  virtual void execute() override;
 /* Overrides the base-class begin(); receives the scheduled time as a time_t. */
46  virtual void begin(time_t scheduled_time) override;
47 };
48 
49 /* Action to immediately exit the simulation.
 * Registered below in library "sst" under the name "rt.exit.emergency". Per its registration
 * description it causes an emergency shutdown and is the default action for SIGTERM and SIGINT.
 * NOTE(review): listing lines 50 and 57 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
51 {
52 public:
53  SST_ELI_REGISTER_REALTIMEACTION(ExitEmergencyRealTimeAction, "sst", "rt.exit.emergency",
54  SST_ELI_ELEMENT_VERSION(0, 1, 0),
55  "Signal handler that causes an emergency shutdown. This is the default action for SIGTERM and SIGINT.")
56 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
58  virtual void execute() override;
59 };
60 
61 /* Action to output core status.
 * Registered below in library "sst" under the name "rt.status.core". Per its registration
 * description it makes SST-Core print its status and is the default action for SIGUSR1.
 * NOTE(review): listing lines 62 and 68 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
63 {
64 public:
65  SST_ELI_REGISTER_REALTIMEACTION(CoreStatusRealTimeAction, "sst", "rt.status.core", SST_ELI_ELEMENT_VERSION(0, 1, 0),
66  "Signal handler that causes SST-Core to print its status. This is the default action for SIGUSR1.")
67 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
69  void execute() override;
70 };
71 
72 /* Action to output component status.
 * Registered below in library "sst" under the name "rt.status.all". Per its registration
 * description it makes SST-Core print its status along with component status and is the
 * default action for SIGUSR2.
 * NOTE(review): listing lines 73 and 81 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
74 {
75 public:
76  SST_ELI_REGISTER_REALTIMEACTION(ComponentStatusRealTimeAction, "sst", "rt.status.all",
77  SST_ELI_ELEMENT_VERSION(0, 1, 0),
78  "Signal handler that causes SST-Core to print its status along with component status. This is the default "
79  "action for SIGUSR2.")
80 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
82  void execute() override;
83 };
84 
85 /* Action to trigger a checkpoint on a time interval.
 * Registered below in library "sst" under the name "rt.checkpoint". Per its registration
 * description it makes SST generate a checkpoint and is the default action for the
 * '--checkpoint-wall-period' option.
 * NOTE(review): listing lines 86 and 93 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
87 {
88 public:
89  SST_ELI_REGISTER_REALTIMEACTION(CheckpointRealTimeAction, "sst", "rt.checkpoint", SST_ELI_ELEMENT_VERSION(0, 1, 0),
90  "Signal handler that causes SST to generate a checkpoint. This is the default action for the "
91  "'--checkpoint-wall-period' option.")
92 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
94  virtual void execute() override;
 /* Overrides the base-class begin(); receives the scheduled time as a time_t. */
95  virtual void begin(time_t scheduled_time) override;
96 
 /* Always returns true: this action reports that it can initiate a checkpoint. */
97  bool canInitiateCheckpoint() override { return true; }
98 };
99 
100 /* Action to generate a heartbeat message.
 * Registered below in library "sst" under the name "rt.heartbeat". Per its registration
 * description the heartbeat contains status and some resource usage information, and this is
 * the default action for the '--heartbeat-wall-period' option.
 * NOTE(review): listing lines 101 and 108 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
102 {
103 public:
104  SST_ELI_REGISTER_REALTIMEACTION(HeartbeatRealTimeAction, "sst", "rt.heartbeat", SST_ELI_ELEMENT_VERSION(0, 1, 0),
105  "Signal handler that causes SST to generate a heartbeat message (status and some resource usage information). "
106  "This is the default action for the '--heartbeat-wall-period' option.")
107 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
109  virtual void execute() override;
 /* Overrides the base-class begin(); receives the scheduled time as a time_t. */
110  virtual void begin(time_t scheduled_time) override;
111 
112 private:
 /* Per-instance timestamp; presumably the time of the previous heartbeat -- units and
  * meaning are not shown here, confirm in realtime.cc. */
113  double last_time_;
 /* Static atomic counter, i.e. one object shared by every instance of this class.
  * NOTE(review): the name suggests a maximum TimeVortex depth across threads -- confirm. */
114  static std::atomic<uint64_t> thr_max_tv_depth_;
 /* Static barrier, i.e. one object shared by every instance of this class. */
115  static Core::ThreadSafe::Barrier exchange_barrier_;
116 };
117 
118 
119 /* Action to trigger an interactive console.
 * Registered below in library "sst" under the name "rt.interactive". Per its registration
 * description it makes SST break into an interactive console based on the
 * --interactive-console flag.
 * NOTE(review): listing lines 120 and 127 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
 */
121 {
122 public:
123  SST_ELI_REGISTER_REALTIMEACTION(InteractiveRealTimeAction, "sst", "rt.interactive",
124  SST_ELI_ELEMENT_VERSION(0, 1, 0),
125  "Signal handler that causes SST to break into an interactive console based on the --interactive-console flag.")
126 
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
128  void execute() override;
 /* Always returns false. NOTE(review): by its name this presumably marks the action as not
  * usable as a SIGALRM (interval) action -- confirm against the base class. */
129  bool isValidSigalrmAction() override { return false; }
 /* Always returns true: this action reports that it can initiate a checkpoint. */
130  bool canInitiateCheckpoint() override { return true; }
131 };
132 
133 /* Wrapper for RealTimeActions that occur on a time interval.
 * Holds a non-null-checked raw pointer to the wrapped action together with its interval and
 * the next time it should fire.
 * NOTE(review): listing line 134 is absent from this extract (presumably the class
 * declaration) -- confirm against the original header.
 */
135 {
136 public:
 /* interval: presumably stored in alarm_interval_ (seconds); action: the action to wrap.
  * Ownership of `action` is not visible in this header -- confirm in realtime.cc. */
137  RealTimeIntervalAction(uint32_t interval, RealTimeAction* action);
138 
 /* Not an override (this declaration carries no `override`); takes the begin time as a time_t. */
139  void begin(time_t begin_time);
140 
 /* elapsed: presumably the number of seconds elapsed since the previous alarm -- confirm. */
141  void execute(uint32_t elapsed);
 /* Const accessor; presumably returns next_alarm_time_ -- confirm in realtime.cc. */
142  uint32_t getNextAlarmTime() const;
143 
144 private:
145  uint32_t alarm_interval_; /* Interval to trigger alarm at (seconds) */
146  uint32_t next_alarm_time_; /* Next time an alarm should be triggered for this alarm */
147  RealTimeAction* action_; /* Action to take when the alarm triggers */
148 };
149 
150 /* This class manages alarms but does not do any actions itself
151  * All times are stored in seconds
 * NOTE(review): listing lines 153 and 156 are absent from this extract (presumably the class
 * declaration and a constructor) -- confirm against the original header.
152  */
154 {
155 public:
 /* Overrides the base-class execute(); declared only, no definition is visible in this header. */
157  void execute() override;
 /* Adds an action to be triggered on an interval; presumably appended to interval_actions_
  * as a RealTimeIntervalAction, with `interval` in seconds -- confirm in realtime.cc. */
158  void addIntervalAction(uint32_t interval, RealTimeAction* action);
159  virtual void begin(time_t scheduled_time) override; // Start alarms
160 
161 private:
 /* The interval-wrapped actions handled by this manager, stored by value. */
162  std::vector<RealTimeIntervalAction> interval_actions_;
163  bool alarm_manager_; /* The instance on thread 0/rank 0 is the manager */
164  bool rank_leader_; /* The instance on thread 0 of each rank participates in MPI exchanges */
165  time_t last_time_; /* Last time a SIGALRM was received */
166  static uint32_t elapsed_; /* A static so that each thread's instance of this class shares the same one */
 /* Static barrier, i.e. one object shared by every instance of this class. */
167  static Core::ThreadSafe::Barrier exchange_barrier_;
168 };
169 
170 /** Class to manage real-time events (signals and alarms)
 *
 * Keeps a map from signal number to the RealTimeAction registered for it, and static
 * sig_atomic_t flags that record signals received from the OS.
 * NOTE(review): listing line 171 is absent from this extract (presumably the class
 * declaration; the serialize_order() override and ImplementSerializable below suggest a
 * serializable base) -- confirm against the original header.
 */
172 {
173 public:
 /* NOTE(review): how num_ranks is used is not visible here; presumably it determines
  * serial_exec_ -- confirm in realtime.cc. */
174  explicit RealTimeManager(RankInfo num_ranks);
 /* Default constructor. NOTE(review): presumably needed for deserialization -- confirm. */
175  RealTimeManager();
176 
177  /** Register actions */
178  void registerSignal(RealTimeAction* action, int signum);
 /* Registers an action to run on a time interval; `interval` is presumably in seconds -- confirm. */
179  void registerInterval(uint32_t interval, RealTimeAction* action);
180 
181  /** Begin monitoring signals */
182  void begin();
183 
184  /** Simulation run loop calls this when a signal has been received
185  * from the OS. One or more of the sig_X_from_os_ vars will be non-zero.
186  *
187  * Serial - this executes the relevant signal handler(s)
188  * Parallel - this saves the signals until the next global sync
189  */
190  void notifySignal();
191 
192  /** This is a request to execute the handler in response to a particular signal */
193  void performSignal(int signum);
194 
195  /* OS signal handling */
 /* All four are static, so they need no RealTimeManager instance. The three handlers take
  * the signal number as their only argument. */
196  static void installSignalHandlers();
197  static void SimulationSigEndHandler(int sig);
198  static void SimulationSigUsrHandler(int sig);
199  static void SimulationSigAlrmHandler(int sig);
200 
201  /* SyncManager request to get signals. Also clears local signals */
 /* The three int& parameters are presumably outputs receiving the pending signal values;
  * the meaning of the bool return is not visible here -- confirm in realtime.cc. */
202  bool getSignals(int& sig_end, int& sig_usr, int& sig_alrm);
203 
204  /**
205  Check whether or not any of the Actions registered with the
206  manager can initiate a checkpoint.
207  */
208  bool canInitiateCheckpoint() { return can_checkpoint_; }
209 
 /* Serialization hook; overrides the base-class serialize_order(). */
210  void serialize_order(SST::Core::Serialization::serializer& ser) override;
211  ImplementSerializable(SST::RealTimeManager)
212 
213 private:
214  bool serial_exec_; // Whether execution is serial or parallel
215  bool can_checkpoint_ = false; // Set to true if any Actions can trigger checkpoint
216 
217  /* The set of signal handlers for all signals */
218  std::map<int, RealTimeAction*> signal_actions_;
219 
 /* Static flags, shared by all instances. Per the notifySignal() comment, one or more of
  * these is non-zero when a signal has been received from the OS. */
220  static sig_atomic_t sig_alrm_from_os_;
221  static sig_atomic_t sig_usr_from_os_;
222  static sig_atomic_t sig_end_from_os_;
223 
 /* Per-instance signal values. NOTE(review): presumably the "local signals" that
  * getSignals() hands out and clears -- confirm in realtime.cc. */
224  int sig_alrm_;
225  int sig_usr_;
226  int sig_end_;
227 };
228 
229 } // namespace SST
230 #endif /* SST_CORE_REAL_TIME_ALARM_MANAGER_H */
bool canInitiateCheckpoint()
Check whether or not any of the Actions registered with the manager can initiate a checkpoint...
Definition: realtime.h:208
This class is basically a wrapper for objects to declare the order in which their members should be s...
Definition: serializer.h:42
Definition: realtime.h:153
Definition: realtime.h:37
Definition: realtime.h:86
Definition: action.cc:18
void notifySignal()
Simulation run loop calls this when a signal has been received from the OS.
Definition: realtime.cc:608
Definition: serializable.h:23
An event to trigger at a real-time interval.
Definition: realtimeAction.h:31
Definition: realtime.h:62
Definition: realtime.h:120
Definition: rankInfo.h:23
bool canInitiateCheckpoint() override
Lets the core know if this action may trigger a checkpoint so that all the checkpoint infrastructure...
Definition: realtime.h:130
void begin()
Begin monitoring signals.
Definition: realtime.cc:595
Definition: realtime.h:134
Definition: realtime.h:73
bool canInitiateCheckpoint() override
Lets the core know if this action may trigger a checkpoint so that all the checkpoint infrastructure...
Definition: realtime.h:97
void registerSignal(RealTimeAction *action, int signum)
Register actions.
Definition: realtime.cc:576
Definition: realtime.h:101
Class to manage real-time events (signals and alarms)
Definition: realtime.h:171
Definition: realtime.h:50
void performSignal(int signum)
This is a request to execute the handler in response to a particular signal.
Definition: realtime.cc:664
Definition: threadsafe.h:49