SST  15.1.0
StructuralSimulationToolkit
checkpointAction.h
1 // Copyright 2009-2025 NTESS. Under the terms
2 // of Contract DE-NA0003525 with NTESS, the U.S.
3 // Government retains certain rights in this software.
4 //
5 // Copyright (c) 2009-2025, NTESS
6 // All rights reserved.
7 //
8 // This file is part of the SST software package. For license
9 // information, see the LICENSE file in the top level directory of the
10 // distribution.
11 
12 #ifndef SST_CORE_CHECKPOINT_ACTION_H
13 #define SST_CORE_CHECKPOINT_ACTION_H
14 
15 #include "sst/core/action.h"
16 #include "sst/core/config.h"
17 #include "sst/core/cputimer.h"
18 #include "sst/core/output.h"
19 #include "sst/core/rankInfo.h"
20 #include "sst/core/sst_types.h"
21 #include "sst/core/threadsafe.h"
22 
23 #include <cstdint>
24 #include <set>
25 #include <string>
26 
27 namespace SST {
28 
29 class Simulation_impl;
30 class TimeConverter;
31 
32 namespace Checkpointing {
33 /* Utility functions needed to manage directories */
34 
/**
 * Create a directory whose name is derived from @p basename.
 *
 * When a directory called @p basename is already present, a suffix of
 * the form _N is appended, with N starting at 1 and counting upward
 * until a name that is not yet in use is found.
 *
 * @param basename Preferred name of the directory to create.
 * @return NOTE(review): presumably the name of the directory that was
 *         actually created (callers broadcast "the name") - confirm
 *         against the definition.
 */
std::string createUniqueDirectory(const std::string basename);
41 
/**
 * Remove the directory called @p name.
 *
 * As a safety measure the directory tree is walked and every file is
 * removed one at a time rather than issuing an rm -r. Links are never
 * followed; the link itself is simply removed.
 *
 * @param name Directory to remove.
 */
void removeDirectory(const std::string name);
48 
49 /**
50  Initializes the infrastructure needed for checkpointing. Uses the
51  createUniqueDirectory() function to create the directory, then
52  broadcasts the name to all ranks.
53  */
54 std::string initializeCheckpointInfrastructure(Config* cfg, bool rt_can_ckpt, int myRank);
55 
56 } // namespace Checkpointing
57 
58 /**
59  \class CheckpointAction
60  A recurring event to trigger checkpoint generation
61 */
62 class CheckpointAction : public Action
63 {
64 public:
65  /**
66  Create a new checkpoint object for the simulation core to initiate checkpoints
67  */
68  CheckpointAction(Config* cfg, RankInfo this_rank, Simulation_impl* sim, TimeConverter* period);
69  ~CheckpointAction() = default;
70 
71  /**
72  Indicates CheckpointAction should be inserted into the
73  TimeVortex. The insertion will only happen for serial runs, as
74  CheckpointAction is managed by the SyncManager in parallel
75  runs.
76  */
78 
79  /** Generate a checkpoint next time check() is called */
80  void setCheckpoint();
81 
82  /** Called by TimeVortex to trigger checkpoint on simulation clock interval - not used in parallel simulation */
83  void execute() override;
84 
85  /** Called by SyncManager to check whether a checkpoint should be generated */
86  SimTime_t check(SimTime_t current_time);
87 
88  /** Return next checkpoint time */
89  SimTime_t getNextCheckpointSimTime();
90 
91  static Core::ThreadSafe::Barrier barrier;
92  static uint32_t checkpoint_id;
93 
94  NotSerializable(SST::CheckpointAction);
95 
96 private:
97  CheckpointAction(const CheckpointAction&) = delete;
98  CheckpointAction& operator=(const CheckpointAction&) = delete;
99 
100  void createCheckpoint(Simulation_impl* sim); // The actual checkpoint operation
101 
102  RankInfo rank_; // RankInfo for this thread/rank
103  TimeConverter* period_; // Simulation time interval for scheduling or nullptr if not set
104  double last_cpu_time_; // Last time a checkpoint was triggered
105  bool generate_; // Whether a checkpoint should be done next time check() is called
106  SimTime_t next_sim_time_; // Next simulationt ime a checkpoint should trigger at or 0 if not applicable
107  std::string dir_format_; // Format string for checkpoint directory names
108  std::string file_format_; // Format string for checkpoint file names
109 };
110 
111 } // namespace SST
112 
113 #endif // SST_CORE_CHECKPOINT_ACTION_H
An Action is a schedulable Activity which is not an Event.
Definition: action.h:26
void execute() override
Called by TimeVortex to trigger checkpoint on simulation clock interval - not used in parallel simulation.
Definition: checkpointAction.cc:145
Class to contain SST Simulation Configuration variables.
Definition: config.h:51
CheckpointAction(Config *cfg, RankInfo this_rank, Simulation_impl *sim, TimeConverter *period)
Create a new checkpoint object for the simulation core to initiate checkpoints
Definition: checkpointAction.cc:69
A class to convert between a component's view of time and the core's view of time.
Definition: timeConverter.h:27
A recurring event to trigger checkpoint generation.
Definition: checkpointAction.h:62
Definition: action.cc:18
SimTime_t getNextCheckpointSimTime()
Return next checkpoint time.
Definition: checkpointAction.cc:281
SimTime_t check(SimTime_t current_time)
Called by SyncManager to check whether a checkpoint should be generated.
Definition: checkpointAction.cc:255
Main control class for a SST Simulation.
Definition: simulation_impl.h:122
Definition: rankInfo.h:23
void setCheckpoint()
Generate a checkpoint next time check() is called.
Definition: checkpointAction.cc:275
void insertIntoTimeVortex(Simulation_impl *sim)
Indicates CheckpointAction should be inserted into the TimeVortex.
Definition: checkpointAction.cc:128
Definition: threadsafe.h:49