SST 16.0.0
Structural Simulation Toolkit
checkpointAction.h
1// Copyright 2009-2026 NTESS. Under the terms
2// of Contract DE-NA0003525 with NTESS, the U.S.
3// Government retains certain rights in this software.
4//
5// Copyright (c) 2009-2026, NTESS
6// All rights reserved.
7//
8// This file is part of the SST software package. For license
9// information, see the LICENSE file in the top level directory of the
10// distribution.
11
12#ifndef SST_CORE_CHECKPOINT_ACTION_H
13#define SST_CORE_CHECKPOINT_ACTION_H
14
15#include "sst/core/action.h"
16#include "sst/core/config.h"
17#include "sst/core/cputimer.h"
18#include "sst/core/output.h"
19#include "sst/core/rankInfo.h"
20#include "sst/core/sst_types.h"
21#include "sst/core/threadsafe.h"
22#include "sst/core/timeConverter.h"
23
24#include <cstdint>
25#include <set>
26#include <string>
27
28namespace SST {
29
30class Simulation;
31class TimeConverter;
32
33namespace Checkpointing {
34/* Utility functions needed to manage checkpoint directories. Only the declarations live here; the definitions are in a source file. */
35
36/**
37 Creates a directory of the specified basename. If a directory named
38 basename already exists, it will append an _N to the end,
39 incrementing N from 1 until it finds an unused name.
40 NOTE(review): the returned string is presumably the name of the directory that was actually created - confirm against the definition.
 */
41std::string createUniqueDirectory(const std::string basename);
42
43/**
44 Removes the directory called name. For safety, this will recurse and remove each
45 file individually instead of issuing an rm -r. It will not follow
46 links, but will simply remove the link.
47 */
48void removeDirectory(const std::string name);
49
50/**
51 Initializes the infrastructure needed for checkpointing. Uses the createUniqueDirectory() function to create the
52 directory, then broadcasts the name to all ranks.
53
54 This function is called twice: once after graph partitioning and once right after creation of the realtime manager
55 in the Simulation object. The first call will create the directory structure if any of the command line options other
56 than realtime actions enable checkpointing. This is needed so that the system can write out the ConfigGraph for
57 repartitioned restarts while it still has it in one piece (note, this is not done for parallel loads). We check
58 again after the realtime manager is created in case the only way to trigger a checkpoint is through signals.
59 NOTE(review): the exact meaning of can_ckpt and of the returned string (presumably the checkpoint directory name) is not visible in this header - confirm against the definition.
 */
60std::string initializeCheckpointInfrastructure(Config* cfg, bool can_ckpt, int myRank);
61
62} // namespace Checkpointing
63
64/**
65 \class CheckpointAction
66 A recurring event to trigger checkpoint generation.
 Derives from Action; the object is neither copyable nor copy-assignable.
67*/
68class CheckpointAction : public Action
69{
70public:
71 /**
72 Create a new checkpoint object for the simulation core to initiate checkpoints
 \param cfg Simulation configuration
 \param this_rank RankInfo for this thread/rank
 \param sim Simulation object the action is created for
 \param period Simulation time interval used for scheduling checkpoints
73 */
74 CheckpointAction(Config* cfg, RankInfo this_rank, Simulation* sim, TimeConverter period);
75 ~CheckpointAction() = default;
76
77 /**
78 Indicates CheckpointAction should be inserted into the
79 TimeVortex. The insertion will only happen for serial runs, as
80 CheckpointAction is managed by the SyncManager in parallel
81 runs.
82 */
83 void insertIntoTimeVortex(Simulation* sim);
84
85 /** Get checkpoint flag */
86 bool getCheckpoint();
87
88 /** Generate a checkpoint next time check() is called */
89 void setCheckpoint();
90
91 /** Called by TimeVortex to trigger checkpoint on simulation clock interval - not used in parallel simulation */
92 void execute() override;
93
94 /** Called by SyncManager to check whether a checkpoint should be generated */
95 SimTime_t check(SimTime_t current_time);
96
97 /** Return next checkpoint time */
98 SimTime_t getNextCheckpointSimTime();
99
100 static Core::ThreadSafe::Barrier barrier; // Barrier shared by all CheckpointAction instances
101 static uint32_t checkpoint_id; // Counter shared by all instances; presumably identifies the current checkpoint - confirm in the .cc
102
103 NotSerializable(SST::CheckpointAction);
104
105private:
106 CheckpointAction(const CheckpointAction&) = delete;
107 CheckpointAction& operator=(const CheckpointAction&) = delete;
108
109 void createCheckpoint(Simulation* sim); // The actual checkpoint operation
110
111 RankInfo rank_; // RankInfo for this thread/rank
112 TimeConverter period_; // Simulation time interval for scheduling (held by value, so never nullptr)
113 double last_cpu_time_; // Last time a checkpoint was triggered
114 bool generate_; // Whether a checkpoint should be done next time check() is called
115 SimTime_t next_sim_time_; // Next simulation time a checkpoint should trigger at or 0 if not applicable
116 std::string dir_format_; // Format string for checkpoint directory names
117 std::string file_format_; // Format string for checkpoint file names
118};
119
120} // namespace SST
121
122#endif // SST_CORE_CHECKPOINT_ACTION_H
A recurring event to trigger checkpoint generation.
Definition checkpointAction.h:69
void setCheckpoint()
Generate a checkpoint next time check() is called.
Definition checkpointAction.cc:280
bool getCheckpoint()
Get checkpoint flag.
Definition checkpointAction.cc:275
SimTime_t getNextCheckpointSimTime()
Return next checkpoint time.
Definition checkpointAction.cc:286
CheckpointAction(Config *cfg, RankInfo this_rank, Simulation *sim, TimeConverter period)
Create a new checkpoint object for the simulation core to initiate checkpoints.
Definition checkpointAction.cc:69
void execute() override
Called by TimeVortex to trigger checkpoint on simulation clock interval - not used in parallel simulation.
Definition checkpointAction.cc:145
void insertIntoTimeVortex(Simulation *sim)
Indicates CheckpointAction should be inserted into the TimeVortex.
Definition checkpointAction.cc:128
SimTime_t check(SimTime_t current_time)
Called by SyncManager to check whether a checkpoint should be generated.
Definition checkpointAction.cc:255
Class to contain SST Simulation Configuration variables.
Definition config.h:52
Definition threadsafe.h:50
Definition rankInfo.h:24
Main control class for a SST Simulation.
Definition simulation.h:121
A class to convert between a component's view of time and the core's view of time.
Definition timeConverter.h:31