SST 15.0
Structural Simulation Toolkit
checkpointAction.h
1// Copyright 2009-2025 NTESS. Under the terms
2// of Contract DE-NA0003525 with NTESS, the U.S.
3// Government retains certain rights in this software.
4//
5// Copyright (c) 2009-2025, NTESS
6// All rights reserved.
7//
8// This file is part of the SST software package. For license
9// information, see the LICENSE file in the top level directory of the
10// distribution.
11
12#ifndef SST_CORE_CHECKPOINT_ACTION_H
13#define SST_CORE_CHECKPOINT_ACTION_H
14
15#include "sst/core/action.h"
16#include "sst/core/config.h"
17#include "sst/core/cputimer.h"
18#include "sst/core/output.h"
19#include "sst/core/rankInfo.h"
20#include "sst/core/sst_types.h"
21#include "sst/core/threadsafe.h"
22
23#include <cstdint>
24#include <set>
25#include <string>
26
27namespace SST {
28
29class Simulation_impl;
30class TimeConverter;
31
32namespace Checkpointing {
33/* Utility functions needed to manage directories */
34
35/**
36 Creates a directory of the specified basename. If a directory named
37 basename already exists, it will append an _N to the end,
38 incrementing N from 1 until it finds an unused name.
39 */
40std::string createUniqueDirectory(const std::string basename);
41
42/**
43 Removes a directory. For safety, this will recurse and remove each
44 file individually instead of issuing an rm -r. It will not follow
45 links, but will simply remove the link.
46 */
47void removeDirectory(const std::string name);
48
49/**
50 Initializes the infrastructure needed for checkpointing. Uses the
51 createUniqueDirectory() function to create the directory, then
52 broadcasts the name to all ranks.
53 */
54std::string initializeCheckpointInfrastructure(Config* cfg, bool rt_can_ckpt, int myRank);
55
56} // namespace Checkpointing
57
58/**
59 \class CheckpointAction
60 A recurring event to trigger checkpoint generation
61*/
62class CheckpointAction : public Action
63{
64public:
65 /**
66 Create a new checkpoint object for the simulation core to initiate checkpoints
67 */
68 CheckpointAction(Config* cfg, RankInfo this_rank, Simulation_impl* sim, TimeConverter* period);
69 ~CheckpointAction() = default;
70
71 /**
72 Indicates CheckpointAction should be inserted into the
73 TimeVortex. The insertion will only happen for serial runs, as
74 CheckpointAction is managed by the SyncManager in parallel
75 runs.
76 */
78
79 /** Generate a checkpoint next time check() is called */
80 void setCheckpoint();
81
82 /** Called by TimeVortex to trigger checkpoint on simulation clock interval - not used in parallel simulation */
83 void execute() override;
84
85 /** Called by SyncManager to check whether a checkpoint should be generated */
86 SimTime_t check(SimTime_t current_time);
87
88 /** Return next checkpoint time */
89 SimTime_t getNextCheckpointSimTime();
90
91 static Core::ThreadSafe::Barrier barrier;
92 static uint32_t checkpoint_id;
93
94 NotSerializable(SST::CheckpointAction);
95
96private:
97 CheckpointAction(const CheckpointAction&) = delete;
98 CheckpointAction& operator=(const CheckpointAction&) = delete;
99
100 void createCheckpoint(Simulation_impl* sim); // The actual checkpoint operation
101
102 RankInfo rank_; // RankInfo for this thread/rank
103 TimeConverter* period_; // Simulation time interval for scheduling or nullptr if not set
104 double last_cpu_time_; // Last time a checkpoint was triggered
105 bool generate_; // Whether a checkpoint should be done next time check() is called
106 SimTime_t next_sim_time_; // Next simulationt ime a checkpoint should trigger at or 0 if not applicable
107 std::string dir_format_; // Format string for checkpoint directory names
108 std::string file_format_; // Format string for checkpoint file names
109};
110
111} // namespace SST
112
113#endif // SST_CORE_CHECKPOINT_ACTION_H
A recurring event to trigger checkpoint generation.
Definition checkpointAction.h:63
void setCheckpoint()
Generate a checkpoint next time check() is called.
Definition checkpointAction.cc:272
void insertIntoTimeVortex(Simulation_impl *sim)
Indicates CheckpointAction should be inserted into the TimeVortex.
Definition checkpointAction.cc:128
SimTime_t getNextCheckpointSimTime()
Return next checkpoint time.
Definition checkpointAction.cc:278
CheckpointAction(Config *cfg, RankInfo this_rank, Simulation_impl *sim, TimeConverter *period)
Create a new checkpoint object for the simulation core to initiate checkpoints.
Definition checkpointAction.cc:69
void execute() override
Called by TimeVortex to trigger checkpoint on simulation clock interval - not used in parallel simulation.
Definition checkpointAction.cc:145
SimTime_t check(SimTime_t current_time)
Called by SyncManager to check whether a checkpoint should be generated.
Definition checkpointAction.cc:252
Class to contain SST Simulation Configuration variables.
Definition config.h:41
Definition threadsafe.h:46
Definition rankInfo.h:24
Main control class for a SST Simulation.
Definition simulation_impl.h:87
A class to convert between a component's view of time and the core's view of time.
Definition timeConverter.h:28