-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathexamplemain.cpp
151 lines (117 loc) · 4.72 KB
/
examplemain.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <RLGymPPO_CPP/Learner.h>
#include <RLGymSim_CPP/Utils/RewardFunctions/CommonRewards.h>
#include <RLGymSim_CPP/Utils/RewardFunctions/CombinedReward.h>
#include <RLGymSim_CPP/Utils/TerminalConditions/NoTouchCondition.h>
#include <RLGymSim_CPP/Utils/TerminalConditions/GoalScoreCondition.h>
#include <RLGymSim_CPP/Utils/OBSBuilders/DefaultOBS.h>
#include <RLGymSim_CPP/Utils/StateSetters/RandomState.h>
#include <RLGymSim_CPP/Utils/ActionParsers/DiscreteAction.h>
#include "RLBotClient.h"
using namespace RLGPC; // RLGymPPO
using namespace RLGSC; // RLGymSim
// Step callback: invoked on every step of every RocketSim game.
// WARNING: this runs on multiple threads, often at the same time, so only touch
// the arguments passed in unless you know what you're doing.
//   gameInst:    the game instance that stepped (not used here)
//   stepResult:  the result of the step, including the resulting game state
//   gameMetrics: the metrics report belonging to this specific game
void OnStep(GameInst* gameInst, const RLGSC::Gym::StepResult& stepResult, Report& gameMetrics) {
	// Each player in the stepped state contributes one sample to every metric
	for (auto& car : stepResult.state.players) {
		// Average player speed: magnitude of the velocity vector
		const float carSpeed = car.phys.vel.Length();
		gameMetrics.AccumAvg("player_speed", carSpeed);

		// Ball touch ratio: accumulate the player's ballTouchedStep value for this step
		gameMetrics.AccumAvg("ball_touch_ratio", car.ballTouchedStep);

		// In-air ratio: a player counts as airborne when its car is not on the ground
		const bool isAirborne = !car.carState.isOnGround;
		gameMetrics.AccumAvg("in_air_ratio", isAirborne);
	}
}
// Iteration callback: invoked each time an iteration completes, after learning.
// This is the place to add custom metrics to the overall metrics report.
//   learner:    the learner whose per-game metrics are collected
//   allMetrics: the report that receives the combined values
void OnIteration(Learner* learner, Report& allMetrics) {
	AvgTracker speedTracker = {};
	AvgTracker touchTracker = {};
	AvgTracker airTracker = {};

	// Take the per-game reports (one per gameInst) and fold each game's
	// averages into the matching tracker
	auto perGameReports = learner->GetAllGameMetrics();
	for (auto& report : perGameReports) {
		speedTracker += report.GetAvg("player_speed");
		touchTracker += report.GetAvg("ball_touch_ratio");
		airTracker += report.GetAvg("in_air_ratio");
	}

	// Publish the combined values under the same keys used in the step callback
	allMetrics["player_speed"] = speedTracker.Get();
	allMetrics["ball_touch_ratio"] = touchTracker.Get();
	allMetrics["in_air_ratio"] = airTracker.Get();
}
// Environment factory: builds the RLGymSim match and gym for each of our games.
// Returns the newly created { match, gym } pair.
EnvCreateResult EnvCreateFunc() {
	constexpr int TICK_SKIP = 8;
	constexpr float NO_TOUCH_TIMEOUT_SECS = 10.f;

	// No-touch timeout expressed in environment steps rather than seconds.
	// NOTE(review): 120 is presumably the physics tick rate per second - confirm.
	constexpr float NO_TOUCH_TIMEOUT_STEPS = NO_TOUCH_TIMEOUT_SECS * 120 / TICK_SKIP;

	// Every entry pairs a reward function with its weight: { RewardFunc(), weight }
	CombinedReward* rewardFn = new CombinedReward(
		{
			// Facing the ball: small weight
			{ new FaceBallReward(), 0.1f },

			// Moving towards the ball: moderate weight
			{ new VelocityPlayerToBallReward(), 0.5f },

			// Ball moving towards the goal: larger weight
			{ new VelocityBallToGoalReward(), 1.0f },

			// Scoring / conceding: by far the largest weight, positive for a team goal
			// and negative for being scored on
			{ new EventReward({.teamGoal = 1.f, .concede = -1.f}), 50.f },
		}
	);

	// Conditions passed to the match as terminal conditions
	std::vector<TerminalCondition*> endConditions = {
		new NoTouchCondition(NO_TOUCH_TIMEOUT_STEPS),
		new GoalScoreCondition()
	};

	auto obsBuilder = new DefaultOBS();
	auto parser = new DiscreteAction();

	// NOTE(review): meaning of the three RandomState flags is not visible here -
	// check the RandomState constructor.
	auto setter = new RandomState(true, true, true);

	auto match = new Match(
		rewardFn,
		endConditions,
		obsBuilder,
		parser,
		setter,

		1, // Team size
		true // Spawn opponents
	);

	auto gym = new Gym(match, TICK_SKIP);
	return { match, gym };
}
// Entry point: initializes RocketSim, fills in the learner configuration,
// hooks up the metric callbacks and starts training.
int main() {
	// RocketSim needs its collision meshes loaded before anything else
	RocketSim::Init("./collision_meshes");

	LearnerConfig cfg = {};

	// --- Threading ---
	// Tune these for your machine; more isn't always better
	cfg.numThreads = 16;
	cfg.numGamesPerThread = 24;

	// --- Batch sizes ---
	// A large iteration/batch size is wanted here.
	// Increase it as your bot improves, up to an extent.
	constexpr int STEPS_PER_ITERATION = 100'000;
	cfg.timestepsPerIteration = STEPS_PER_ITERATION;
	cfg.ppo.batchSize = STEPS_PER_ITERATION;
	cfg.ppo.miniBatchSize = 25'000; // Lower this if too much VRAM is being allocated
	cfg.expBufferSize = STEPS_PER_ITERATION * 3;

	// --- PPO hyperparameters ---
	// 1 epoch only to match the rlgym-ppo example.
	// The best value tends to be somewhere between 2 and 4;
	// raising it lowers SPS but increases step efficiency.
	cfg.ppo.epochs = 1;

	// Reasonable starting entropy
	cfg.ppo.entCoef = 0.01f;

	// Good learning rate to start with; may become too high around 100m steps.
	// Policy and critic share the same value.
	constexpr double START_LEARNING_RATE = 2e-4;
	cfg.ppo.policyLR = START_LEARNING_RATE;
	cfg.ppo.criticLR = START_LEARNING_RATE;

	// --- Model ---
	// Default model size
	cfg.ppo.policyLayerSizes = { 256, 256, 256 };
	cfg.ppo.criticLayerSizes = { 256, 256, 256 };

	// --- Output ---
	cfg.sendMetrics = true; // Send metrics
	cfg.renderMode = false; // Don't render

	// Build the learner from the environment creation function and the config above
	Learner learner(EnvCreateFunc, cfg);

	// Register the callbacks
	learner.stepCallback = OnStep;
	learner.iterationCallback = OnIteration;

	// Start learning!
	learner.Learn();

	return 0;
}