adding final updates
parent 353a576da5
commit f44424e02f

README.md (10 lines changed)

@@ -18,17 +18,13 @@ Made a symlink -> ln -s ~/dev/balatro-rl/RLBridge /mnt/gamerlinuxssd/SteamLibrar

### File-based Communication
- [x] JSON file communication system
- [x] Lua file writer in mod
- [x] Python file watcher with watchdog (see the watcher sketch after this hunk)
- [x] Game state transmission (hand cards, chips, available actions)
- [x] Action reception and execution

### RL Training
- [x] Python RL environment setup
- [x] AI model architecture
- [x] Training loop integration
- [x] Python RL custom environment setup

### Game Features
- [x] Always have restart_run as an action option assuming the game is ongoing
- [x] Make it so that if we lose, we can restart, or if we win a round and see the "cash out"
page, then we also restart. But getting to the "cash out" state should give a ton of reward to incentivize
the AI
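
The file-based bridge in the checklist above is named but not shown. Here is a minimal sketch of the Python side, assuming the mod writes the full game state to a single JSON file; the file names, the `on_modified`-driven loop, and `choose_action` are assumptions for illustration, not the bridge's actual protocol:

```python
import json

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

# Hypothetical file names -- the real bridge may use different paths or a handshake file.
STATE_FILE = "game_state.json"
ACTION_FILE = "action.json"

def choose_action(state):
    # Placeholder policy: just take the first legal action the mod reports.
    return {"action": state.get("available_actions", ["restart_run"])[0]}

class GameStateHandler(FileSystemEventHandler):
    def on_modified(self, event):
        # Fires whenever the Lua mod rewrites the state file.
        if not event.src_path.endswith(STATE_FILE):
            return
        with open(event.src_path) as f:
            state = json.load(f)  # hand cards, chips, available actions, ...
        with open(ACTION_FILE, "w") as f:
            json.dump(choose_action(state), f)

if __name__ == "__main__":
    observer = Observer()
    observer.schedule(GameStateHandler(), path=".", recursive=False)
    observer.start()
    try:
        observer.join()
    except KeyboardInterrupt:
        observer.stop()
        observer.join()
```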

@@ -47,6 +43,10 @@ chips. Perhaps we just want to get wins of rounds just scoring chips is not enou

- We would probably store the raw requests and raw responses, and if we win, we can save; if not, we can reset the list
- The idea is that I'll have the seed, so I can just look at the actions (the requests and responses), plug the seed in manually in the game, and play it out myself
- Add something where we only keep the top 5. I don't want to have a long log of a bunch of wins
- [x] Should I reward higher for beating the game in the least amount of hands possible? Notice that if it plays more hands it gets more reward vs. if it plays 1 hand, even if it's a really good hand
- [x] On that note, should we give more reward for having MONSTER hands? For example, they are getting rewards based on blind size, but what if they surpass that by a bunch, like getting the blind size or greater in one hand? Maybe that solves the above problem?
- [ ] Speed up training somehow. Is parallelization possible? Maybe through Docker and buying Balatro on multiple Steam accounts (see the vectorized-env sketch after this hunk)

### DEBUGGING
- [ ] I think it's counting the reset as an episode? Review how it calculates episodes for logging in rewards; I think something MIGHT be wrong. Also just check it in general because AI wrote it; I might need to update it
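
On the parallelization question above: if the custom environment follows the Gymnasium API (an assumption), the usual first step is a vectorized environment such as stable-baselines3's `SubprocVecEnv`. Each worker would still need its own running game and its own bridge files, so this is a sketch of the Python plumbing only; `BalatroEnv`, `balatro_env`, and `bridge_dir` are hypothetical names:

```python
# Sketch only: assumes a Gymnasium-style BalatroEnv that takes a per-instance
# bridge directory, and one running Balatro instance per worker.
from stable_baselines3.common.vec_env import SubprocVecEnv

def make_env(worker_id: int):
    def _init():
        from balatro_env import BalatroEnv  # hypothetical module/class name
        return BalatroEnv(bridge_dir=f"/tmp/balatro_bridge_{worker_id}")
    return _init

if __name__ == "__main__":
    n_workers = 4  # one game instance (Docker container / Steam account) per worker
    vec_env = SubprocVecEnv([make_env(i) for i in range(n_workers)])
    obs = vec_env.reset()
```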

@@ -29,10 +29,11 @@ class BalatroRewardCalculator:
         self.winning_chips = 0  # Store chips when blind is defeated

         # Percentage-based reward thresholds (% of blind requirement)
+        # Updated to encourage bigger single hands
         self.REWARD_THRESHOLDS = {
-            "excellent": 80.0,  # 80%+ of blind requirement
-            "good": 50.0,       # 50-79% of blind requirement
-            "decent": 25.0      # 25-49% of blind requirement
+            "excellent": 75.0,  # 75%+ of blind requirement (lowered from 80%)
+            "good": 40.0,       # 40-74% of blind requirement (lowered from 50%)
+            "decent": 20.0      # 20-39% of blind requirement (lowered from 25%)
         }

     def calculate_reward(self, current_state: Dict[str, Any],
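
A standalone illustration of what the lowered cutoffs change; this helper is not part of the mod, the values are just copied from the hunk above:

```python
# Illustrative only: classify a hand by the percentage of the blind it scored.
REWARD_THRESHOLDS = {"excellent": 75.0, "good": 40.0, "decent": 20.0}

def tier(chip_percentage: float) -> str:
    for name in ("excellent", "good", "decent"):
        if chip_percentage >= REWARD_THRESHOLDS[name]:
            return name
    return "none"

assert tier(78.0) == "excellent"  # would only have been "good" under the old 80% cutoff
assert tier(45.0) == "good"       # would only have been "decent" under the old 50% cutoff
assert tier(22.0) == "decent"     # would have earned nothing under the old 25% cutoff
```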

@@ -81,7 +82,11 @@ class BalatroRewardCalculator:
         # Calculate percentage of blind requirement this hand achieved
         chip_percentage = (chip_gain / blind_chips) * 100

-        if chip_percentage >= self.REWARD_THRESHOLDS["excellent"]:
+        # Monster hand bonus - overkill reward for beating blind in one shot
+        if chip_percentage >= 100:
+            reward += 20.0  # Huge bonus for one-shot blind completion
+            reward_breakdown.append(f"MONSTER HAND - One-shot blind kill (+{chip_gain} chips, {chip_percentage:.1f}%): +20.0")
+        elif chip_percentage >= self.REWARD_THRESHOLDS["excellent"]:
             reward += 10.0
             reward_breakdown.append(f"Excellent hand (+{chip_gain} chips, {chip_percentage:.1f}% of blind): +10.0")
         elif chip_percentage >= self.REWARD_THRESHOLDS["good"]:

@@ -101,8 +106,34 @@ class BalatroRewardCalculator:
         # === BLIND COMPLETION ===
         # Main goal - beat the blind (only reward once per episode)
         if blind_defeated and not self.blind_already_defeated and game_over == 0:
-            reward += 50.0  # SUCCESS! Normalized from +500 to +50
-            reward_breakdown.append(f"BLIND DEFEATED: +50.0")
+            base_reward = 50.0
+
+            # Calculate hands used (starting hands - hands_left)
+            round_info = inner_game_state.get('round', {})
+            hands_left = round_info.get('hands_left', 0)
+            # Assume we start with 4 hands in ante 1
+            hands_used = 4 - hands_left
+
+            # Efficiency bonus - reward fewer hands used
+            if hands_used == 1:
+                efficiency_bonus = 25.0
+                reward_breakdown.append(f"ONE-HAND VICTORY BONUS: +{efficiency_bonus}")
+            elif hands_used == 2:
+                efficiency_bonus = 15.0
+                reward_breakdown.append(f"Two-hand efficiency bonus: +{efficiency_bonus}")
+            elif hands_used == 3:
+                efficiency_bonus = 8.0
+                reward_breakdown.append(f"Three-hand efficiency bonus: +{efficiency_bonus}")
+            else:
+                efficiency_bonus = max(0, 5.0 - hands_used)  # Diminishing returns
+                if efficiency_bonus > 0:
+                    reward_breakdown.append(f"Efficiency bonus: +{efficiency_bonus:.1f}")
+                else:
+                    efficiency_bonus = 0
+
+            total_blind_reward = base_reward + efficiency_bonus
+            reward += total_blind_reward
+            reward_breakdown.append(f"BLIND DEFEATED: +{base_reward} (base)")
             self.blind_already_defeated = True
             self.winning_chips = current_chips  # Store winning chip count
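
To make the schedule above concrete, here is a small mirror of the blind-completion arithmetic; it is illustrative only and just restates the constants from the hunk:

```python
def blind_completion_reward(hands_used: int, base: float = 50.0) -> float:
    """Total reward for beating the blind, mirroring the efficiency schedule above."""
    if hands_used == 1:
        bonus = 25.0
    elif hands_used == 2:
        bonus = 15.0
    elif hands_used == 3:
        bonus = 8.0
    else:
        bonus = max(0.0, 5.0 - hands_used)  # diminishing returns, floored at zero
    return base + bonus

assert blind_completion_reward(1) == 75.0  # one-shot kill: 50 base + 25 bonus
assert blind_completion_reward(4) == 51.0  # 50 base + max(0, 5 - 4)
assert blind_completion_reward(6) == 50.0  # bonus bottoms out at zero
```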

@@ -243,7 +243,7 @@ if __name__ == "__main__":
         print(f"📂 Found checkpoint: {latest_checkpoint}")

     model = train_agent(
-        total_timesteps=100000,
+        total_timesteps=250000,
        save_path="./models/balatro_trained",
        resume_from=str(latest_checkpoint) if latest_checkpoint else None
    )
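
The diff does not show what `train_agent` does internally. If it wraps stable-baselines3 PPO (an assumption this commit does not confirm), resuming from the discovered checkpoint might look roughly like this sketch:

```python
# Hedged sketch of a possible train_agent, assuming stable-baselines3 PPO underneath.
from stable_baselines3 import PPO

def train_agent(total_timesteps, save_path, resume_from=None, env=None):
    if resume_from:
        model = PPO.load(resume_from, env=env)
    else:
        model = PPO("MlpPolicy", env, verbose=1)
    # reset_num_timesteps=False keeps the step/episode counters continuous across
    # resumes, which also touches the episode-count logging question in DEBUGGING.
    model.learn(total_timesteps=total_timesteps, reset_num_timesteps=False)
    model.save(save_path)
    return model
```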

@@ -271,7 +271,11 @@ class BalatroStateMapper:
         hand_name = current_hand.get('handname', 'None')
         if not hand_name:
             hand_name = "None"
-        hand_index = hand_types.index(hand_name)
+
+        try:
+            hand_index = hand_types.index(hand_name)
+        except ValueError:
+            hand_index = 0  # Default to "None" if hand type not found

         features.extend(make_onehot(hand_index, len(hand_types)))
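
`make_onehot` itself is not part of this diff. If it behaves the way this call site suggests, a typical implementation would be:

```python
def make_onehot(index: int, size: int) -> list[float]:
    """One-hot encode `index` into a fixed-length vector of floats."""
    vec = [0.0] * size
    if 0 <= index < size:
        vec[index] = 1.0
    return vec

# e.g. hand_index 2 out of 5 hand types -> [0.0, 0.0, 1.0, 0.0, 0.0]
```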

@@ -8,7 +8,7 @@ class ReplaySystem:
         self.REPLAY_FILE_PATH = "replays.json"

     def try_save_replay(self, file_path: str, seed: str, actions: List[Dict[str, Any]], score: float, chips: int):
-        """Save the current replay to a file if the score is among the top MAX_REPLAYS."""
+        """Save the current replay to a file if the chip count is among the top MAX_REPLAYS."""
         timestamp = datetime.now().isoformat()

         replay_data = {

@@ -26,17 +26,17 @@ class ReplaySystem:
         if len(replays) < self.MAX_REPLAYS:
             replays.append(replay_data)
         else:
-            # Check if this score is higher than the lowest score
-            replays.sort(key=lambda x: x['score'], reverse=True)
-            if score > replays[-1]['score']:
-                # Replace the lowest scoring replay
+            # Check if this chip count is higher than the lowest chip count
+            replays.sort(key=lambda x: x['chips'], reverse=True)
+            if chips > replays[-1]['chips']:
+                # Replace the lowest chip count replay
                 replays[-1] = replay_data
             else:
-                # Score is not high enough, don't add it
+                # Chip count is not high enough, don't add it
                 return len(replays)

-        # Sort by score (highest first) and keep only top MAX_REPLAYS
-        replays.sort(key=lambda x: x['score'], reverse=True)
+        # Sort by chips (highest first) and keep only top MAX_REPLAYS
+        replays.sort(key=lambda x: x['chips'], reverse=True)
         replays = replays[:self.MAX_REPLAYS]

         # Save back to file

@@ -65,9 +65,9 @@ class ReplaySystem:
             json.dump(replays, f, indent=4)

     def sort_replays(self, file_path: str) -> List[Dict[str, Any]]:
-        """Sort replays by score and return the top MAX_REPLAYS."""
+        """Sort replays by chips and return the top MAX_REPLAYS."""
         replays = self.load_replays(file_path)
-        replays.sort(key=lambda x: x['score'], reverse=True)
+        replays.sort(key=lambda x: x['chips'], reverse=True)
         return replays[:self.MAX_REPLAYS]

     def get_top_replays(self, file_path: str, count: int = None) -> List[Dict[str, Any]]:

@@ -76,7 +76,7 @@ class ReplaySystem:
             count = self.MAX_REPLAYS

         replays = self.load_replays(file_path)
-        replays.sort(key=lambda x: x['score'], reverse=True)
+        replays.sort(key=lambda x: x['chips'], reverse=True)
         return replays[:count]

     def clear_replays(self, file_path: str):
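
A hypothetical call site for the chips-ranked replay log, matching the `try_save_replay` signature shown above; the module path, the example values, and the action format are assumptions, shown only to illustrate how the new ranking key is meant to be used:

```python
# Hypothetical usage -- the actual module path and episode bookkeeping may differ.
from replay_system import ReplaySystem

replay_system = ReplaySystem()
replay_system.try_save_replay(
    file_path=replay_system.REPLAY_FILE_PATH,
    seed="EXAMPLE1",                                              # run seed, so the game can be replayed by hand
    actions=[{"action": "play_hand", "cards": [0, 1, 2, 3, 4]}],  # raw request/response log
    score=75.0,                                                   # episode reward, kept in the record
    chips=1200,                                                   # final chips -- now the ranking key
)
print(replay_system.get_top_replays(replay_system.REPLAY_FILE_PATH, count=5))
```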

replays.json (1508 lines changed; file diff suppressed because it is too large)