import gym
import tensorflow as tf
import numpy as np
import random
from collections import deque
# Hyper-parameters shared by the code in this module.
GAMMA = 0.95          # reward discount factor
LEARNING_RATE = 0.01  # learning rate (where it is consumed is not visible in this chunk)
class Actor():# PI
def __init__(self, env, sess):
    """Record the environment's dimensions, build the network, and
    initialise the TensorFlow variables.

    Args:
        env: Environment object; must expose ``observation_space.shape``
            and ``action_space.n`` (presumably a Gym environment with a
            discrete action space -- confirm against the caller).
        sess: Object whose ``run`` method executes the variable
            initializer (presumably a ``tf.Session`` -- confirm against
            the caller).
    """
    # Dimensions read from the environment; the network builder uses both.
    observation_shape = env.observation_space.shape
    self.state_dim = observation_shape[0]
    self.action_dim = env.action_space.n

    # Counter initialised to zero (its use is not visible in this chunk).
    self.time_step = 0

    # Policy: build the softmax network before any variables are initialised.
    self.create_softmax_network()

    # Store the session, then initialise every global variable in the graph.
    self.session = sess
    init_op = tf.global_variables_initializer()
    self.session.run(init_op)
def create_softmax_network(self):
# network weights
W1 = self.weight_variable([self.state_dim, 20])
b1 = self.bias_variable([20])
W2 = self.weight_variable([20, self.action_dim])
b2 = self.bias_variable([self.action_dim])
# input layer
self.state_input = tf.placeholder("float", [None, self.state_dim])
self.tf_acts =
Marco AC (Actor-Critic) de aprendizaje por refuerzo
Supongo que te gusta
Origin blog.csdn.net/gz153016/article/details/110440961
Recomendado
Clasificación