Index A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W A A2CAgentFactory (class in tianshou.highlevel.agent) A2CExperimentBuilder (class in tianshou.highlevel.experiment) A2CParams (class in tianshou.highlevel.params.policy_params) A2CPolicy (class in tianshou.policy.modelfree.a2c) A2CTrainingStats (class in tianshou.policy.modelfree.a2c) acc_exp (GailTrainingStats attribute) acc_pi (GailTrainingStats attribute) act (ActBatchProtocol attribute) (CollectActionBatchProtocol attribute) (RolloutBatchProtocol attribute) act_normalized (CollectActionBatchProtocol attribute) ActBatchProtocol (class in tianshou.data.types) action() (ContinuousToDiscrete method) (MultiDiscreteToDiscrete method) action_bound_method (ParamsMixinActionScaling attribute) action_dim (ActionSpaceInfo property) ACTION_DIST_KEY (StepHookAddActionDistribution attribute) action_info (SpaceInfo attribute) action_scaling (ParamsMixinActionScaling attribute) action_shape (ActionSpaceInfo attribute) action_space (BaseCollector property) (RandomActor property) action_type (BasePolicy property) ActionSpaceInfo (class in tianshou.utils.space_info) actor (ActorCriticOpt property) (ActorFuture attribute) Actor (class in tianshou.utils.net.continuous) (class in tianshou.utils.net.discrete) actor (ParamTransformerData attribute) actor_critic_module (ActorCriticOpt attribute) actor_delay (REDQParams attribute) actor_loss (A2CTrainingStats attribute) (BCQTrainingStats attribute) (DDPGTrainingStats attribute) (DiscreteCRRTrainingStats attribute) (NPGTrainingStats attribute) (SACTrainingStats attribute) (TD3TrainingStats attribute) actor_lr (ParamsMixinActorAndCritic attribute) (ParamsMixinActorAndDualCritics attribute) actor_lr_scheduler_factory (ParamsMixinActorAndCritic attribute) (ParamsMixinActorAndDualCritics attribute) actor_pred() (CQLPolicy method) actor_step_size (NPGParams attribute) ActorCritic (class in tianshou.utils.net.common) ActorCriticAgentFactory (class in tianshou.highlevel.agent) ActorCriticOpt (class in tianshou.highlevel.module.module_opt) ActorDualCriticsAgentFactory (class in tianshou.highlevel.agent) ActorFactory (class in tianshou.highlevel.module.actor) ActorFactoryContinuous (class in tianshou.highlevel.module.actor) ActorFactoryContinuousDeterministicNet (class in tianshou.highlevel.module.actor) ActorFactoryContinuousGaussianNet (class in tianshou.highlevel.module.actor) ActorFactoryDefault (class in tianshou.highlevel.module.actor) ActorFactoryDiscreteNet (class in tianshou.highlevel.module.actor) ActorFactoryTransientStorageDecorator (class in tianshou.highlevel.module.actor) ActorFuture (class in tianshou.highlevel.module.actor) ActorFutureProviderProtocol (class in tianshou.highlevel.module.actor) ActorProb (class in tianshou.utils.net.continuous) ActStateBatchProtocol (class in tianshou.data.types) add() (CachedReplayBuffer method) (HERReplayBuffer method) (HERReplayBufferManager method) (MovAvg method) (PrioritizedReplayBuffer method) (ReplayBuffer method) (ReplayBufferManager method) adv (BatchWithAdvantagesProtocol attribute) advantage_normalization (NPGParams attribute) (PPOParams attribute) AgentFactory (class in tianshou.highlevel.agent) alloc_by_keys_diff() (in module tianshou.data.batch) alpha (REDQParams attribute) (REDQTrainingStats attribute) (SACTrainingStats attribute) alpha_loss (REDQTrainingStats attribute) (SACTrainingStats attribute) apply_values_transform() (Batch method) (BatchProtocol method) assert_continuous() (EnvType method) assert_discrete() (EnvType method) AsyncCollector (class in tianshou.data.collector) AutoAlphaFactory (class in tianshou.highlevel.params.alpha) AutoAlphaFactoryDefault (class in tianshou.highlevel.params.alpha) B backend (JoblibConfig attribute) backtrack_coeff (TRPOParams attribute) BaseActor (class in tianshou.utils.net.common) BaseCollector (class in tianshou.data.collector) BaseLogger (class in tianshou.utils.logger.base) BaseNoise (class in tianshou.exploration.random) BasePolicy (class in tianshou.policy.base) BaseTrainer (class in tianshou.trainer.base) BaseVectorEnv (class in tianshou.env.venvs) Batch (class in tianshou.data.batch) batch_size (SamplingConfig attribute) BatchProtocol (class in tianshou.data.batch) BatchWithAdvantagesProtocol (class in tianshou.data.types) BatchWithReturnsProtocol (class in tianshou.data.types) BCQPolicy (class in tianshou.policy.imitation.bcq) BCQTrainingStats (class in tianshou.policy.imitation.bcq) BDQNTrainingStats (class in tianshou.policy.modelfree.bdq) best_reward (InfoStats attribute) best_reward_std (InfoStats attribute) best_score (InfoStats attribute) BranchingDQNPolicy (class in tianshou.policy.modelfree.bdq) BranchingNet (class in tianshou.utils.net.common) buffer_size (SamplingConfig attribute) build() (ExperimentBuilder method) build_seeded_collection() (ExperimentBuilder method) C C51Policy (class in tianshou.policy.modelfree.c51) C51TrainingStats (class in tianshou.policy.modelfree.c51) CachedReplayBuffer (class in tianshou.data.buffer.cached) calc_actor_loss() (CQLPolicy method) calc_pi_values() (CQLPolicy method) calc_random_values() (CQLPolicy method) callback() (EpochTestCallback method) (EpochTestCallbackDQNSetEps method) (EpochTrainCallback method) (EpochTrainCallbackDQNEpsLinearDecay method) (EpochTrainCallbackDQNSetEps method) cat() (Batch static method) (BatchProtocol static method) cat_() (Batch method) (BatchProtocol method) cell (RecurrentStateBatch attribute) change_value() (ParamTransformerActionScaling method) (ParamTransformerChangeValue method) (ParamTransformerFloatEnvParamFactory method) (ParamTransformerNoiseFactory method) check() (TraceDeterminismTest method) clip_loss (PPOTrainingStats attribute) clip_loss_grad (DQNParams attribute) clone_optimizer() (in module tianshou.utils.optim) close() (BaseCollector method) (BaseVectorEnv method) (Collector method) (EnvWorker method) (PettingZooEnv method) (VectorEnvWrapper method) close_env() (DummyEnvWorker method) (EnvWorker method) (RayEnvWorker method) (SubprocEnvWorker method) CloudpickleWrapper (class in tianshou.env.utils) collect() (BaseCollector method) collect_speed (CollectStats attribute) (LoggedCollectStats attribute) collect_time (CollectStats attribute) (LoggedCollectStats attribute) CollectActionBatchProtocol (class in tianshou.data.collector) Collector (class in tianshou.data.collector) CollectStats (class in tianshou.data.collector) CollectStatsBase (class in tianshou.data.collector) CollectStepBatchProtocol (class in tianshou.data.collector) compute_action() (BasePolicy method) compute_action_batch() (RandomActor method) compute_dim_to_summary_stats() (in module tianshou.data.stats) compute_episodic_return() (BasePolicy static method) compute_nstep_return() (BasePolicy static method) compute_q_value() (C51Policy method) (DQNPolicy method) (QRDQNPolicy method) CONTINUOUS (EnvType attribute) ContinuousActorType (class in tianshou.highlevel.module.actor) ContinuousEnvironments (class in tianshou.highlevel.env) ContinuousToDiscrete (class in tianshou.env.gym_wrappers) copy() (ExperimentBuilder method) correct_log_prob_gaussian_tanh() (in module tianshou.policy.modelfree.sac) CosineEmbeddingNetwork (class in tianshou.utils.net.discrete) cql_alpha (CQLTrainingStats attribute) cql_alpha_loss (CQLTrainingStats attribute) cql_loss (DiscreteCQLTrainingStats attribute) (DiscreteCRRTrainingStats attribute) CQLPolicy (class in tianshou.policy.imitation.cql) CQLTrainingStats (class in tianshou.policy.imitation.cql) create_actor_critic_module_opt() (ActorCriticAgentFactory method) create_auto_alpha() (AutoAlphaFactory method) (AutoAlphaFactoryDefault method) create_dist_fn() (ActorFactory method) (ActorFactoryContinuousDeterministicNet method) (ActorFactoryContinuousGaussianNet method) (ActorFactoryDefault method) (ActorFactoryDiscreteNet method) (ActorFactoryTransientStorageDecorator method) (DistributionFunctionFactory method) (DistributionFunctionFactoryCategorical method) (DistributionFunctionFactoryIndependentGaussians method) create_env() (EnvFactory method) create_envs() (EnvFactory method) create_experiment_world() (Experiment method) create_intermediate_module() (IntermediateModuleFactory method) (IntermediateModuleFactoryFromActorFactory method) create_kwargs() (Params method) create_launcher() (RegisteredExpLauncher method) create_logger() (LoggerFactory method) (LoggerFactoryDefault method) create_module() (ActorFactory method) (ActorFactoryContinuousDeterministicNet method) (ActorFactoryContinuousGaussianNet method) (ActorFactoryDefault method) (ActorFactoryDiscreteNet method) (ActorFactoryTransientStorageDecorator method) (CriticEnsembleFactory method) (CriticEnsembleFactoryContinuousNet method) (CriticEnsembleFactoryDefault method) (CriticFactory method) (CriticFactoryContinuousNet method) (CriticFactoryDefault method) (CriticFactoryDiscreteNet method) (CriticFactoryReuseActor method) (ImplicitQuantileNetworkFactory method) (IntermediateModuleFactory method) (ModuleFactory method) create_module_opt() (ActorFactory method) (CriticEnsembleFactory method) (CriticFactory method) create_noise() (NoiseFactory method) (NoiseFactoryMaxActionScaledGaussian method) create_optimizer() (OptimizerFactory method) create_optimizer_for_params() (OptimizerFactory method) (OptimizerFactoryAdam method) (OptimizerFactoryRMSprop method) (OptimizerFactoryTorch method) create_policy() (AgentFactory method) create_scheduler() (LRSchedulerFactory method) (LRSchedulerFactoryLinear method) create_train_test_collector() (AgentFactory method) create_trainer() (AgentFactory method) (OffPolicyAgentFactory method) (OnPolicyAgentFactory method) create_uniform_action_dist() (in module tianshou.utils.torch_utils) create_value() (EnvValueFactory method) (FloatEnvValueFactoryMaxActionScaled method) (in module tianshou.data.batch) create_venv() (EnvFactory method) (EnvFactoryRegistered method) (EnvPoolFactory method) (VectorEnvType method) create_wrapped_policy() (PolicyWrapperFactory method) (PolicyWrapperFactoryIntrinsicCuriosity method) critic (ActorCriticOpt property) Critic (class in tianshou.utils.net.continuous) (class in tianshou.utils.net.discrete) critic1 (ParamTransformerData attribute) critic1_loss (BCQTrainingStats attribute) (SACTrainingStats attribute) (TD3TrainingStats attribute) critic1_lr (ParamsMixinActorAndDualCritics attribute) critic1_lr_scheduler_factory (ParamsMixinActorAndDualCritics attribute) critic2 (ParamTransformerData attribute) critic2_loss (BCQTrainingStats attribute) (SACTrainingStats attribute) (TD3TrainingStats attribute) critic2_lr (ParamsMixinActorAndDualCritics attribute) critic2_lr_scheduler_factory (ParamsMixinActorAndDualCritics attribute) critic_loss (DDPGTrainingStats attribute) (DiscreteCRRTrainingStats attribute) critic_lr (ParamsMixinActorAndCritic attribute) critic_lr_scheduler_factory (ParamsMixinActorAndCritic attribute) CriticBase (class in tianshou.utils.net.continuous) CriticEnsembleFactory (class in tianshou.highlevel.module.critic) CriticEnsembleFactoryContinuousNet (class in tianshou.highlevel.module.critic) CriticEnsembleFactoryDefault (class in tianshou.highlevel.module.critic) CriticFactory (class in tianshou.highlevel.module.critic) CriticFactoryContinuousNet (class in tianshou.highlevel.module.critic) CriticFactoryDefault (class in tianshou.highlevel.module.critic) CriticFactoryDiscreteNet (class in tianshou.highlevel.module.critic) CriticFactoryReuseActor (class in tianshou.highlevel.module.critic) D DataclassPPrintMixin (class in tianshou.utils.print) DataParallelNet (class in tianshou.utils.net.common) DataScope (class in tianshou.utils.logger.base) DDPGAgentFactory (class in tianshou.highlevel.agent) DDPGExperimentBuilder (class in tianshou.highlevel.experiment) DDPGParams (class in tianshou.highlevel.params.policy_params) DDPGPolicy (class in tianshou.policy.modelfree.ddpg) DDPGTrainingStats (class in tianshou.policy.modelfree.ddpg) decode() (VAE method) DEFAULT_HIDDEN_SIZES (ActorFactoryDefault attribute) (CriticEnsembleFactoryDefault attribute) (CriticFactoryDefault attribute) deprecation() (in module tianshou.utils.warning) DETERMINISTIC (ContinuousActorType attribute) deterministic_eval (PGParams attribute) (REDQParams attribute) (SACParams attribute) device (ExperimentConfig attribute) (ParamTransformerData attribute) disc() (GAILPolicy method) disc_loss (GailTrainingStats attribute) discount_factor (DQNParams attribute) (PGParams attribute) DISCRETE (EnvType attribute) DiscreteBCQPolicy (class in tianshou.policy.imitation.discrete_bcq) DiscreteBCQTrainingStats (class in tianshou.policy.imitation.discrete_bcq) DiscreteCQLPolicy (class in tianshou.policy.imitation.discrete_cql) DiscreteCQLTrainingStats (class in tianshou.policy.imitation.discrete_cql) DiscreteCriticOnlyAgentFactory (class in tianshou.highlevel.agent) DiscreteCRRPolicy (class in tianshou.policy.imitation.discrete_crr) DiscreteCRRTrainingStats (class in tianshou.policy.imitation.discrete_crr) DiscreteEnvironments (class in tianshou.highlevel.env) DiscreteSACAgentFactory (class in tianshou.highlevel.agent) DiscreteSACExperimentBuilder (class in tianshou.highlevel.experiment) DiscreteSACParams (class in tianshou.highlevel.params.policy_params) DiscreteSACPolicy (class in tianshou.policy.modelfree.discrete_sac) DiscreteSACTrainingStats (class in tianshou.policy.modelfree.discrete_sac) dist (CollectActionBatchProtocol attribute) (CollectStepBatchProtocol attribute) (DistBatchProtocol attribute) dist_to_atleast_2d() (in module tianshou.data.batch) DistBatchProtocol (class in tianshou.data.types) DistLogProbBatchProtocol (class in tianshou.data.types) DistributionFunctionFactory (class in tianshou.highlevel.params.dist_fn) DistributionFunctionFactoryCategorical (class in tianshou.highlevel.params.dist_fn) DistributionFunctionFactoryIndependentGaussians (class in tianshou.highlevel.params.dist_fn) DQNAgentFactory (class in tianshou.highlevel.agent) DQNExperimentBuilder (class in tianshou.highlevel.experiment) DQNParams (class in tianshou.highlevel.params.policy_params) DQNPolicy (class in tianshou.policy.modelfree.dqn) DQNTrainingStats (class in tianshou.policy.modelfree.dqn) dropnull() (Batch method) (BatchProtocol method) (ReplayBuffer method) dual_clip (PPOParams attribute) DUMMY (VectorEnvType attribute) DummyEnvWorker (class in tianshou.env.worker.dummy) DummyTqdm (class in tianshou.utils.progress_bar) DummyVectorEnv (class in tianshou.env.venvs) E empty() (Batch static method) (BatchProtocol static method) empty_() (Batch method) (BatchProtocol method) ENABLE_VALIDATION (in module tianshou.config) ensemble_size (REDQParams attribute) EnsembleLinear (class in tianshou.utils.net.common) ent_coef (A2CParams attribute) ent_loss (A2CTrainingStats attribute) (PPOTrainingStats attribute) entropy_loss (FQFTrainingStats attribute) env_num (BaseCollector property) env_step (LoggedCollectStats attribute) env_steps_E (RLiableExperimentResult attribute) env_steps_train_E (RLiableExperimentResult attribute) EnvFactory (class in tianshou.highlevel.env) EnvFactoryRegistered (class in tianshou.highlevel.env) Environments (class in tianshou.highlevel.env) EnvMode (class in tianshou.highlevel.env) EnvPoolFactory (class in tianshou.highlevel.env) envs (ParamTransformerData attribute) (World attribute) EnvType (class in tianshou.highlevel.env) EnvValueFactory (class in tianshou.highlevel.params.env_param) EnvWorker (class in tianshou.env.worker.base) episode_mc_return_to_go() (in module tianshou.policy.base) episode_per_collect (SamplingConfig attribute) EpisodeBatchProtocol (class in tianshou.data.collector) EpisodeRolloutHook (class in tianshou.data.collector) EpisodeRolloutHookMCReturn (class in tianshou.data.collector) EpisodeRolloutHookMCReturn.OutputDict (class in tianshou.data.collector) EpisodeRolloutHookMerged (class in tianshou.data.collector) EpisodeRolloutHookProtocol (class in tianshou.data.collector) epoch (EpochStats attribute) epoch_stop_callback (TrainerCallbacks attribute) epoch_test_callback (TrainerCallbacks attribute) epoch_train_callback (TrainerCallbacks attribute) EpochStats (class in tianshou.data.stats) EpochStopCallback (class in tianshou.highlevel.trainer) EpochStopCallbackRewardThreshold (class in tianshou.highlevel.trainer) EpochTestCallback (class in tianshou.highlevel.trainer) EpochTestCallbackDQNSetEps (class in tianshou.highlevel.trainer) EpochTrainCallback (class in tianshou.highlevel.trainer) EpochTrainCallbackDQNEpsLinearDecay (class in tianshou.highlevel.trainer) EpochTrainCallbackDQNSetEps (class in tianshou.highlevel.trainer) eps_clip (PPOParams attribute) estimation_step (DDPGParams attribute) (DQNParams attribute) (REDQParams attribute) (TD3Params attribute) eval_results() (RLiableExperimentResult method) exp_dir (RLiableExperimentResult attribute) Experiment (class in tianshou.highlevel.experiment) experiment_config (ExperimentBuilder property) EXPERIMENT_PICKLE_FILENAME (Experiment attribute) ExperimentBuilder (class in tianshou.highlevel.experiment) ExperimentCollection (class in tianshou.highlevel.experiment) ExperimentConfig (class in tianshou.highlevel.experiment) ExperimentResult (class in tianshou.highlevel.experiment) ExpLauncher (class in tianshou.evaluation.launcher) exploration_noise (ParamsMixinExplorationNoise attribute) exploration_noise() (BasePolicy method) (BranchingDQNPolicy method) (DDPGPolicy method) (DiscreteSACPolicy method) (DQNPolicy method) (ICMPolicy method) (MultiAgentPolicyManager method) F f() (NoisyLinear method) filter_messages() (TraceLog method) finalize() (BaseLogger method) (LazyLogger method) (TensorboardLogger method) (WandbLogger method) FloatEnvValueFactory (class in tianshou.highlevel.params.env_param) FloatEnvValueFactoryMaxActionScaled (class in tianshou.highlevel.params.env_param) format_log_message() (in module tianshou.utils.determinism) forward() (Actor method), [1] (ActorProb method) (BaseActor method) (BasePolicy method) (BCQPolicy method) (BranchingDQNPolicy method) (BranchingNet method) (CosineEmbeddingNetwork method) (Critic method), [1] (CriticBase method) (DataParallelNet method) (DDPGPolicy method) (DiscreteBCQPolicy method) (DiscreteSACPolicy method) (DQNPolicy method) (EnsembleLinear method) (FQFPolicy method) (FractionProposalNetwork method) (FullQuantileFunction method) (ICMPolicy method) (ImitationPolicy method) (ImplicitQuantileNetwork method) (IntrinsicCuriosityModule method) (IQNPolicy method) (MARLRandomPolicy method) (MLP method) (MultiAgentPolicyManager method) (Net method) (NetBase method) (NoisyLinear method) (Perturbation method) (PGPolicy method) (PSRLPolicy method) (RandomActionPolicy method) (RandomActor method) (Recurrent method) (RecurrentActorProb method) (RecurrentCritic method) (REDQPolicy method) (SACPolicy method) (VAE method) FQFBatchProtocol (class in tianshou.data.types) FQFPolicy (class in tianshou.policy.modelfree.fqf) FQFTrainingStats (class in tianshou.policy.modelfree.fqf) fraction_loss (FQFTrainingStats attribute) FractionProposalNetwork (class in tianshou.utils.net.discrete) fractions (FQFBatchProtocol attribute) from_data() (ReplayBuffer class method) from_data_dict() (LoggedCollectStats class method) from_directory() (Experiment class method) from_env() (EnvType static method) (SpaceInfo class method) from_factory() (ContinuousEnvironments static method) (DiscreteEnvironments static method) from_factory_and_type() (Environments static method) from_hdf5() (in module tianshou.data.utils.converter) from_sequence() (SequenceSummaryStats class method) from_sequences() (PPOTrainingStats class method) from_space() (ActionSpaceInfo class method) (ObservationSpaceInfo class method) from_spaces() (SpaceInfo class method) full_episode_mc_return (EpisodeRolloutHookMCReturn.OutputDict attribute) FULL_EPISODE_MC_RETURN_KEY (EpisodeRolloutHookMCReturn attribute) FullQuantileFunction (class in tianshou.utils.net.discrete) G gae_lambda (ParamsMixinGeneralAdvantageEstimation attribute) GAILPolicy (class in tianshou.policy.imitation.gail) GailTrainingStats (class in tianshou.policy.imitation.gail) gamma (DDPGParams attribute) (TD3Params attribute) gather_info() (in module tianshou.trainer.utils) GAUSSIAN (ContinuousActorType attribute) GaussianNoise (class in tianshou.exploration.random) gen_doc() (BaseTrainer static method) get() (Batch method) (BatchProtocol method) (MovAvg method) (ParamTransformer static method) (ReplayBuffer method) (ShArray method) get_action_shape() (ContinuousEnvironments method) (DiscreteEnvironments method) (Environments method) get_action_space() (Environments method) get_actor_future() (ActorFutureProviderProtocol method) get_buffer_indices() (ReplayBuffer method) get_dict_state_decorator() (in module tianshou.utils.net.common) get_env_attr() (BaseVectorEnv method) (DummyEnvWorker method) (EnvWorker method) (RayEnvWorker method) (SubprocEnvWorker method) (VectorEnvWrapper method) get_filename() (PolicyPersistence.Mode method) get_full_log() (TraceLog method) get_keys() (Batch method) (BatchProtocol method) get_len_of_dist() (in module tianshou.data.batch) get_log() (TraceLoggerContext method) get_logger_class() (LoggerFactory method) (LoggerFactoryDefault method) get_loss_stats_dict() (MapTrainingStats method) (TrainingStats method) get_obs_rms() (VectorEnvNormObs method) get_observation_shape() (ContinuousEnvironments method) (DiscreteEnvironments method) (Environments method) get_observation_space() (Environments method) get_on_episode_done_hook() (Collector method) get_on_step_hook() (Collector method) get_output_dim() (Actor method), [1] (ActorProb method) (BaseActor method) (in module tianshou.utils.net.common) (RandomActor method) get_prefix_sum_idx() (SegmentTree method) get_preprocess_net() (Actor method), [1] (ActorProb method) (BaseActor method) (RandomActor method) get_save_best_fn() (PolicyPersistence method) get_save_checkpoint_fn() (PolicyPersistence method) get_seeding_info_as_str() (Experiment method) get_sliced_dist() (in module tianshou.data.batch) get_stddev_from_dist() (in module tianshou.data.collector) get_trainer_fn() (EpochStopCallback method) (EpochTestCallback method) (EpochTrainCallback method) get_type() (ContinuousEnvironments method) (DiscreteEnvironments method) (Environments method) get_weight() (PrioritizedReplayBuffer method) getattr_with_matching_alt_value() (in module tianshou.utils.net.common) GetParamTransformersProtocol (class in tianshou.highlevel.params.policy_params) git_status (TraceDeterminismTest.Result attribute) gradient_step (InfoStats attribute) gradient_steps (PPOTrainingStats attribute) H hasnull() (Batch method) (BatchProtocol method) (ReplayBuffer method) HERReplayBuffer (class in tianshou.data.buffer.her) HERReplayBufferManager (class in tianshou.data.buffer.manager) HERVectorReplayBuffer (class in tianshou.data.buffer.vecbuf) hidden (RecurrentStateBatch attribute) hidden_sizes (IQNParams attribute) hidden_state (CollectActionBatchProtocol attribute) I i_loss (DiscreteBCQTrainingStats attribute) ICMPolicy (class in tianshou.policy.modelbased.icm) ICMTrainingStats (class in tianshou.policy.modelbased.icm) imitation_logits (ImitationBatchProtocol attribute) ImitationBatchProtocol (class in tianshou.data.types) ImitationPolicy (class in tianshou.policy.imitation.base) ImitationTrainingStats (class in tianshou.policy.imitation.base) ImplicitQuantileNetwork (class in tianshou.utils.net.discrete) ImplicitQuantileNetworkFactory (class in tianshou.highlevel.module.special) INFO (DataScope attribute) info (ObsBatchProtocol attribute) info() (ContinuousEnvironments method) (Environments method) info_stat (EpochStats attribute) InfoStats (class in tianshou.data.stats) init_linear_orthogonal() (in module tianshou.highlevel.module.core) init_weight() (PrioritizedReplayBuffer method) IntermediateModule (class in tianshou.highlevel.module.intermediate) IntermediateModuleFactory (class in tianshou.highlevel.module.intermediate) IntermediateModuleFactoryFromActorFactory (class in tianshou.highlevel.module.actor) IntrinsicCuriosityModule (class in tianshou.utils.net.discrete) IQNAgentFactory (class in tianshou.highlevel.agent) IQNExperimentBuilder (class in tianshou.highlevel.experiment) IQNParams (class in tianshou.highlevel.params.policy_params) IQNPolicy (class in tianshou.policy.modelfree.iqn) IQNTrainingStats (class in tianshou.policy.modelfree.iqn) is_auto_alpha (REDQPolicy property) (SACPolicy property) is_continuous() (EnvType method) is_discrete (RandomActor property) is_discrete() (EnvType method) is_double (DQNParams attribute) is_enabled (TraceLogger attribute) is_within_training_step (BasePolicy attribute) isnull() (Batch method) (BatchProtocol method) (ReplayBuffer method) J joblib (RegisteredExpLauncher attribute) JoblibConfig (class in tianshou.evaluation.launcher) JoblibExpLauncher (class in tianshou.evaluation.launcher) K kl (NPGTrainingStats attribute) L launch() (ExpLauncher method) LazyLogger (class in tianshou.utils.logger.base) learn() (A2CPolicy method) (BasePolicy method) (BCQPolicy method) (BranchingDQNPolicy method) (C51Policy method) (CQLPolicy method) (DDPGPolicy method) (DiscreteBCQPolicy method) (DiscreteCQLPolicy method) (DiscreteCRRPolicy method) (DiscreteSACPolicy method) (DQNPolicy method) (FQFPolicy method) (GAILPolicy method) (ICMPolicy method) (ImitationPolicy method) (IQNPolicy method) (MARLRandomPolicy method) (MultiAgentPolicyManager method) (NPGPolicy method) (PGPolicy method) (PPOPolicy method) (PSRLPolicy method) (QRDQNPolicy method) (RainbowPolicy method) (RandomActionPolicy method) (REDQPolicy method) (SACPolicy method) (TD3BCPolicy method) (TD3Policy method) (TRPOPolicy method) lens (CollectStats attribute) lens_stat (CollectStats attribute) (LoggedCollectStats attribute) load() (WandbLogger method) load_and_eval_experiments() (in module tianshou.evaluation.rliable_evaluation_hl) load_from_disk() (RLiableExperimentResult class method) load_hdf5() (ReplayBuffer class method) load_state_dict() (MultipleLRSchedulers method) log (TraceDeterminismTest.Result attribute) log() (TraceLogger class method) log_buffer (TraceLogger attribute) log_file_enabled (ExperimentConfig attribute) LOG_FILENAME (Experiment attribute) log_formatter (TraceLogger attribute) log_info_data() (BaseLogger method) LOG_LEVEL (TraceLogger attribute) log_lines (TraceLog attribute) log_prob (DistLogProbBatchProtocol attribute) log_test_data() (BaseLogger method) log_train_data() (BaseLogger method) log_update_data() (BaseLogger method) LoggedCollectStats (class in tianshou.evaluation.rliable_evaluation_hl) LoggedSummaryData (class in tianshou.evaluation.rliable_evaluation_hl) logger (World attribute) LoggerFactory (class in tianshou.highlevel.logger) LoggerFactoryDefault (class in tianshou.highlevel.logger) logits (ModelOutputBatchProtocol attribute) logp_old (LogpOldProtocol attribute) LogpOldProtocol (class in tianshou.data.types) loss (A2CTrainingStats attribute) (DQNTrainingStats attribute) (ImitationTrainingStats attribute) (PGTrainingStats attribute) (PPOTrainingStats attribute) (RainbowTrainingStats attribute) lr (ParamsMixinLearningRateWithScheduler attribute) lr_scheduler_factory (ParamsMixinLearningRateWithScheduler attribute) LRSchedulerFactory (class in tianshou.highlevel.params.lr_scheduler) LRSchedulerFactoryLinear (class in tianshou.highlevel.params.lr_scheduler) M MalformedBufferError map_action() (BasePolicy method) map_action_inverse() (BasePolicy method) MAPRolloutBatchProtocol (class in tianshou.policy.multiagent.mapolicy) MapTrainingStats (class in tianshou.policy.multiagent.mapolicy) MARLRandomPolicy (class in tianshou.policy.random) MARLRandomTrainingStats (class in tianshou.policy.random) max (LoggedSummaryData attribute) (SequenceSummaryStats attribute) max_action (ActionSpaceInfo attribute) max_backtracks (TRPOParams attribute) max_batchsize (ParamsMixinGeneralAdvantageEstimation attribute) max_grad_norm (A2CParams attribute) max_kl (TRPOParams attribute) MaxActionScaled (class in tianshou.highlevel.params.env_param) MaxActionScaledGaussian (class in tianshou.highlevel.params.noise) mc_return_to_go (EpisodeRolloutHookMCReturn.OutputDict attribute) MC_RETURN_TO_GO_KEY (EpisodeRolloutHookMCReturn attribute) mean (LoggedSummaryData attribute) (SequenceSummaryStats attribute) mean() (MovAvg method) MESSAGE_TAG (TraceLogger attribute) min (LoggedSummaryData attribute) (SequenceSummaryStats attribute) min_action (ActionSpaceInfo attribute) miniblock() (in module tianshou.utils.net.common) MLP (class in tianshou.utils.net.common) ModelOutputBatchProtocol (class in tianshou.data.types) module tianshou.config tianshou.data.batch tianshou.data.buffer.base tianshou.data.buffer.cached tianshou.data.buffer.her tianshou.data.buffer.manager tianshou.data.buffer.prio tianshou.data.buffer.vecbuf tianshou.data.collector tianshou.data.stats tianshou.data.types tianshou.data.utils.converter tianshou.data.utils.segtree tianshou.env.gym_wrappers tianshou.env.pettingzoo_env tianshou.env.utils tianshou.env.venv_wrappers tianshou.env.venvs tianshou.env.worker.base tianshou.env.worker.dummy tianshou.env.worker.ray tianshou.env.worker.subproc tianshou.evaluation.launcher tianshou.evaluation.rliable_evaluation_hl tianshou.exploration.random tianshou.highlevel.agent tianshou.highlevel.config tianshou.highlevel.env tianshou.highlevel.experiment tianshou.highlevel.logger tianshou.highlevel.module.actor tianshou.highlevel.module.core tianshou.highlevel.module.critic tianshou.highlevel.module.intermediate tianshou.highlevel.module.module_opt tianshou.highlevel.module.special tianshou.highlevel.optim tianshou.highlevel.params.alpha tianshou.highlevel.params.dist_fn tianshou.highlevel.params.env_param tianshou.highlevel.params.lr_scheduler tianshou.highlevel.params.noise tianshou.highlevel.params.policy_params tianshou.highlevel.params.policy_wrapper tianshou.highlevel.persistence tianshou.highlevel.trainer tianshou.highlevel.world tianshou.policy.base tianshou.policy.imitation.base tianshou.policy.imitation.bcq tianshou.policy.imitation.cql tianshou.policy.imitation.discrete_bcq tianshou.policy.imitation.discrete_cql tianshou.policy.imitation.discrete_crr tianshou.policy.imitation.gail tianshou.policy.imitation.td3_bc tianshou.policy.modelbased.icm tianshou.policy.modelbased.psrl tianshou.policy.modelfree.a2c tianshou.policy.modelfree.bdq tianshou.policy.modelfree.c51 tianshou.policy.modelfree.ddpg tianshou.policy.modelfree.discrete_sac tianshou.policy.modelfree.dqn tianshou.policy.modelfree.fqf tianshou.policy.modelfree.iqn tianshou.policy.modelfree.npg tianshou.policy.modelfree.pg tianshou.policy.modelfree.ppo tianshou.policy.modelfree.qrdqn tianshou.policy.modelfree.rainbow tianshou.policy.modelfree.redq tianshou.policy.modelfree.sac tianshou.policy.modelfree.td3 tianshou.policy.modelfree.trpo tianshou.policy.multiagent.mapolicy tianshou.policy.random tianshou.trainer.base tianshou.trainer.utils tianshou.utils.conversion tianshou.utils.determinism tianshou.utils.logger.base tianshou.utils.logger.tensorboard tianshou.utils.logger.wandb tianshou.utils.logging tianshou.utils.lr_scheduler tianshou.utils.net.common tianshou.utils.net.continuous tianshou.utils.net.discrete tianshou.utils.optim tianshou.utils.print tianshou.utils.progress_bar tianshou.utils.space_info tianshou.utils.statistics tianshou.utils.torch_utils tianshou.utils.warning module (IntermediateModule attribute) (ModuleOpt attribute) ModuleFactory (class in tianshou.highlevel.module.core) ModuleOpt (class in tianshou.highlevel.module.module_opt) MovAvg (class in tianshou.utils.statistics) MultiAgentPolicyManager (class in tianshou.policy.multiagent.mapolicy) MultiDiscreteToDiscrete (class in tianshou.env.gym_wrappers) MultipleLRSchedulers (class in tianshou.utils.lr_scheduler) N n_collected_episodes (CollectStatsBase attribute) (LoggedCollectStats attribute) n_collected_steps (CollectStatsBase attribute) (LoggedCollectStats attribute) n_jobs (JoblibConfig attribute) Net (class in tianshou.utils.net.common) NetBase (class in tianshou.utils.net.common) next() (ReplayBuffer method) (ReplayBufferManager method) noise_clip (TD3Params attribute) NoiseFactory (class in tianshou.highlevel.params.noise) NoiseFactoryMaxActionScaledGaussian (class in tianshou.highlevel.params.noise) NoisyLinear (class in tianshou.utils.net.discrete) norm() (RunningMeanStd method) NPGAgentFactory (class in tianshou.highlevel.agent) NPGExperimentBuilder (class in tianshou.highlevel.experiment) NPGParams (class in tianshou.highlevel.params.policy_params) NPGPolicy (class in tianshou.policy.modelfree.npg) NPGTrainingStats (class in tianshou.policy.modelfree.npg) num_branches (BranchingDQNPolicy property) num_cosines (IQNParams attribute) num_epochs (SamplingConfig attribute) num_quantiles (IQNParams attribute) num_test_envs (SamplingConfig attribute) num_test_episodes (SamplingConfig attribute) num_train_envs (SamplingConfig attribute) O obs (ObsBatchProtocol attribute) obs_dim (ObservationSpaceInfo property) obs_next (RolloutBatchProtocol attribute) obs_shape (ObservationSpaceInfo attribute) ObsBatchProtocol (class in tianshou.data.types) observation_info (SpaceInfo attribute) ObservationSpaceInfo (class in tianshou.utils.space_info) observe() (PSRLModel method) OfflineTrainer (class in tianshou.trainer.base) OffPolicyAgentFactory (class in tianshou.highlevel.agent) OffpolicyTrainer (class in tianshou.trainer.base) online_sample_size (IQNParams attribute) OnPolicyAgentFactory (class in tianshou.highlevel.agent) OnpolicyTrainer (class in tianshou.trainer.base) optim (ActorCriticOpt attribute) (ModuleOpt attribute) (ParamTransformerData attribute) optim_critic_iters (NPGParams attribute) optim_factory (ParamTransformerData attribute) optim_step() (in module tianshou.utils.optim) OptimizerFactory (class in tianshou.highlevel.optim) OptimizerFactoryAdam (class in tianshou.highlevel.optim) OptimizerFactoryRMSprop (class in tianshou.highlevel.optim) OptimizerFactoryTorch (class in tianshou.highlevel.optim) OptimizerWithLearningRateProtocol (class in tianshou.highlevel.optim) OUNoise (class in tianshou.exploration.random) output_dim (IntermediateModule attribute) P Params (class in tianshou.highlevel.params.policy_params) ParamsMixinActionScaling (class in tianshou.highlevel.params.policy_params) ParamsMixinActorAndCritic (class in tianshou.highlevel.params.policy_params) ParamsMixinActorAndDualCritics (class in tianshou.highlevel.params.policy_params) ParamsMixinExplorationNoise (class in tianshou.highlevel.params.policy_params) ParamsMixinGeneralAdvantageEstimation (class in tianshou.highlevel.params.policy_params) ParamsMixinLearningRateWithScheduler (class in tianshou.highlevel.params.policy_params) ParamTransformer (class in tianshou.highlevel.params.policy_params) ParamTransformerActionScaling (class in tianshou.highlevel.params.policy_params) ParamTransformerActorAndCriticLRScheduler (class in tianshou.highlevel.params.policy_params) ParamTransformerActorDualCriticsLRScheduler (class in tianshou.highlevel.params.policy_params) ParamTransformerAutoAlpha (class in tianshou.highlevel.params.policy_params) ParamTransformerChangeValue (class in tianshou.highlevel.params.policy_params) ParamTransformerData (class in tianshou.highlevel.params.policy_params) ParamTransformerDrop (class in tianshou.highlevel.params.policy_params) ParamTransformerFloatEnvParamFactory (class in tianshou.highlevel.params.policy_params) ParamTransformerLRScheduler (class in tianshou.highlevel.params.policy_params) ParamTransformerMultiLRScheduler (class in tianshou.highlevel.params.policy_params) ParamTransformerNoiseFactory (class in tianshou.highlevel.params.policy_params) persist() (Persistence method) (PersistenceGroup method) (PolicyPersistence method) persist_directory (World attribute) persist_path() (World method) PERSIST_POLICY (PersistEvent attribute) Persistence (class in tianshou.highlevel.persistence) persistence_base_dir (ExperimentConfig attribute) persistence_enabled (ExperimentConfig attribute) PersistenceGroup (class in tianshou.highlevel.persistence) PersistEvent (class in tianshou.highlevel.persistence) Perturbation (class in tianshou.utils.net.continuous) PettingZooEnv (class in tianshou.env.pettingzoo_env) PGAgentFactory (class in tianshou.highlevel.agent) PGExperimentBuilder (class in tianshou.highlevel.experiment) PGParams (class in tianshou.highlevel.params.policy_params) PGPolicy (class in tianshou.policy.modelfree.pg) PGTrainingStats (class in tianshou.policy.modelfree.pg) policies (MultiAgentPolicyManager attribute) POLICY (PolicyPersistence.Mode attribute) policy (World attribute) policy_entry (CollectActionBatchProtocol attribute) policy_noise (TD3Params attribute) policy_persistence_mode (ExperimentConfig attribute) policy_restore_directory (ExperimentConfig attribute) POLICY_STATE_DICT (PolicyPersistence.Mode attribute) policy_update_fn() (BaseTrainer method) (OfflineTrainer method) (OffpolicyTrainer method) (OnpolicyTrainer method) policy_within_training_step() (in module tianshou.utils.torch_utils) PolicyPersistence (class in tianshou.highlevel.persistence) PolicyPersistence.Mode (class in tianshou.highlevel.persistence) PolicyWrapperFactory (class in tianshou.highlevel.params.policy_wrapper) PolicyWrapperFactoryIntrinsicCuriosity (class in tianshou.highlevel.params.policy_wrapper) pop() (Batch method) (BatchProtocol method) post_process_fn() (BasePolicy method) (ICMPolicy method) PPOAgentFactory (class in tianshou.highlevel.agent) PPOExperimentBuilder (class in tianshou.highlevel.experiment) PPOParams (class in tianshou.highlevel.params.policy_params) PPOPolicy (class in tianshou.policy.modelfree.ppo) PPOTrainingStats (class in tianshou.policy.modelfree.ppo) pprint_asdict() (DataclassPPrintMixin method) pprints_asdict() (DataclassPPrintMixin method) pred_dist_std_array (CollectStats attribute) pred_dist_std_array_stat (CollectStats attribute) prepare_dict_for_logging() (BaseLogger method) (LazyLogger method) (TensorboardLogger method) (WandbLogger method) prev() (ReplayBuffer method) (ReplayBufferManager method) print_log() (TraceLog method) PrioBatchProtocol (class in tianshou.data.types) PrioritizedReplayBuffer (class in tianshou.data.buffer.prio) PrioritizedReplayBufferManager (class in tianshou.data.buffer.manager) PrioritizedVectorReplayBuffer (class in tianshou.data.buffer.vecbuf) process_buffer() (BasePolicy method) (CQLPolicy method) process_fn() (A2CPolicy method) (BasePolicy method) (BranchingDQNPolicy method) (CQLPolicy method) (DDPGPolicy method) (DQNPolicy method) (GAILPolicy method) (ICMPolicy method) (MultiAgentPolicyManager method) (NPGPolicy method) (PGPolicy method) (PPOPolicy method) ProtocolCalledException psrl_rew_mean (PSRLTrainingStats attribute) psrl_rew_std (PSRLTrainingStats attribute) PSRLModel (class in tianshou.policy.modelbased.psrl) PSRLPolicy (class in tianshou.policy.modelbased.psrl) PSRLTrainingStats (class in tianshou.policy.modelbased.psrl) Q q_loss (DiscreteBCQTrainingStats attribute) q_value (ImitationBatchProtocol attribute) qr_loss (DiscreteCQLTrainingStats attribute) QRDQNPolicy (class in tianshou.policy.modelfree.qrdqn) QRDQNTrainingStats (class in tianshou.policy.modelfree.qrdqn) quantile_loss (FQFTrainingStats attribute) QuantileRegressionBatchProtocol (class in tianshou.data.types) quantiles_tau (FQFBatchProtocol attribute) R RainbowPolicy (class in tianshou.policy.modelfree.rainbow) RainbowTrainingStats (class in tianshou.policy.modelfree.rainbow) RandomActionAgentFactory (class in tianshou.highlevel.agent) RandomActionExperimentBuilder (class in tianshou.highlevel.experiment) RandomActionPolicy (class in tianshou.policy.base) RandomActor (class in tianshou.utils.net.common) RAY (VectorEnvType attribute) RayEnvWorker (class in tianshou.env.worker.ray) RayVectorEnv (class in tianshou.env.venvs) recompute_advantage (PPOParams attribute) Recurrent (class in tianshou.utils.net.common) RecurrentActorProb (class in tianshou.utils.net.continuous) RecurrentCritic (class in tianshou.utils.net.continuous) RecurrentStateBatch (class in tianshou.data.types) recv() (EnvWorker method) (RayEnvWorker method) (SubprocEnvWorker method) REDQAgentFactory (class in tianshou.highlevel.agent) REDQExperimentBuilder (class in tianshou.highlevel.experiment) REDQParams (class in tianshou.highlevel.params.policy_params) REDQPolicy (class in tianshou.policy.modelfree.redq) REDQTrainingStats (class in tianshou.policy.modelfree.redq) reduce() (SegmentTree method) reduce_log_to_messages() (TraceLog method) refresh_all_sequence_stats() (CollectStats method) refresh_len_stats() (CollectStats method) refresh_return_stats() (CollectStats method) refresh_std_array_stats() (CollectStats method) reg_loss (DiscreteBCQTrainingStats attribute) RegisteredExpLauncher (class in tianshou.evaluation.launcher) render() (BaseVectorEnv method) (DummyEnvWorker method) (EnvWorker method) (PettingZooEnv method) (RayEnvWorker method) (SubprocEnvWorker method) (VectorEnvWrapper method) repeat_per_collect (SamplingConfig attribute) replace_empty_batches_by_none() (Batch method) replace_policy() (MultiAgentPolicyManager method) replay_buffer_ignore_obs_next (SamplingConfig attribute) replay_buffer_save_only_last_obs (SamplingConfig attribute) replay_buffer_stack_num (SamplingConfig attribute) ReplayBuffer (class in tianshou.data.buffer.base) ReplayBufferManager (class in tianshou.data.buffer.manager) reset() (AsyncCollector method) (BaseCollector method) (BaseNoise method) (BaseTrainer method) (BaseVectorEnv method) (DummyEnvWorker method) (EnvWorker method) (GaussianNoise method) (HERReplayBuffer method) (NoisyLinear method) (OUNoise method) (PettingZooEnv method) (RayEnvWorker method) (ReplayBuffer method) (ReplayBufferManager method) (SubprocEnvWorker method) (VectorEnvNormObs method) (VectorEnvWrapper method) reset_buffer() (BaseCollector method) reset_env() (AsyncCollector method) (BaseCollector method) (Collector method) reset_stat() (BaseCollector method) restore() (Persistence method) (PersistenceGroup method) (PolicyPersistence method) restore_data() (BaseLogger method) (LazyLogger method) (TensorboardLogger method) (WandbLogger method) restore_directory (World attribute) restore_logged_data() (BaseLogger static method) (LazyLogger static method) (TensorboardLogger static method) (WandbLogger static method) restore_path() (World method) RESTORE_POLICY (RestoreEvent attribute) RestoreEvent (class in tianshou.highlevel.persistence) returns (BatchWithReturnsProtocol attribute) (CollectStats attribute) returns_stat (CollectStats attribute) (LoggedCollectStats attribute) rew (RolloutBatchProtocol attribute) reward_normalization (DQNParams attribute) (PGParams attribute) rewrite_transitions() (HERReplayBuffer method) RLiableExperimentResult (class in tianshou.evaluation.rliable_evaluation_hl) RolloutBatchProtocol (class in tianshou.data.types) run() (BaseTrainer method) (Experiment method) (ExperimentCollection method) run_on_episode_done() (Collector method) run_on_step_hook() (Collector method) RunningMeanStd (class in tianshou.utils.statistics) S SACAgentFactory (class in tianshou.highlevel.agent) SACExperimentBuilder (class in tianshou.highlevel.experiment) SACParams (class in tianshou.highlevel.params.policy_params) SACPolicy (class in tianshou.policy.modelfree.sac) SACTrainingStats (class in tianshou.policy.modelfree.sac) sample() (NoisyLinear method) (PrioritizedReplayBuffer method) (ReplayBuffer method) sample_indices() (HERReplayBuffer method) (PrioritizedReplayBuffer method) (ReplayBuffer method) (ReplayBufferManager method) sample_reward() (PSRLModel method) sample_size (IQNParams attribute) sample_trans_prob() (PSRLModel method) sampling_config (ExperimentBuilder property) SamplingConfig (class in tianshou.highlevel.config) save() (Experiment method) (ShArray method) save_data() (BaseLogger method) (LazyLogger method) (TensorboardLogger method) (WandbLogger method) save_hdf5() (HERReplayBuffer method) (HERReplayBufferManager method) (ReplayBuffer method) save_log() (TraceLog method) seed (ExperimentConfig attribute) seed() (BaseVectorEnv method) (DummyEnvWorker method) (EnvWorker method) (PettingZooEnv method) (RayEnvWorker method) (SubprocEnvWorker method) (VectorEnvWrapper method) SegmentTree (class in tianshou.data.utils.segtree) send() (DummyEnvWorker method) (EnvWorker method) (RayEnvWorker method) (SubprocEnvWorker method) SequenceSummaryStats (class in tianshou.data.stats) sequential (RegisteredExpLauncher attribute) SequentialExpLauncher (class in tianshou.evaluation.launcher) set_agent_id() (BasePolicy method) set_array_at_key() (Batch method) (BatchProtocol method) (ReplayBuffer method) set_batch() (HERReplayBuffer method) (HERReplayBufferManager method) (ReplayBuffer method) (ReplayBufferManager method) set_beta() (PrioritizedReplayBuffer method) (PrioritizedVectorReplayBuffer method) set_collect_time() (CollectStats method) set_env_attr() (BaseVectorEnv method) (DummyEnvWorker method) (EnvWorker method) (RayEnvWorker method) (SubprocEnvWorker method) (VectorEnvWrapper method) set_eps() (DQNPolicy method) (ICMPolicy method) set_exp_noise() (DDPGPolicy method) set_numerical_fields_to_precision() (in module tianshou.utils.logging) set_obs_rms() (VectorEnvNormObs method) set_on_episode_done_hook() (Collector method) set_on_step_hook() (Collector method) set_persistence() (Environments method) set_policy_wrapper_factory() (AgentFactory method) set_postfix() (DummyTqdm method) set_trainer_callbacks() (AgentFactory method) shape (Batch property) (BatchProtocol property) ShArray (class in tianshou.env.worker.subproc) ShmemVectorEnv (class in tianshou.env.venvs) should_stop() (EpochStopCallback method) (EpochStopCallbackRewardThreshold method) smoothed_loss (TrainingStats attribute) soft_update() (BasePolicy method) solve_policy() (PSRLModel method) space_info (RandomActor property) SpaceInfo (class in tianshou.utils.space_info) split() (Batch method) (BatchProtocol method) stack() (Batch static method) (BatchProtocol static method) stack_() (Batch method) (BatchProtocol method) start_timesteps (SamplingConfig attribute) start_timesteps_random (SamplingConfig attribute) state (ActStateBatchProtocol attribute) (ImitationBatchProtocol attribute) state_dict() (MultipleLRSchedulers method) std (LoggedSummaryData attribute) (SequenceSummaryStats attribute) std() (MovAvg method) step() (BaseVectorEnv method) (EnvWorker method) (MultipleLRSchedulers method) (PettingZooEnv method) (TruncatedAsTerminated method) (VectorEnvNormObs method) (VectorEnvWrapper method) step_per_collect (SamplingConfig attribute) step_per_epoch (SamplingConfig attribute) step_size (TRPOTrainingStats attribute) StepHook (class in tianshou.data.collector) StepHookAddActionDistribution (class in tianshou.data.collector) StepHookProtocol (class in tianshou.data.collector) subbuffer_edges (ReplayBuffer property) (ReplayBufferManager property) SUBPROC (VectorEnvType attribute) SUBPROC_SHARED_MEM_AUTO (VectorEnvType attribute) SUBPROC_SHARED_MEM_DEFAULT_CONTEXT (VectorEnvType attribute) SUBPROC_SHARED_MEM_FORK_CONTEXT (VectorEnvType attribute) SubprocEnvWorker (class in tianshou.env.worker.subproc) SubprocVectorEnv (class in tianshou.env.venvs) subset_size (REDQParams attribute) sync_weight() (BCQPolicy method) (CQLPolicy method) (DDPGPolicy method) (DiscreteCRRPolicy method) (DQNPolicy method) (REDQPolicy method) (SACPolicy method) (TD3Policy method) T target_mode (REDQParams attribute) target_sample_size (IQNParams attribute) target_update_freq (DQNParams attribute) tau (DDPGParams attribute) (TD3Params attribute) taus (QuantileRegressionBatchProtocol attribute) TD3AgentFactory (class in tianshou.highlevel.agent) TD3BCPolicy (class in tianshou.policy.imitation.td3_bc) TD3BCTrainingStats (class in tianshou.policy.imitation.td3_bc) TD3ExperimentBuilder (class in tianshou.highlevel.experiment) TD3Params (class in tianshou.highlevel.params.policy_params) TD3Policy (class in tianshou.policy.modelfree.td3) TD3TrainingStats (class in tianshou.policy.modelfree.td3) TensorboardLogger (class in tianshou.utils.logger.tensorboard) terminated (RolloutBatchProtocol attribute) TEST (DataScope attribute) (EnvMode attribute) test_collect_stat (EpochStats attribute) test_collector (World attribute) test_episode (InfoStats attribute) test_episode() (in module tianshou.trainer.utils) test_episode_returns_RE (RLiableExperimentResult attribute) test_step (InfoStats attribute) test_step() (BaseTrainer method) test_time (TimingStats attribute) tianshou.config module tianshou.data.batch module tianshou.data.buffer.base module tianshou.data.buffer.cached module tianshou.data.buffer.her module tianshou.data.buffer.manager module tianshou.data.buffer.prio module tianshou.data.buffer.vecbuf module tianshou.data.collector module tianshou.data.stats module tianshou.data.types module tianshou.data.utils.converter module tianshou.data.utils.segtree module tianshou.env.gym_wrappers module tianshou.env.pettingzoo_env module tianshou.env.utils module tianshou.env.venv_wrappers module tianshou.env.venvs module tianshou.env.worker.base module tianshou.env.worker.dummy module tianshou.env.worker.ray module tianshou.env.worker.subproc module tianshou.evaluation.launcher module tianshou.evaluation.rliable_evaluation_hl module tianshou.exploration.random module tianshou.highlevel.agent module tianshou.highlevel.config module tianshou.highlevel.env module tianshou.highlevel.experiment module tianshou.highlevel.logger module tianshou.highlevel.module.actor module tianshou.highlevel.module.core module tianshou.highlevel.module.critic module tianshou.highlevel.module.intermediate module tianshou.highlevel.module.module_opt module tianshou.highlevel.module.special module tianshou.highlevel.optim module tianshou.highlevel.params.alpha module tianshou.highlevel.params.dist_fn module tianshou.highlevel.params.env_param module tianshou.highlevel.params.lr_scheduler module tianshou.highlevel.params.noise module tianshou.highlevel.params.policy_params module tianshou.highlevel.params.policy_wrapper module tianshou.highlevel.persistence module tianshou.highlevel.trainer module tianshou.highlevel.world module tianshou.policy.base module tianshou.policy.imitation.base module tianshou.policy.imitation.bcq module tianshou.policy.imitation.cql module tianshou.policy.imitation.discrete_bcq module tianshou.policy.imitation.discrete_cql module tianshou.policy.imitation.discrete_crr module tianshou.policy.imitation.gail module tianshou.policy.imitation.td3_bc module tianshou.policy.modelbased.icm module tianshou.policy.modelbased.psrl module tianshou.policy.modelfree.a2c module tianshou.policy.modelfree.bdq module tianshou.policy.modelfree.c51 module tianshou.policy.modelfree.ddpg module tianshou.policy.modelfree.discrete_sac module tianshou.policy.modelfree.dqn module tianshou.policy.modelfree.fqf module tianshou.policy.modelfree.iqn module tianshou.policy.modelfree.npg module tianshou.policy.modelfree.pg module tianshou.policy.modelfree.ppo module tianshou.policy.modelfree.qrdqn module tianshou.policy.modelfree.rainbow module tianshou.policy.modelfree.redq module tianshou.policy.modelfree.sac module tianshou.policy.modelfree.td3 module tianshou.policy.modelfree.trpo module tianshou.policy.multiagent.mapolicy module tianshou.policy.random module tianshou.trainer.base module tianshou.trainer.utils module tianshou.utils.conversion module tianshou.utils.determinism module tianshou.utils.logger.base module tianshou.utils.logger.tensorboard module tianshou.utils.logger.wandb module tianshou.utils.logging module tianshou.utils.lr_scheduler module tianshou.utils.net.common module tianshou.utils.net.continuous module tianshou.utils.net.discrete module tianshou.utils.optim module tianshou.utils.print module tianshou.utils.progress_bar module tianshou.utils.space_info module tianshou.utils.statistics module tianshou.utils.torch_utils module tianshou.utils.warning module timing (InfoStats attribute) TimingStats (class in tianshou.data.stats) to_at_least_2d() (Batch method) (BatchProtocol method) to_dict() (Batch method) (BatchProtocol method) to_hdf5() (in module tianshou.data.utils.converter) to_list_of_dicts() (Batch method) (BatchProtocol method) to_numpy() (Batch method) (BatchProtocol method) (in module tianshou.data.utils.converter) to_numpy_() (Batch method) (BatchProtocol method) to_optional_float() (in module tianshou.utils.conversion) to_torch() (Batch method) (BatchProtocol method) (in module tianshou.data.utils.converter) to_torch_() (Batch method) (BatchProtocol method) to_torch_as() (in module tianshou.data.utils.converter) torch_param_hash() (in module tianshou.utils.determinism) torch_train_mode() (in module tianshou.utils.torch_utils) total_time (TimingStats attribute) TraceDeterminismTest (class in tianshou.utils.determinism) TraceDeterminismTest.Result (class in tianshou.utils.determinism) TraceLog (class in tianshou.utils.determinism) TraceLogger (class in tianshou.utils.determinism) TraceLoggerContext (class in tianshou.utils.determinism) TRAIN (DataScope attribute) (EnvMode attribute) train (ExperimentConfig attribute) train() (BCQPolicy method) (CQLPolicy method) (DDPGPolicy method) (DiscreteBCQPolicy method) (DQNPolicy method) (ICMPolicy method) (MultiAgentPolicyManager method) (SACPolicy method) (TD3Policy method) train_collect_stat (EpochStats attribute) train_collector (World attribute) train_episode (InfoStats attribute) train_episode_returns_RE (RLiableExperimentResult attribute) train_step (InfoStats attribute) train_time (TimingStats attribute) (TrainingStats attribute) train_time_collect (TimingStats attribute) train_time_update (TimingStats attribute) trainer (World attribute) trainer_result (ExperimentResult attribute) TrainerCallbacks (class in tianshou.highlevel.trainer) training_stat (EpochStats attribute) training_step() (BaseTrainer method) TrainingContext (class in tianshou.highlevel.trainer) TrainingStats (class in tianshou.policy.base) TrainingStatsWrapper (class in tianshou.policy.base) transform() (ParamTransformer method) (ParamTransformerActorAndCriticLRScheduler method) (ParamTransformerActorDualCriticsLRScheduler method) (ParamTransformerAutoAlpha method) (ParamTransformerChangeValue method) (ParamTransformerDrop method) (ParamTransformerLRScheduler method) (ParamTransformerMultiLRScheduler method) TRPOAgentFactory (class in tianshou.highlevel.agent) TRPOExperimentBuilder (class in tianshou.highlevel.experiment) TRPOParams (class in tianshou.highlevel.params.policy_params) TRPOPolicy (class in tianshou.policy.modelfree.trpo) TRPOTrainingStats (class in tianshou.policy.modelfree.trpo) truncated (RolloutBatchProtocol attribute) TruncatedAsTerminated (class in tianshou.env.gym_wrappers) U unfinished_index() (ReplayBuffer method) (ReplayBufferManager method) UNSUPPORTED (ContinuousActorType attribute) UPDATE (DataScope attribute) update() (BasePolicy method) (Batch method) (BatchProtocol method) (DummyTqdm method) (HERReplayBuffer method) (HERReplayBufferManager method) (PrioritizedReplayBuffer method) (ReplayBuffer method) (ReplayBufferManager method) (RunningMeanStd method) update_actor_freq (TD3Params attribute) update_at_episode_done() (CollectStats method) update_at_step_batch() (CollectStats method) update_per_step (SamplingConfig attribute) update_speed (TimingStats attribute) update_weight() (PrioritizedReplayBuffer method) V v_s (BatchWithAdvantagesProtocol attribute) VAE (class in tianshou.utils.net.continuous) vae_loss (BCQTrainingStats attribute) value_clip (PPOParams attribute) value_iteration() (PSRLModel static method) value_mask() (BasePolicy static method) VectorEnvNormObs (class in tianshou.env.venv_wrappers) VectorEnvType (class in tianshou.highlevel.env) VectorEnvWrapper (class in tianshou.env.venv_wrappers) VectorReplayBuffer (class in tianshou.data.buffer.vecbuf) verbose (JoblibConfig attribute) (TraceLogger attribute) vf_coef (A2CParams attribute) vf_loss (A2CTrainingStats attribute) (NPGTrainingStats attribute) (PPOTrainingStats attribute) W wait() (DummyEnvWorker static method) (EnvWorker static method) (RayEnvWorker static method) (SubprocEnvWorker static method) WandbLogger (class in tianshou.utils.logger.wandb) WATCH (EnvMode attribute) watch (ExperimentConfig attribute) watch_num_episodes (ExperimentConfig attribute) watch_render (ExperimentConfig attribute) weight (PrioBatchProtocol attribute) with_a2c_params() (A2CExperimentBuilder method) with_autogenerated_stats() (CollectStats class method) with_ddpg_params() (DDPGExperimentBuilder method) with_dqn_params() (DQNExperimentBuilder method) with_epoch_stop_callback() (ExperimentBuilder method) with_epoch_test_callback() (ExperimentBuilder method) with_epoch_train_callback() (ExperimentBuilder method) with_iqn_params() (IQNExperimentBuilder method) with_logger_factory() (ExperimentBuilder method) with_model_factory() (DQNExperimentBuilder method) with_model_factory_default() (DQNExperimentBuilder method) with_name() (ExperimentBuilder method) with_npg_params() (NPGExperimentBuilder method) with_optim_factory() (ExperimentBuilder method) with_optim_factory_default() (ExperimentBuilder method) with_pg_params() (PGExperimentBuilder method) with_policy_wrapper_factory() (ExperimentBuilder method) with_ppo_params() (PPOExperimentBuilder method) with_preprocess_network_factory() (IQNExperimentBuilder method) with_redq_params() (REDQExperimentBuilder method) with_sac_params() (DiscreteSACExperimentBuilder method) (SACExperimentBuilder method) with_td3_params() (TD3ExperimentBuilder method) with_trpo_params() (TRPOExperimentBuilder method) World (class in tianshou.highlevel.world) world (ExperimentResult attribute) wrapped_stats (TrainingStatsWrapper property) write() (BaseLogger method) (LazyLogger method) (TensorboardLogger method) (WandbLogger method)