Spaces:

skar0
/

cartpole-demo

Runtime error

App Files Files Community

skar0 committed on Mar 4, 2023

Commit

d6a6bab

1 Parent(s): 820bb68

Changed git commands over to Hugging Face

Browse files

Files changed (1) hide show

cartpole.py +37 -25

cartpole.py CHANGED Viewed

@@ -11,7 +11,7 @@
 #     name: python3
 # ---
-# + id="QAY_RQOLcRtA" executionInfo={"status": "ok", "timestamp": 1677942285188, "user_tz": 0, "elapsed": 1942, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} colab={"base_uri": "https://localhost:8080/"} outputId="ee4de327-947e-4f4e-9d34-514460da288a"
 MAIN = __name__ == "__main__"
 if MAIN:
     print('Mounting drive...')
@@ -19,23 +19,32 @@ if MAIN:
     drive.mount('/content/drive')
 # %cd /content/drive/MyDrive/Colab Notebooks/cartpole-demo
-# + colab={"base_uri": "https://localhost:8080/"} id="GgSNZRJh4EjV" executionInfo={"status": "ok", "timestamp": 1677942324397, "user_tz": 0, "elapsed": 39212, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} outputId="8fd1eecc-12d1-4bae-cd15-dd541f1d84c7"
 # !pip install einops
 # !pip install wandb
 # !pip install jupytext
 # !pip install pygame
 # !pip install torchtyping
 # !pip install gradio
-# + colab={"base_uri": "https://localhost:8080/"} id="1g58HZUb8Ltl" executionInfo={"status": "ok", "timestamp": 1677942492332, "user_tz": 0, "elapsed": 2440, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} outputId="d2f2ab57-c2c0-49aa-fdef-323556a2e4b6"
 # !git config --global user.email "[email protected]"
-# !git config --global user.name "ojh31"
-# !cat pat.txt | xargs git remote set-url origin
 # !jupytext --to py cartpole.ipynb
 # !git fetch
 # !git status
-# + id="vEczQ48wC40O" executionInfo={"status": "ok", "timestamp": 1677942330521, "user_tz": 0, "elapsed": 4062, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 import os
 import glob
 import sys
@@ -66,7 +75,7 @@ from typeguard import typechecked
 # + id="K7T8bs1Y76ZK" executionInfo={"status": "ok", "timestamp": 1677942330521, "user_tz": 0, "elapsed": 8, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} colab={"base_uri": "https://localhost:8080/"} outputId="f59ffef0-7156-4f27-d992-a392d59a1c73"
 # %env "WANDB_NOTEBOOK_NAME" "cartpole.py"
-# + id="Q5E93-BGRjuy" executionInfo={"status": "ok", "timestamp": 1677942330522, "user_tz": 0, "elapsed": 8, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def make_env(
     env_id: str, seed: int, idx: int, capture_video: bool, run_name: str
 ):
@@ -93,7 +102,7 @@ def make_env(
     return thunk
-# + id="Kf152ROwHjM_" executionInfo={"status": "ok", "timestamp": 1677942330522, "user_tz": 0, "elapsed": 7, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def test_minibatch_indexes(minibatch_indexes):
     for n in range(5):
         frac, minibatch_size = np.random.randint(1, 8, size=(2,))
@@ -105,7 +114,7 @@ def test_minibatch_indexes(minibatch_indexes):
         np.testing.assert_equal(np.sort(np.stack(indices).flatten()), np.arange(batch_size))
-# + id="mhvduVeOHkln" executionInfo={"status": "ok", "timestamp": 1677942330522, "user_tz": 0, "elapsed": 7, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def test_calc_entropy_bonus(calc_entropy_bonus):
     probs = Categorical(logits=t.randn((3, 4)))
     ent_coef = 0.5
@@ -114,7 +123,7 @@ def test_calc_entropy_bonus(calc_entropy_bonus):
     t.testing.assert_close(expected, actual)
-# + id="Aya60GeCGA5X" executionInfo={"status": "ok", "timestamp": 1677942330875, "user_tz": 0, "elapsed": 360, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
     t.nn.init.orthogonal_(layer.weight, std)
     t.nn.init.constant_(layer.bias, bias_const)
@@ -146,7 +155,7 @@ class Agent(nn.Module):
-# + id="6PwPZHlLGDYu" executionInfo={"status": "ok", "timestamp": 1677942330875, "user_tz": 0, "elapsed": 4, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 @t.inference_mode()
 def compute_advantages(
@@ -190,7 +199,7 @@ def compute_advantages(
-# + id="uYSSMnF-GPvm" executionInfo={"status": "ok", "timestamp": 1677942330875, "user_tz": 0, "elapsed": 3, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 @dataclass
 class Minibatch:
@@ -252,7 +261,7 @@ def make_minibatches(
-# + id="K7wXDJ9MGOWu" executionInfo={"status": "ok", "timestamp": 1677942330876, "user_tz": 0, "elapsed": 4, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 def calc_policy_loss(
     probs: Categorical, mb_action: t.Tensor, mb_advantages: t.Tensor,
@@ -277,7 +286,7 @@ def calc_policy_loss(
-# + id="CmyxU6JWGMsG" executionInfo={"status": "ok", "timestamp": 1677942330876, "user_tz": 0, "elapsed": 4, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 def calc_value_function_loss(
     critic: nn.Sequential, mb_obs: t.Tensor, mb_returns: t.Tensor, v_coef: float
@@ -294,7 +303,7 @@ def calc_value_function_loss(
-# + id="npyWs6xjGLkP" executionInfo={"status": "ok", "timestamp": 1677942331469, "user_tz": 0, "elapsed": 597, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 def calc_entropy_loss(probs: Categorical, ent_coef: float):
     '''Return the entropy loss term.
@@ -310,7 +319,7 @@ if MAIN:
     test_calc_entropy_bonus(calc_entropy_loss)
-# + id="nqJeg1kZGKSG" executionInfo={"status": "ok", "timestamp": 1677942331470, "user_tz": 0, "elapsed": 5, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 class PPOScheduler:
     def __init__(self, optimizer: optim.Adam, initial_lr: float, end_lr: float, num_updates: int):
@@ -345,7 +354,7 @@ def make_optimizer(
-# + id="mgZ7-wsRCxJW" executionInfo={"status": "ok", "timestamp": 1677942331470, "user_tz": 0, "elapsed": 5, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 @dataclass
 class PPOArgs:
     exp_name: str = 'cartpole.py'
@@ -373,7 +382,7 @@ class PPOArgs:
     minibatch_size: int = 128
-# + id="xeIu-J3ZwGyq" executionInfo={"status": "ok", "timestamp": 1677942356492, "user_tz": 0, "elapsed": 218, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def wandb_init(name: str, args: PPOArgs):
     wandb.init(
         project=args.wandb_project_name,
@@ -387,14 +396,14 @@ def wandb_init(name: str, args: PPOArgs):
     )
-# + id="gMYWqhsryYHy" executionInfo={"status": "ok", "timestamp": 1677942331470, "user_tz": 0, "elapsed": 4, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def set_seed(seed: int):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-# + id="T9j_L0Wpyrgz" executionInfo={"status": "ok", "timestamp": 1677942331471, "user_tz": 0, "elapsed": 5, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 @typechecked
 def rollout_phase(
     next_obs: t.Tensor, next_done: t.Tensor,
@@ -472,14 +481,14 @@ def rollout_phase(
     )
-# + id="xdDhABIk5jyb" executionInfo={"status": "ok", "timestamp": 1677942331471, "user_tz": 0, "elapsed": 5, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def reset_env(envs, device):
     next_obs = torch.Tensor(envs.reset()).to(device)
     next_done = torch.zeros(envs.num_envs).to(device)
     return next_obs, next_done
-# + id="5CoMpUVU7rFT" executionInfo={"status": "ok", "timestamp": 1677942331471, "user_tz": 0, "elapsed": 5, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 def get_action_shape(envs: gym.vector.SyncVectorEnv):
     action_shape = envs.single_action_space.shape
     assert action_shape is not None
@@ -489,7 +498,7 @@ def get_action_shape(envs: gym.vector.SyncVectorEnv):
     return action_shape
-# + id="FHmn5kSUGFFu" executionInfo={"status": "ok", "timestamp": 1677942366007, "user_tz": 0, "elapsed": 251, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
 # %%
 def train_ppo(args: PPOArgs):
     t0 = int(time.time())
@@ -628,8 +637,11 @@ if MAIN:
     args = PPOArgs()
     train_ppo(args)
-# + colab={"base_uri": "https://localhost:8080/"} id="xJW6KL7QIj4s" outputId="7c529849-6d46-4a6a-def5-e1c0ef652c64"
 # !python demo.py
-# + id="P7ZfUlAqImIr" executionInfo={"status": "aborted", "timestamp": 1677942332655, "user_tz": 0, "elapsed": 4, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}

 #     name: python3
 # ---
+# + id="QAY_RQOLcRtA" executionInfo={"status": "ok", "timestamp": 1677945244865, "user_tz": 0, "elapsed": 19712, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} colab={"base_uri": "https://localhost:8080/"} outputId="be179435-1667-40af-8a80-7bc63a472715"
 MAIN = __name__ == "__main__"
 if MAIN:
     print('Mounting drive...')
     drive.mount('/content/drive')
 # %cd /content/drive/MyDrive/Colab Notebooks/cartpole-demo
+# + colab={"base_uri": "https://localhost:8080/"} id="GgSNZRJh4EjV" executionInfo={"status": "ok", "timestamp": 1677945316689, "user_tz": 0, "elapsed": 57846, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} outputId="6aeb7bf3-e186-449d-cdc4-c66f778244b2"
 # !pip install einops
 # !pip install wandb
 # !pip install jupytext
 # !pip install pygame
 # !pip install torchtyping
 # !pip install gradio
+# !pip install huggingface_hub
+# + colab={"base_uri": "https://localhost:8080/"} id="1g58HZUb8Ltl" executionInfo={"status": "ok", "timestamp": 1677945458077, "user_tz": 0, "elapsed": 16862, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} outputId="62ffc9cd-ff0b-4473-c17a-4593a14526cf"
+# !git config --global credential.helper store
+# !git config --global user.name "skar0"
 # !git config --global user.email "[email protected]"
+# !huggingface-cli login
 # !jupytext --to py cartpole.ipynb
 # !git fetch
+# # !chmod +x .git/hooks/pre-push
 # !git status
+# + id="dYeFdxVIWOqc" executionInfo={"status": "ok", "timestamp": 1677945546175, "user_tz": 0, "elapsed": 318, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}}
+# + colab={"base_uri": "https://localhost:8080/"} id="5xFqBnKzVN60" executionInfo={"status": "ok", "timestamp": 1677945556589, "user_tz": 0, "elapsed": 7558, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} outputId="535e6c5e-17f6-4342-8a9d-ff54f4c82187"
+# !git push
+# + id="vEczQ48wC40O"
 import os
 import glob
 import sys
 # + id="K7T8bs1Y76ZK" executionInfo={"status": "ok", "timestamp": 1677942330521, "user_tz": 0, "elapsed": 8, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} colab={"base_uri": "https://localhost:8080/"} outputId="f59ffef0-7156-4f27-d992-a392d59a1c73"
 # %env "WANDB_NOTEBOOK_NAME" "cartpole.py"
+# + id="Q5E93-BGRjuy"
 def make_env(
     env_id: str, seed: int, idx: int, capture_video: bool, run_name: str
 ):
     return thunk
+# + id="Kf152ROwHjM_"
 def test_minibatch_indexes(minibatch_indexes):
     for n in range(5):
         frac, minibatch_size = np.random.randint(1, 8, size=(2,))
         np.testing.assert_equal(np.sort(np.stack(indices).flatten()), np.arange(batch_size))
+# + id="mhvduVeOHkln"
 def test_calc_entropy_bonus(calc_entropy_bonus):
     probs = Categorical(logits=t.randn((3, 4)))
     ent_coef = 0.5
     t.testing.assert_close(expected, actual)
+# + id="Aya60GeCGA5X"
 def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
     t.nn.init.orthogonal_(layer.weight, std)
     t.nn.init.constant_(layer.bias, bias_const)
+# + id="6PwPZHlLGDYu"
 # %%
 @t.inference_mode()
 def compute_advantages(
+# + id="uYSSMnF-GPvm"
 # %%
 @dataclass
 class Minibatch:
+# + id="K7wXDJ9MGOWu"
 # %%
 def calc_policy_loss(
     probs: Categorical, mb_action: t.Tensor, mb_advantages: t.Tensor,
+# + id="CmyxU6JWGMsG"
 # %%
 def calc_value_function_loss(
     critic: nn.Sequential, mb_obs: t.Tensor, mb_returns: t.Tensor, v_coef: float
+# + id="npyWs6xjGLkP"
 # %%
 def calc_entropy_loss(probs: Categorical, ent_coef: float):
     '''Return the entropy loss term.
     test_calc_entropy_bonus(calc_entropy_loss)
+# + id="nqJeg1kZGKSG"
 # %%
 class PPOScheduler:
     def __init__(self, optimizer: optim.Adam, initial_lr: float, end_lr: float, num_updates: int):
+# + id="mgZ7-wsRCxJW"
 @dataclass
 class PPOArgs:
     exp_name: str = 'cartpole.py'
     minibatch_size: int = 128
+# + id="xeIu-J3ZwGyq"
 def wandb_init(name: str, args: PPOArgs):
     wandb.init(
         project=args.wandb_project_name,
     )
+# + id="gMYWqhsryYHy"
 def set_seed(seed: int):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
+# + id="T9j_L0Wpyrgz"
 @typechecked
 def rollout_phase(
     next_obs: t.Tensor, next_done: t.Tensor,
     )
+# + id="xdDhABIk5jyb"
 def reset_env(envs, device):
     next_obs = torch.Tensor(envs.reset()).to(device)
     next_done = torch.zeros(envs.num_envs).to(device)
     return next_obs, next_done
+# + id="5CoMpUVU7rFT"
 def get_action_shape(envs: gym.vector.SyncVectorEnv):
     action_shape = envs.single_action_space.shape
     assert action_shape is not None
     return action_shape
+# + id="FHmn5kSUGFFu"
 # %%
 def train_ppo(args: PPOArgs):
     t0 = int(time.time())
     args = PPOArgs()
     train_ppo(args)
+# + colab={"base_uri": "https://localhost:8080/"} id="xJW6KL7QIj4s" executionInfo={"status": "ok", "timestamp": 1677942639015, "user_tz": 0, "elapsed": 105286, "user": {"displayName": "Oskar Hollinsworth", "userId": "00307706571197304608"}} outputId="7c529849-6d46-4a6a-def5-e1c0ef652c64"
 # !python demo.py
+# + id="P7ZfUlAqImIr"
+# !pip freeze > requirements.txt
+# + id="x_bhyL3GLnhr"