# legged_robot_dtc.py
from legged_gym import LEGGED_GYM_ROOT_DIR, envs
import time
from warnings import WarningMessage
import numpy as np
import os

from isaacgym.torch_utils import *
from isaacgym import gymtorch, gymapi, gymutil

import torch
import torch.nn.functional
from torch import Tensor
from typing import Tuple, Dict

from legged_gym.envs.base.base_task import BaseTask
from legged_gym.envs import LeggedRobot
from legged_gym.utils.terrain import Terrain
from legged_gym.utils.math import quat_apply_yaw, wrap_to_pi, torch_rand_sqrt_float
from legged_gym.utils.helpers import class_to_dict
from scipy.spatial.transform import Rotation as R
import torchvision.transforms as T

from .legged_robot_config import LeggedRobotCfg
class LeggedRobotDTC(LeggedRobot):
    """LeggedRobot variant with DTC foothold prediction, terrain-based foothold
    scoring, and the corresponding observations and rewards."""
    cfg: LeggedRobotCfg

    def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
        super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
    @staticmethod
    def rotate_positions(base_pos, thetas):
        """Rotate a batch of position sets about the world z-axis.

        Args:
            base_pos: (N, P, 3) positions to rotate.
            thetas: (N,) yaw angles in radians.

        Returns:
            (N, P, 3) rotated positions.
        """
        def batch_rotation_matrix_z(thetas):
            cos_thetas = torch.cos(thetas)
            sin_thetas = torch.sin(thetas)
            # Create a batch of z-axis rotation matrices, shape (N, 3, 3)
            zeros = torch.zeros_like(thetas)
            ones = torch.ones_like(thetas)
            Rz = torch.stack([
                cos_thetas, -sin_thetas, zeros,
                sin_thetas, cos_thetas, zeros,
                zeros, zeros, ones
            ], dim=1).reshape(-1, 3, 3)
            return Rz

        Rz = batch_rotation_matrix_z(thetas)
        # Apply Rz to every point: (N, 3, 3) @ (N, 3, P) -> (N, 3, P) -> (N, P, 3)
        rotated_pos = torch.bmm(Rz, base_pos.transpose(1, 2)).transpose(1, 2)
        return rotated_pos
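    # Example: rotating the unit-x offset by a yaw of pi/2 maps it onto unit-y, e.g.
    #   LeggedRobotDTC.rotate_positions(torch.tensor([[[1., 0., 0.]]]),
    #                                   torch.tensor([1.5707963]))  # ~[[[0., 1., 0.]]]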
def post_physics_step(self):
""" check terminations, compute observations and rewards
calls self._post_physics_step_callback() for common computations
calls self._draw_debug_vis() if needed
"""
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_net_contact_force_tensor(self.sim)
self.gym.refresh_rigid_body_state_tensor(self.sim)
self.episode_length_buf += 1
self.common_step_counter += 1
# prepare quantities
self.last_base_ang_vel = self.base_ang_vel.clone()
self.base_quat[:] = self.root_states[:, 3:7]
self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) #robot velocity
self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
# update lin / ang vel / cmd buffer
self.lin_vel_buffer[:-1] = self.lin_vel_buffer[1:].clone()
self.lin_vel_buffer[-1] = self.base_lin_vel[:, :2]
self.ang_vel_buffer[:-1] = self.ang_vel_buffer[1:].clone()
self.ang_vel_buffer[-1] = self.base_ang_vel[:, 2].unsqueeze(1)
self.cmd_buffer[:-1] = self.cmd_buffer[1:].clone()
self.cmd_buffer[-1] = self.commands
        # base position in world frame (used later for foothold prediction)
        self.base_pos[:] = self.root_states[:, :3]
        self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec)
        self.foot_velocities = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 7:10]
        self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 0:3]
##PMTrajectoryGenerator test:###
# self.cpg_phase_information = self.pmtg.update_observation()
self._post_physics_step_callback()
###########################################################################
#! added by shaoze
self.hip_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.thigh_indices, 0:3]
#! for DTC foothold prediction
hip_to_base = self.hip_positions - self.base_pos.unsqueeze(1).repeat(1,4,1)
yaw_vel_cmd = self.commands[:, 2]
rotated_hip_to_base = self.rotate_positions(hip_to_base, yaw_vel_cmd)
p_shoulder_i = self.base_pos.unsqueeze(1).repeat(1,4,1) + rotated_hip_to_base
t_stance = self.cfg.sim.dt * self.cfg.control.decimation
fdbk_gain_k = 0.03
cmd_lin_vel = torch.cat((self.commands[:, :2], torch.zeros(self.num_envs, 1, device=self.device)), dim=1)
p_symmetric = t_stance / 2 * self.base_lin_vel.unsqueeze(1).repeat(1,4,1) + \
fdbk_gain_k * (self.base_lin_vel.unsqueeze(1).repeat(1,4,1) - cmd_lin_vel.unsqueeze(1).repeat(1,4,1))
self.pred_footholds = p_shoulder_i + p_symmetric
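        # Note: this is the symmetric (Raibert-style) foothold heuristic: half a stance
        # period of travel at the current base velocity plus a feedback term on the
        # velocity-tracking error, offset from each (yaw-rotated) shoulder position.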
pred_footholds_to_robot = self.pred_footholds - self.base_pos.unsqueeze(1).repeat(1,4,1)
self.pred_footholds_to_robot = torch.zeros_like(pred_footholds_to_robot)
for i in range(4):
self.pred_footholds_to_robot[:,i,:] = quat_rotate_inverse(self.base_quat, pred_footholds_to_robot[:,i,:])
#! DTC foothold score computation based on terrain
if self.cfg.terrain.measure_heights:
#! foothold score based on "Perceptive Locomotion in Rough Terrain"
measure_height_to_base = self.measured_heights - self.base_pos[:, 2].unsqueeze(1).repeat(1, len(self.cfg.terrain.measured_points_x)*len(self.cfg.terrain.measured_points_y))
measured_heights_grid = measure_height_to_base.clone().view(self.num_envs, len(self.cfg.terrain.measured_points_x), len(self.cfg.terrain.measured_points_y))
#! filter exceptional points (compared to root)
exception_heights = (measured_heights_grid > 1) | (measured_heights_grid < -1)
measured_heights_grid = measured_heights_grid.clamp_(min=-0.5, max=0.5)
d_x,d_y = torch.gradient(measured_heights_grid, dim=[1,2], spacing = 0.05) #! Note: spacing should be changed when the resolution of the terrain is changed
self.slope = torch.sqrt(d_x**2 + d_y**2)
h_mean = torch.mean(measured_heights_grid, dim=(1,2))
roughness = measured_heights_grid - h_mean.unsqueeze(1).unsqueeze(2).repeat(1, len(self.cfg.terrain.measured_points_x), len(self.cfg.terrain.measured_points_y))
roughness = torch.abs(roughness)
# roughness_score = torch.nn.functional.normalize(roughness, p=2, dim=(1,2))
# slope_score = torch.nn.functional.normalize(self.slope, p=2, dim=(1,2))
edge = torch.sqrt(torch.var(measured_heights_grid, dim=(1,2))).unsqueeze(1).unsqueeze(2).repeat(1, len(self.cfg.terrain.measured_points_x), len(self.cfg.terrain.measured_points_y))
edge = edge.clamp_(min=0.0, max=0.3)
lambda_1, lambda_2, lambda_3 = 0.2, 1, 0.3
foothold_score = lambda_1 * edge + lambda_2 * self.slope + lambda_3 * roughness
foothold_score = foothold_score.view(self.num_envs, -1)
foothold_score = torch.where(foothold_score < 0.1, foothold_score, torch.tensor(10.0, dtype=torch.float, device=self.device) )
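            # Note: the per-cell terrain cost mixes edge proximity, slope and roughness
            # (weights 0.2 / 1.0 / 0.3); cells whose cost reaches 0.1 are set to the
            # 10.0 sentinel so they are never selected as footholds.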
# take distance to nominal foothold
# breakpoint()
height_points_flattened = self.height_points.flatten(end_dim = -2)
quat_flattened = self.base_quat.repeat(1, self.measured_heights.shape[1]).flatten()
heights_world = quat_apply_yaw(quat_flattened, height_points_flattened)
self.heights_world = heights_world.reshape(self.num_envs, -1, 3) + self.base_pos.unsqueeze(1).repeat(1, len(self.cfg.terrain.measured_points_x)*len(self.cfg.terrain.measured_points_y), 1)
self.heights_world[:, :, 2] = self.measured_heights
footholds_repeated_envs = self.pred_footholds.unsqueeze(1).repeat(1, len(self.cfg.terrain.measured_points_x)*len(self.cfg.terrain.measured_points_y), 1, 1)
heights_world_repeated4 = self.heights_world.unsqueeze(2).repeat(1,1,4,1)
dis_to_nominal = torch.norm(footholds_repeated_envs[:, :, :, :-1] - heights_world_repeated4[:, :, :, :-1], dim=-1)
# dis_to_nominal: [num_envs, num_points, 4]
            # mask out cells farther than 0.16 m from the nominal foothold by filling
            # them with the 10.0 sentinel so they are never selected
            dis_to_nominal = torch.where(dis_to_nominal < 0.16, dis_to_nominal, torch.tensor(10.0, dtype=torch.float, device=self.device))
#! for debug & visualize
self.nominal_footholds_indice = dis_to_nominal.min(dim=1)[1]
foothold_score = foothold_score.unsqueeze(2).repeat(1,1,4)
foothold_score = foothold_score * 0.2 + dis_to_nominal * 0.8
#! filter exceptional points
foothold_score = torch.where(exception_heights.view(self.num_envs, -1).unsqueeze(2).repeat(1,1,4), torch.tensor(10.0, dtype=torch.float, device=self.device), foothold_score)
self.foothold_score = foothold_score
# foothold_score: [num_envs, num_points, 4] (each point respect to each foothold)
# get the top n footholds
ktop_num = 1 #! do not change
optimal_values, optimal_indices = torch.topk(foothold_score, k=ktop_num, dim=1, largest=False, sorted=True)
self.optimal_foothold_indice = optimal_indices
#! compute for observation
x_indices = torch.remainder(optimal_indices, self.cfg.terrain.measured_y_dim)
y_indices = torch.div(optimal_indices, self.cfg.terrain.measured_y_dim, rounding_mode='trunc')
# Map matrix indices to actual x, y values
measured_points_x = torch.tensor(self.cfg.terrain.measured_points_x).unsqueeze(0).unsqueeze(0).repeat(self.num_envs, ktop_num, 4).to(self.device)
measured_points_y = torch.tensor(self.cfg.terrain.measured_points_y).unsqueeze(0).unsqueeze(0).repeat(self.num_envs, ktop_num, 4).to(self.device)
decoded_x_values = torch.gather(measured_points_x, -1, x_indices)
decoded_y_values = torch.gather(measured_points_y, -1, y_indices)
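            # Note: optimal_indices address the flattened height grid, so the
            # remainder / integer-division pair above recovers 2-D grid coordinates
            # (modulo measured_y_dim), and torch.gather maps them back to the metric
            # offsets listed in cfg.terrain.measured_points_x / measured_points_y.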
foothold_obs = torch.cat((decoded_x_values, decoded_y_values), dim=-2)
self.foothold_obs = foothold_obs.view(self.num_envs, -1)
#! compute for rewards
if ktop_num == 1:
optimal_indices = optimal_indices.squeeze(1)
batch_indices = torch.arange(optimal_indices.size(0)).unsqueeze(-1).expand_as(optimal_indices)
self.optimal_footholds_world = self.heights_world[batch_indices, optimal_indices]
###########################################################################
# compute observations, rewards, resets, ...
self.check_termination()
self.compute_reward()
env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
self.reset_idx(env_ids)
self.compute_observations() # in some cases a simulation step might be required to refresh some obs (for example body positions)
self.last_actions_2[:] = self.last_actions[:]
self.last_actions[:] = self.actions[:]
self.last_dof_vel[:] = self.dof_vel[:]
self.last_root_vel[:] = self.root_states[:, 7:13]
self.last_foot_velocities[:] = self.foot_velocities[:]
#! added by wz
self.base_ang_vel_last = self.base_ang_vel.clone()
self.base_lin_vel_last = self.base_lin_vel.clone()
if self.viewer and self.enable_viewer_sync and self.debug_viz:
self._draw_debug_vis()
def check_termination(self):
""" Check if environments need to be reset
"""
self.reset_buf = torch.any(torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 100., dim=1)
self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs
self.reset_buf |= self.time_out_buf
# self.reset_buf |= (self.projected_gravity[:,2] > -0.2)
        #! terminate when the base has tipped over (projected gravity z-component turns positive)
        self.reset_buf |= (self.projected_gravity[:, 2] > 0.2)
#! X30
# self.reset_buf |= ((torch.mean(self.root_states[:, 2].unsqueeze(1) - self.measured_heights[:, 55:132], dim=1)) < 0.25)
        #! lite3: reset if the base sinks too close to the measured terrain
        self.reset_buf |= (torch.mean(self.root_states[:, 2].unsqueeze(1) -
                                      self.measured_heights[:, 10 * 21: (33 - 10) * 21].clip(min=-0.),
                                      dim=1) < 0.15)  #! slice bounds (formerly 55:132) depend on the height-grid resolution
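        # Note (assumes a 33 x 21 measured-points grid): the 10*21:(33-10)*21 slice keeps
        # the central rows of the height map, i.e. the band under the trunk; update it
        # together with cfg.terrain.measured_points_x / measured_points_y.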
# # added foot < 0
# min_foot_z = torch.min(self.foot_positions[:,:,-1], dim=-1)[0]
# self.reset_buf |= (min_foot_z < 0)
def compute_observations(self):
""" Computes observations
"""
cpg_phase_information = self.cpg_phase_information
#! no teacher-student arch
self.obs_buf = torch.cat((
self.base_ang_vel * self.obs_scales.ang_vel,
self.projected_gravity, #* self.obs_scales.gravity,
self.commands[:, :3] * self.commands_scale,
(self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos,
self.dof_vel * self.obs_scales.dof_vel,
self.actions,
# cpg_phase_information *1.0 #PMTrajectoryGenerator test:###
###############################
#! DTC
self.foothold_obs, # 2 * 4 = 8
###############################
),dim=-1)
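        # Note (assumes a 12-DoF quadruped): the observation is 3 (ang vel) + 3 (gravity)
        # + 3 (commands) + 12 (dof pos) + 12 (dof vel) + 12 (actions) + 8 (foothold xy
        # for 4 feet) = 53 terms, matching the noise_scale_vec[:53] slice used below.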
if self.cfg.terrain.measure_heights:
self.heights = torch.clip(self.root_states[:, 2].unsqueeze(1) - self.cfg.rewards.base_height_target - self.measured_heights, -1, 1.) * self.obs_scales.height_measurements
            self.privileged_obs_buf = torch.cat(
                (
                    self.heights + (2 * torch.rand_like(self.heights) - 1) * 0.1 + self.height_noise_offset,
                    self.forces[:, 0, :] * self.obs_scales.force,
                    self.heights,
                ), dim=1)
        # add noise if needed
        if self.add_noise:
            self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec[:53]
    def _reset_root_states(self, env_ids):
        """ Resets ROOT states position and velocities of selected environments
            Sets base position based on the curriculum
            Selects randomized base velocities within -0.5:0.5 [m/s, rad/s]
        Args:
            env_ids (List[int]): Environment ids
        """
# base position
if self.custom_origins:
# self.base_init_state[2]=torch_rand_float(self.cfg.init_state.pos_z_range[0], self.cfg.init_state.pos_z_range[1], (1,1),device=self.device)
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
            self.root_states[env_ids, :2] += torch_rand_float(-.5, .5, (len(env_ids), 2), device=self.device)  # xy position within 0.5 m of the center
else:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
# self.root_states[env_ids, 0] = self.root_states[env_ids, 0]-10
# base velocities
self.root_states[env_ids, 7:13] = torch_rand_float(-0.5, 0.5, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _create_envs(self):
""" Creates environments:
1. loads the robot URDF/MJCF asset,
2. For each environment
2.1 creates the environment,
2.2 calls DOF and Rigid shape properties callbacks,
2.3 create actor with these properties and add them to the env
3. Store indices of different bodies of the robot
"""
asset_path = self.cfg.asset.file.format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
asset_root = os.path.dirname(asset_path)
asset_file = os.path.basename(asset_path)
asset_options = gymapi.AssetOptions()
asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode
asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints
asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule
asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments
asset_options.fix_base_link = self.cfg.asset.fix_base_link
asset_options.density = self.cfg.asset.density
asset_options.angular_damping = self.cfg.asset.angular_damping
asset_options.linear_damping = self.cfg.asset.linear_damping
asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity
asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity
asset_options.armature = self.cfg.asset.armature
asset_options.thickness = self.cfg.asset.thickness
asset_options.disable_gravity = self.cfg.asset.disable_gravity
robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
self.num_dof = self.gym.get_asset_dof_count(robot_asset)
self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset)
dof_props_asset = self.gym.get_asset_dof_properties(robot_asset)
rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset)
self.default_friction = rigid_shape_props_asset[1].friction
self.default_restitution = rigid_shape_props_asset[1].restitution
# self.default_restitution = 1.
self._init_custom_buffers__()
# save body names from the asset
body_names = self.gym.get_asset_rigid_body_names(robot_asset)
self.dof_names = self.gym.get_asset_dof_names(robot_asset)
self.num_bodies = len(body_names)
self.num_dofs = len(self.dof_names)
feet_names = [s for s in body_names if self.cfg.asset.foot_name in s]
# hip_names = [s for s in body_names if self.cfg.asset.hip_name in s]
        hip_names = ["FL_HipX_joint", "FR_HipX_joint", "HL_HipX_joint", "HR_HipX_joint"]  #! use DOF names for hip indices
        thigh_names = [s for s in body_names if self.cfg.asset.thigh_name in s]  #! use body names for thigh indices
self.hip_indices = torch.zeros(len(hip_names), dtype=torch.long, device=self.device, requires_grad=False)
self.thigh_indices = torch.zeros(len(thigh_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(hip_names):
self.hip_indices[i] = self.dof_names.index(name)
for i, name in enumerate(thigh_names):
self.thigh_indices[i] = body_names.index(name)
penalized_contact_names = []
for name in self.cfg.asset.penalize_contacts_on:
penalized_contact_names.extend([s for s in body_names if name in s])
termination_contact_names = []
for name in self.cfg.asset.terminate_after_contacts_on:
termination_contact_names.extend([s for s in body_names if name in s])
collision_contact_names = []
for name in self.cfg.asset.collision_state:
collision_contact_names.extend([s for s in body_names if name in s])
base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel
self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False)
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
self._get_env_origins()
env_lower = gymapi.Vec3(0., 0., 0.)
env_upper = gymapi.Vec3(0., 0., 0.)
self.actor_handles = []
self.envs = []
# camera test
self.sensor_handles = []
def str_to_bit(s):
return int(s.replace(' ', ''), 2)
collision_filter = self.cfg.init_state.collision_filter
for i in range(self.num_envs):
# create env instance
env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs)))
pos = self.env_origins[i].clone()
pos[:2] += torch_rand_float(-1., 1., (2,1), device=self.device).squeeze(1)
start_pose.p = gymapi.Vec3(*pos)
rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i)
# print(rigid_shape_props)
for shape_id in range(len(rigid_shape_props)):
rigid_shape_props[shape_id].filter = list(collision_filter.values())[shape_id]
self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props)
actor_handle = self.gym.create_actor(env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0)
rsp = self.gym.get_actor_rigid_shape_properties(env_handle, actor_handle)
for shape_id in range(len(rigid_shape_props)):
rsp[shape_id].filter = list(collision_filter.values())[shape_id]
self.gym.set_actor_rigid_shape_properties(env_handle, actor_handle, rsp)
dof_props = self._process_dof_props(dof_props_asset, i)
self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props)
body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle)
body_props = self._process_rigid_body_props(body_props, i)
self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=True)
#temp
self.envs.append(env_handle)
self.actor_handles.append(actor_handle)
#camera test
# sensor_handle_dict = self._create_sensors(env_handle, actor_handle)
# self.sensor_handles.append(sensor_handle_dict)
self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(feet_names)):
self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], feet_names[i])
self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(penalized_contact_names)):
# print(penalized_contact_names)
self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], penalized_contact_names[i])
self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(termination_contact_names)):
self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], termination_contact_names[i])
self.collision_contact_indices = torch.zeros(len(collision_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(collision_contact_names)):
self.collision_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], collision_contact_names[i])
    def _draw_debug_vis(self):
        """ Draws visualizations for debugging (slows down simulation a lot).
            Default behaviour: draws height measurement points, foothold scores,
            nominal / optimal footholds and predicted footholds
        """
self.gym.clear_lines(self.viewer)
self.gym.refresh_rigid_body_state_tensor(self.sim)
base_sphere_geom = gymutil.WireframeSphereGeometry(radius=0.1, color=(0, 0, 1))
        foothold_nominal_sphere_geom = gymutil.WireframeSphereGeometry(radius=0.035, color=(0, 0, 1))
foothold_edge_sphere_geom = gymutil.WireframeSphereGeometry(radius=0.03, color=(1, 0, 0))
foothold_optimal_sphere_geom = gymutil.WireframeSphereGeometry(radius=0.03, color=(0, 1, 0))
command_hip = gymutil.WireframeSphereGeometry(radius=0.05, color=(1, 1, 0))
# draw height lines
if self.terrain.cfg.measure_heights:
i = self.lookat_id
x_all = self.heights_world[i, :, 0].cpu().numpy()
y_all = self.heights_world[i, :, 1].cpu().numpy()
z_all = self.heights_world[i, :, 2].cpu().numpy()
foothold_score = torch.min(self.foothold_score, dim=2)[0][i, :].cpu()
for x,y,z, score in zip(x_all, y_all, z_all, foothold_score):
sphere_pose = gymapi.Transform(gymapi.Vec3(x, y, z), r=None)
if score > 0.9 and score < 8:
gymutil.draw_lines(foothold_edge_sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
# else:
# gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
# gymutil.draw_lines(foothold_edge_sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
# nominal foot_hold
nominal_footholds_indice = self.nominal_footholds_indice[i, :].cpu().numpy()
nominal_footholds_x = x_all[nominal_footholds_indice]
nominal_footholds_y = y_all[nominal_footholds_indice]
nominal_footholds_z = z_all[nominal_footholds_indice]
for x,y,z in zip(nominal_footholds_x, nominal_footholds_y, nominal_footholds_z):
sphere_pose = gymapi.Transform(gymapi.Vec3(x, y, z), r=None)
gymutil.draw_lines(foothold_nominal_sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
# draw optimal indices
optimal_foothold_x = self.optimal_footholds_world[i, :, 0].cpu().numpy()
optimal_foothold_y = self.optimal_footholds_world[i, :, 1].cpu().numpy()
optimal_foothold_z = self.optimal_footholds_world[i, :, 2].cpu().numpy()
for x,y,z in zip(optimal_foothold_x, optimal_foothold_y, optimal_foothold_z):
sphere_pose = gymapi.Transform(gymapi.Vec3(x, y, z), r=None)
gymutil.draw_lines(foothold_optimal_sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
# hip position
# for pos in self.hip_positions[i, :]:
# sphere_pose = gymapi.Transform(gymapi.Vec3(*pos), r=None)
# gymutil.draw_lines(hip_sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
for foothold in self.pred_footholds[i, :]:
sphere_pose = gymapi.Transform(gymapi.Vec3(*foothold), r=None)
gymutil.draw_lines(command_hip, self.gym, self.viewer, self.envs[i], sphere_pose)
    def _reward_big_pitch(self):
        # count 1 when the base-frame gravity x-component exceeds 0.6, i.e. the base is pitched steeply
        rew = torch.sum((torch.abs(self.projected_gravity[:, 0:1]) > 0.6), dim=1)
        return rew
def _reward_feet_stumble(self):
# Penalize feet hitting vertical surfaces
return torch.any(torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) >\
3 *torch.abs(self.contact_forces[:, self.feet_indices, 2]), dim=1)
    def _reward_base_height(self):
        # squared error between base height (relative to mean foot height) and the target height
        foot_to_body = self.root_states[:, 2] - torch.mean(self.foot_positions[:, :, 2], dim=-1)
        return torch.square(foot_to_body - self.cfg.rewards.base_height_target)
def _reward_foothold_miss(self):
        # penalize missed footholds: count 1 when the lowest foot drops below z = 0 (e.g. into a gap)
min_foot_z = torch.min(self.foot_positions[:,:,-1], dim=-1)[0]
return torch.where(min_foot_z < 0, torch.tensor(1., dtype=torch.float32, device=self.device), torch.tensor(0., dtype=torch.float32, device=self.device))
#! soft tracking lin / ang vel
    def _reward_soft_tracking_lin_vel(self, tolerance=0., lookback=3):
        '''
        Soft linear-velocity tracking over the last **lookback** steps, inspired by the
        position-tracking reward in "Learning Agile Locomotion on Risky Terrains".
        Uses exp(-||v - cmd||^2 / sigma) on command-normalized errors, averaged over the
        lookback window; with a nonzero tolerance the squared error is binarized first.
        '''
        dis_norm2 = torch.sum(torch.square((self.cmd_buffer[-lookback:, :, :2] - self.lin_vel_buffer[-lookback:, :, :2]) / self.command_ranges["lin_vel_x"][1]), dim=-1)
        if tolerance != 0:
            dis_norm2 = torch.where(dis_norm2 <= tolerance ** 2, 0., 1.)
        # error = 1. / (1. + dis_norm2)
        error = torch.exp(-dis_norm2 / self.cfg.rewards.tracking_sigma)
        normalized_error = torch.mean(error, dim=0)
        return normalized_error
    def _reward_soft_tracking_ang_vel(self, tolerance=0.15, lookback=4):
        '''
        Soft yaw-rate tracking over the last **lookback** steps, inspired by the
        heading-tracking reward in "Learning Agile Locomotion on Risky Terrains".
        Same form as the linear-velocity version: exp(-err^2 / sigma) averaged over the
        lookback window, with the error binarized when a tolerance is given.
        '''
        dis_norm2 = torch.square((self.cmd_buffer[-lookback:, :, 2] - self.ang_vel_buffer[-lookback:, :].squeeze(-1)) / self.command_ranges["ang_vel_yaw"][1])
        if tolerance != 0:
            dis_norm2 = torch.where(dis_norm2 <= tolerance ** 2, 0., 1.)
        # error = 1. / (1. + dis_norm2)
        error = torch.exp(-dis_norm2 / self.cfg.rewards.tracking_sigma)
        normalized_error = torch.mean(error, dim=0)
        return normalized_error
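    # Note: with the default tolerance of 0.15 the yaw-rate term is effectively binary:
    # normalized errors within 15% of the command range give exp(0) = 1, anything larger
    # gives exp(-1 / tracking_sigma), before averaging over the lookback window.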
def _reward_tracking_ang_vel(self):
# Tracking of angular velocity commands (yaw)
ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
return torch.exp(-ang_vel_error/self.cfg.rewards.tracking_sigma)
    #! DTC tracking optimal footholds reward
    def _reward_tracking_optimal_footholds(self):
        # xy distance between each foot and its selected optimal foothold
        dis = self.foot_positions[:, :, :-1] - self.optimal_footholds_world[:, :, :-1]
        dis = torch.norm(dis, dim=-1)
        contact = self.contact_filt.float()
        epsilon = 0.8
        # reward only feet that are in contact; the closer to the foothold, the larger the reward
        reward_per_foot = -torch.log(epsilon + dis)
        reward_filt = torch.where(contact == 1, reward_per_foot, torch.tensor(0., dtype=torch.float32, device=self.device))
        reward_sum = torch.sum(reward_filt, dim=-1)
        return reward_sum
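    # Note: with epsilon = 0.8 each in-contact foot contributes -log(0.8 + dis):
    # about +0.22 for a perfect hit (dis = 0), 0 at dis = 0.2 m, and negative beyond.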