removing_node = cluster.add_node(num_cpus=0)
cluster.remove_node(removing_node, allow_graceful=True)
with pytest.raises(RayTestTimeoutException):
wait_for_errors(ray_constants.REMOVED_NODE_ERROR, 3, timeout=2)
// There is no connection error to a dead node.
info = relevant_errors(ray_constants.RAYLET_CONNECTION_ERROR)
assert len(info) == 0
After Change
cluster = Cluster()
cluster.add_node(num_cpus=0, _internal_config=config)
ray.init(address=cluster.address)
p = init_error_pubsub()
errors = get_error_message(p, 1, timeout=5)
assert len(errors) == 0
// This node is killed by SIGKILL, ray_monitor will mark it to dead.
dead_node = cluster.add_node(num_cpus=0)
cluster.remove_node(dead_node, allow_graceful=False)
errors = get_error_message(p, 1, ray_constants.REMOVED_NODE_ERROR)
assert len(errors) == 1
// This node is killed by SIGKILL, ray_monitor will mark it to dead.
dead_node = cluster.add_node(num_cpus=0)
cluster.remove_node(dead_node, allow_graceful=False)
errors = get_error_message(p, 1, ray_constants.REMOVED_NODE_ERROR)
assert len(errors) == 1
// This node is killed by SIGTERM, ray_monitor will not mark it again.
removing_node = cluster.add_node(num_cpus=0)
cluster.remove_node(removing_node, allow_graceful=True)
errors = get_error_message(p, 1, timeout=2)
assert len(errors) == 0
// There is no connection error to a dead node.
errors = get_error_message(p, 1, timeout=2)
assert len(errors) == 0
p.close()
@pytest.mark.parametrize(
"ray_start_cluster_head", [{