return responses, blocking
v1 = serve.deployment(name=name, version="1", num_replicas=2)(v1)
v1.deploy()
responses1, _ = make_nonblocking_calls({"1": 2})
pids1 = responses1["1"]
# ref2 will block a single replica until the signal is sent. Check that
# some requests are now blocking.
ref2 = call.remote(block=True)
responses2, blocking2 = make_nonblocking_calls(
{
"1": 1
}, expect_blocking=True)
assert list(responses2["1"])[0] in pids1
# Redeploy a new version. Since there is one replica blocked, only one new
# replica should be started up.
v2 = v1.options(backend_def=v2, version="2")
goal_ref = v2.deploy(_blocking=False)
assert not client._wait_for_goal(goal_ref, timeout=0.1)
responses3, blocking3 = make_nonblocking_calls(
{
"1": 1